예제 #1
0
void Clusterizer::splitHotColdClusters() {
  // compute the average weight of each cluster
  jit::vector<uint64_t> clusterAvgWgt(m_clusters.size());
  for (size_t c = 0; c < m_clusters.size(); c++) {
    uint64_t totalWeight = 0;
    uint64_t totalSize   = 0;
    for (auto b : m_clusters[c]) {
      const auto numInsts = m_unit.blocks[b].code.size();
      totalSize   += numInsts;
      totalWeight += numInsts * m_scale.weight(b);
    }
    clusterAvgWgt[c] = totalSize == 0 ? 0 : totalWeight / totalSize;
  }

  const auto entryAvgWgt = clusterAvgWgt[m_blockCluster[m_unit.entry]];
  const uint64_t hotThreshold = entryAvgWgt *
                                RuntimeOption::EvalJitLayoutHotThreshold;
  FTRACE(3, "splitHotColdClusters: entryAvgWgt = {} ; hotThreshold = {}\n",
         entryAvgWgt, hotThreshold);

  for (auto cid : m_clusterOrder) {
    if (m_clusters[cid].size() == 0) continue;
    const AreaIndex area = clusterAvgWgt[cid] >= hotThreshold ? AreaIndex::Main
                                                              : AreaIndex::Cold;
    FTRACE(3, "  -> C{}: {} (avg wgt = {}): ",
           cid, area_names[unsigned(area)], clusterAvgWgt[cid]);
    for (auto b : m_clusters[cid]) {
      // don't reassign blocks that are in frozen
      if (m_unit.blocks[b].area_idx == AreaIndex::Frozen) continue;
      m_unit.blocks[b].area_idx = area;
      FTRACE(3, "{}, ", b);
    }
    FTRACE(3, "\n");
  }
}
예제 #2
0
/*
 * Merge two state-stacks.  The stacks must have the same depth.  Returns
 * whether any states changed.
 */
bool merge_into(jit::vector<FrameState>& dst, const jit::vector<FrameState>& src) {
  always_assert(src.size() == dst.size());
  auto changed = false;
  for (auto idx = uint32_t{0}; idx < dst.size(); ++idx) {
    changed |= merge_into(dst[idx], src[idx]);
  }
  return changed;
}
예제 #3
0
void Clusterizer::initClusters() {
  m_clusters.resize(m_unit.blocks.size());
  m_blockCluster.resize(m_unit.blocks.size());
  for (auto b : m_blocks) {
    m_clusters[b].push_back(b);
    m_blockCluster[b] = b;
  }
}
예제 #4
0
bool merge_memory_stack_into(jit::vector<StackState>& dst,
                             const jit::vector<StackState>& src) {
  auto changed = false;
  // We may need to merge different-sized memory stacks, because a predecessor
  // may not touch some stack memory that another pred did.  We just need to
  // conservatively throw away slots that aren't tracked on all preds.
  auto const result_size = std::min(dst.size(), src.size());
  dst.resize(result_size);
  for (auto i = uint32_t{0}; i < result_size; ++i) {
    changed |= merge_into(dst[i], src[i]);
  }
  return changed;
}
예제 #5
0
void DFSSortClusters::dfs(uint32_t cid) {

  if (m_visited.test(cid)) return;
  m_visited.set(cid);
  m_list.push_back(Vlabel(cid));

  // find the best successor, which is the one to which cid has the
  // highest weight among the ones that haven't been visited yet
  int64_t  maxWgt = 0;
  uint32_t bestSucc = uint32_t(-1);
  for (auto& sInfo : m_clusterSuccs[cid]) {
    auto succId = sInfo.first;
    if (m_visited.test(succId)) continue;
    auto wgt = sInfo.second;
    if (wgt >= maxWgt) {
      maxWgt   = wgt;
      bestSucc = succId;
    }
  }

  if (bestSucc == uint32_t(-1)) return;

  // visit bestSucc first
  dfs(bestSucc);

  // now visit the remaining ones
  for (auto& sInfo : m_clusterSuccs[cid]) {
    if (sInfo.first != bestSucc) {
      dfs(sInfo.first);
    }
  }
}
예제 #6
0
void natural_loop_dfs(jit::vector<Block*>& out, Visited& visited, Block* blk) {
  if (visited.test(blk->id())) return;
  visited.set(blk->id());
  blk->forEachPred([&] (Block* pred) {
    natural_loop_dfs(out, visited, pred);
  });
  out.push_back(blk);
}
예제 #7
0
void Vgen::emit(jcc& i) {
  assertx(i.cc != CC_None);
  if (i.targets[1] != i.targets[0]) {
    if (next == i.targets[1]) {
      // the taken branch is the fall-through block, invert the branch.
      i = jcc{ccNegate(i.cc), i.sf, {i.targets[1], i.targets[0]}};
    }
    jccs.push_back({a->frontier(), i.targets[1]});
    // B.cond range is +/- 1MB but this uses BR
    backend.emitSmashableJump(*codeBlock, kEndOfTargetChain, i.cc);
  }
  emit(jmp{i.targets[0]});
}
예제 #8
0
void Vgen::emit(tbcc& i) {
  assertx(i.cc == vixl::ne || i.cc == vixl::eq);
  if (i.targets[1] != i.targets[0]) {
    if (next == i.targets[1]) {
      // the taken branch is the fall-through block, invert the branch.
      i = tbcc{i.cc == vixl::ne ? vixl::eq : vixl::ne, i.bit, i.s,
               {i.targets[1], i.targets[0]}};
    }
    bccs.push_back({a->frontier(), i.targets[1]});
    // offset range +/- 32KB
    if (i.cc == vixl::ne) {
      a->tbnz(X(i.s), i.bit, 0);
    } else {
      a->tbz(X(i.s), i.bit, 0);
    }
  }
  emit(jmp{i.targets[0]});
}
예제 #9
0
void Vgen::emit(jmp i) {
  if (next == i.target) return;
  jmps.push_back({a->frontier(), i.target});
  // B range is +/- 128MB but this uses BR
  backend.emitSmashableJump(*codeBlock, kEndOfTargetChain, CC_None);
}
예제 #10
0
void Vgen::emit(hcunwind& i) {
  catches.push_back({points[i.call], i.targets[1]});
  emit(jmp{i.targets[0]});
}
예제 #11
0
// overall emitter
void Vgen::emit(jit::vector<Vlabel>& labels) {
  // Some structures here track where we put things just for debug printing.
  struct Snippet {
    const IRInstruction* origin;
    TcaRange range;
  };
  struct BlockInfo {
    jit::vector<Snippet> snippets;
  };

  // This is under the printir tracemod because it mostly shows you IR and
  // machine code, not vasm and machine code (not implemented).
  bool shouldUpdateAsmInfo = !!m_asmInfo
    && Trace::moduleEnabledRelease(HPHP::Trace::printir, kCodeGenLevel);

  std::vector<TransBCMapping>* bcmap = nullptr;
  if (mcg->tx().isTransDBEnabled() || RuntimeOption::EvalJitUseVtuneAPI) {
    bcmap = &mcg->cgFixups().m_bcMap;
  }

  jit::vector<jit::vector<BlockInfo>> areaToBlockInfos;
  if (shouldUpdateAsmInfo) {
    areaToBlockInfos.resize(areas.size());
    for (auto& r : areaToBlockInfos) {
      r.resize(unit.blocks.size());
    }
  }

  for (int i = 0, n = labels.size(); i < n; ++i) {
    assertx(checkBlockEnd(unit, labels[i]));

    auto b = labels[i];
    auto& block = unit.blocks[b];
    codeBlock = &area(block.area).code;
    vixl::MacroAssembler as { *codeBlock };
    a = &as;
    auto blockStart = a->frontier();
    addrs[b] = blockStart;

    {
      // Compute the next block we will emit into the current area.
      auto cur_start = start(labels[i]);
      auto j = i + 1;
      while (j < labels.size() && cur_start != start(labels[j])) {
        j++;
      }
      next = j < labels.size() ? labels[j] : Vlabel(unit.blocks.size());
    }

    const IRInstruction* currentOrigin = nullptr;
    auto blockInfo = shouldUpdateAsmInfo
      ? &areaToBlockInfos[unsigned(block.area)][b]
      : nullptr;
    auto start_snippet = [&](Vinstr& inst) {
      if (!shouldUpdateAsmInfo) return;

      blockInfo->snippets.push_back(
        Snippet { inst.origin, TcaRange { codeBlock->frontier(), nullptr } }
      );
    };
    auto finish_snippet = [&] {
      if (!shouldUpdateAsmInfo) return;

      if (!blockInfo->snippets.empty()) {
        auto& snip = blockInfo->snippets.back();
        snip.range = TcaRange { snip.range.start(), codeBlock->frontier() };
      }
    };

    for (auto& inst : block.code) {
      if (currentOrigin != inst.origin) {
        finish_snippet();
        start_snippet(inst);
        currentOrigin = inst.origin;
      }

      if (bcmap && inst.origin) {
        auto sk = inst.origin->marker().sk();
        if (bcmap->empty() ||
            bcmap->back().md5 != sk.unit()->md5() ||
            bcmap->back().bcStart != sk.offset()) {
          bcmap->push_back(TransBCMapping{sk.unit()->md5(), sk.offset(),
                                          main().frontier(), cold().frontier(),
                                          frozen().frontier()});
        }
      }

      switch (inst.op) {
#define O(name, imms, uses, defs) \
        case Vinstr::name: emit(inst.name##_); break;
        VASM_OPCODES
#undef O
      }
    }

    finish_snippet();
  }

  for (auto& p : jccs) {
    assertx(addrs[p.target]);
    backend.smashJcc(p.instr, addrs[p.target]);
  }
  for (auto& p : bccs) {
    assertx(addrs[p.target]);
    auto link = (Instruction*) p.instr;
    link->SetImmPCOffsetTarget(Instruction::Cast(addrs[p.target]));
  }
  for (auto& p : jmps) {
    assertx(addrs[p.target]);
    backend.smashJmp(p.instr, addrs[p.target]);
  }
  for (auto& p : catches) {
    mcg->registerCatchBlock(p.instr, addrs[p.target]);
  }
  for (auto& p : ldpoints) {
    CodeCursor cc(main(), p.instr);
    MacroAssembler a{main()};
    a.Mov(X(p.d), points[p.pos]);
  }

  if (!shouldUpdateAsmInfo) {
    return;
  }

  for (auto i = 0; i < areas.size(); ++i) {
    const IRInstruction* currentOrigin = nullptr;
    auto& blockInfos = areaToBlockInfos[i];
    for (auto const blockID : labels) {
      auto const& blockInfo = blockInfos[static_cast<size_t>(blockID)];
      if (blockInfo.snippets.empty()) continue;

      for (auto const& snip : blockInfo.snippets) {
        if (currentOrigin != snip.origin && snip.origin) {
          currentOrigin = snip.origin;
        }

        m_asmInfo->updateForInstruction(
          currentOrigin,
          static_cast<AreaIndex>(i),
          snip.range.start(),
          snip.range.end());
      }
    }
  }
}