Example 1
std::string show(const RegionDesc& region) {
  return folly::format(
    "Region ({} blocks):\n{}",
    region.blocks().size(),
    [&]{
      std::string ret;
      std::string arcs;
      for (auto& b : region.blocks()) {
        folly::toAppend(show(*b), &ret);
        if (auto r = region.nextRetrans(b->id())) {
          folly::toAppend(folly::format("{} -R-> {}\n", b->id(), r.value()),
                          &arcs);
        }
        for (auto s : region.succs(b->id())) {
          folly::toAppend(folly::format("{} -> {}\n", b->id(), s), &arcs);
        }
      }
      folly::toAppend("Arcs:\n" + arcs, &ret);
      folly::toAppend("Side-exiting Blocks:\n",
                      folly::join(", ", region.sideExitingBlocks()),
                      "\n",
                      &ret);
      return ret;
    }()
  ).str();
}
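A minimal sketch of how this dump might be consumed at a call site, assuming the FTRACE macro from the surrounding HHVM JIT sources; the helper name is invented for illustration:

// Hypothetical call site (not from the original file): dump the selected
// region at trace level 2 before translation begins.
void traceSelectedRegion(const RegionDesc& region) {
  FTRACE(2, "selected region:\n{}\n", show(region));
}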
Example 2
std::string show(const RegionDesc& region) {
  std::string ret{folly::sformat("Region ({} blocks):\n",
                                 region.blocks().size())};

  auto profData = mcg->tx().profData();

  auto weight = [&] (RegionDesc::BlockPtr b) -> int64_t {
    if (!profData) return 0;
    auto tid = b->profTransID();
    if (tid == kInvalidTransID) return 0;
    return profData->transCounter(tid);
  };

  uint64_t maxBlockWgt = 1; // avoid div by 0

  // Print contents of all blocks in pure text format.
  for (auto& b : region.blocks()) {
    folly::toAppend(show(*b), &ret);
    auto w = weight(b);
    if (w > maxBlockWgt) maxBlockWgt = w;
  }

  // Print CFG in dot format, coloring the blocks based on hotness.
  // Print all the blocks first.
  folly::toAppend("\ndigraph RegionCFG {\n node[shape=box,style=filled]\n",
                  &ret);
  for (auto& b : region.blocks()) {
    auto const id = b->id();
    auto const& mergedSet = region.merged(id);
    std::string mergedStr = mergedSet.empty() ? "" :
                            (" (" + folly::join(",", mergedSet) + ")");
    uint32_t coldness = 255 - (255 * weight(b) / maxBlockWgt);
    folly::format(&ret, " \"B{}\" [label=\"B {}{}\\np: {}\","
                  "fillcolor=\"#ff{:02x}{:02x}\"]\n",
                  id, id, mergedStr, weight(b), coldness, coldness);
  }

  // Print arcs in dot format.
  for (auto& b : region.blocks()) {
    if (auto r = region.nextRetrans(b->id())) {
      folly::toAppend(folly::format(" \"B{}\" -> \"B{}\" [label=R,color=red]\n",
                                    b->id(), r.value()), &ret);
    }
    for (auto s : region.succs(b->id())) {
      folly::toAppend(folly::format(" \"B{}\" -> \"B{}\"\n", b->id(), s),
                      &ret);
    }
  }

  ret += "}\n";

  return ret;
}
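This version appends a Graphviz digraph after the plain-text dump. Below is a hedged sketch of how the dot section might be split out and saved for rendering; the helper and its file handling are illustrative assumptions, and only show() above is taken from the source:

#include <fstream>
#include <string>

// Illustrative only: locate the "digraph RegionCFG" marker emitted by
// show() above and write everything from there into a .dot file, which
// can then be rendered with `dot -Tpng region.dot -o region.png`.
void dumpRegionCFG(const RegionDesc& region, const std::string& path) {
  auto const s = show(region);
  auto const pos = s.find("\ndigraph RegionCFG {");
  if (pos == std::string::npos) return;
  std::ofstream out(path);
  out << s.substr(pos + 1);
}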
Example 3
void RegionDesc::copyBlocksFrom(const RegionDesc&  other,
                                BlockVec::iterator where) {
  auto otherBlocks = other.blocks();
  m_blocks.insert(where, otherBlocks.begin(), otherBlocks.end());
  for (auto b : otherBlocks) {
    m_data[b->id()] = BlockData(b);
  }
}
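A plausible companion method, sketched under the assumption that RegionDesc also keeps arc data that must be copied alongside the blocks; neither this method nor a copyArcsFrom helper appears in the excerpt above:

// Sketch: append all of `other`'s blocks at the end of this region.
// A real implementation would also have to copy arc bookkeeping,
// e.g. via a hypothetical copyArcsFrom(other).
void RegionDesc::append(const RegionDesc& other) {
  copyBlocksFrom(other, m_blocks.end());
}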
Example 4
/*
 * Checks if the given region is well-formed, which entails the
 * following properties:
 *
 *   1) The region has at least one block.
 *
 *   2) Each block in the region has a different id.
 *
 *   3) All arcs involve blocks within the region.
 *
 *   4) For each arc, the bytecode offset of the dst block must be an
 *      offset that can follow the execution of the src block.
 *
 *   5) Each block contains at most one successor corresponding to a
 *      given SrcKey.
 *
 *   6) The region doesn't contain any loops, unless JitLoops is
 *      enabled.
 *
 *   7) All blocks are reachable from the entry block.
 *
 *   8) For each block, there must be a path from the entry to it that
 *      includes only earlier blocks in the region.
 *
 *   9) The region is topologically sorted unless loops are enabled.
 *
 *  10) The block-retranslation chains cannot have cycles.
 *
 */
bool check(const RegionDesc& region, std::string& error) {

  auto bad = [&](const std::string& errorMsg) {
    error = errorMsg;
    return false;
  };

  // 1) The region has at least one block.
  if (region.empty()) return bad("empty region");

  RegionDesc::BlockIdSet blockSet;
  for (auto b : region.blocks()) {
    auto bid = b->id();
    // 2) Each block in the region has a different id.
    if (blockSet.count(bid)) {
      return bad(folly::sformat("many blocks with id {}", bid));
    }
    blockSet.insert(bid);
  }

  for (auto b : region.blocks()) {
    auto bid = b->id();
    SrcKey    lastSk = region.block(bid)->last();
    OffsetSet validSuccOffsets = lastSk.succOffsets();
    OffsetSet succOffsets;

    for (auto succ : region.succs(bid)) {
      SrcKey succSk = region.block(succ)->start();
      Offset succOffset = succSk.offset();

      // 3) All arcs involve blocks within the region.
      if (blockSet.count(succ) == 0) {
        return bad(folly::sformat("arc with dst not in the region: {} -> {}",
                                  bid, succ));
      }

      // Checks 4) and 5) below don't make sense for arcs corresponding
      // to inlined calls and returns, so skip them in such cases.
      // This won't be possible once task #4076399 is done.
      if (lastSk.func() != succSk.func()) continue;

      // 4) For each arc, the bytecode offset of the dst block must be
      //    an offset that can follow the execution of the src block.
      if (validSuccOffsets.count(succOffset) == 0) {
        return bad(folly::sformat("arc with impossible control flow: {} -> {}",
                                  bid, succ));
      }

      // 5) Each block contains at most one successor corresponding to a
      //    given SrcKey.
      if (succOffsets.count(succOffset) > 0) {
        return bad(folly::sformat("block {} has multiple successors with SK {}",
                                  bid, show(succSk)));
      }
      succOffsets.insert(succOffset);
    }
    for (auto pred : region.preds(bid)) {
      if (blockSet.count(pred) == 0) {
        return bad(folly::sformat("arc with src not in the region: {} -> {}",
                                  pred, bid));
      }
    }
  }

  // 6) is checked by dfsCheck.
  DFSChecker dfsCheck(region);
  if (!dfsCheck.check(region.entry()->id())) {
    return bad("region is cyclic");
  }

  // 7) All blocks are reachable from the entry (first) block.
  if (dfsCheck.numVisited() != blockSet.size()) {
    return bad("region has unreachable blocks");
  }

  // 8) and 9) are checked below.
  RegionDesc::BlockIdSet visited;
  auto& blocks = region.blocks();
  for (unsigned i = 0; i < blocks.size(); i++) {
    auto bid = blocks[i]->id();
    unsigned nVisited = 0;
    for (auto pred : region.preds(bid)) {
      nVisited += visited.count(pred);
    }
    // 8) For each block, there must be a path from the entry to it that
    //    includes only earlier blocks in the region.
    if (nVisited == 0 && i != 0) {
      return bad(folly::sformat("block {} appears before all its predecessors",
                                bid));
    }
    // 9) The region is topologically sorted unless loops are enabled.
    if (!RuntimeOption::EvalJitLoops && nVisited != region.preds(bid).size()) {
      return bad(folly::sformat("non-topological order (bid: {})", bid));
    }
    visited.insert(bid);
  }

  // 10) The block-retranslation chains cannot have cycles.
  for (auto b : blocks) {
    auto bid = b->id();
    RegionDesc::BlockIdSet chainSet;
    chainSet.insert(bid);
    while (auto next = region.nextRetrans(bid)) {
      auto nextId = next.value();
      if (chainSet.count(nextId)) {
        return bad(folly::sformat("cyclic retranslation chain for block {}",
                                  bid));
      }
      chainSet.insert(nextId);
      bid = nextId;
    }
  }

  return true;
}
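A sketch of a typical call site for check(), assuming the always_assert_flog assertion macro from the HHVM sources; the wrapper itself is invented glue:

// Hypothetical helper: validate a freshly selected region and fail
// loudly (with the offending region's dump) when it is malformed.
void assertRegionWellFormed(const RegionDesc& region) {
  std::string error;
  if (!check(region, error)) {
    always_assert_flog(false, "malformed region: {}\n{}",
                       error, show(region));
  }
}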
Example 5
bool InliningDecider::shouldInline(SrcKey callerSk,
                                   const Func* callee,
                                   const RegionDesc& region,
                                   uint32_t maxTotalCost) {
  auto sk = region.empty() ? SrcKey() : region.start();
  assertx(callee);
  assertx(sk.func() == callee);

  // Tracing return lambdas.
  auto refuse = [&] (const char* why) {
    FTRACE(2, "shouldInline: rejecting callee region: {}", show(region));
    return traceRefusal(m_topFunc, callee, why);
  };

  auto accept = [&, this] (const char* kind) {
    FTRACE(2, "InliningDecider: inlining {}() <- {}()\t<reason: {}>\n",
           m_topFunc->fullName()->data(), callee->fullName()->data(), kind);
    return true;
  };

  if (m_stackDepth + callee->maxStackCells() >= kStackCheckLeafPadding) {
    return refuse("inlining stack depth limit exceeded");
  }

  // Even if the func contains NativeImpl we may have broken the trace before
  // we hit it.
  auto containsNativeImpl = [&] {
    for (auto block : region.blocks()) {
      if (!block->empty() && block->last().op() == OpNativeImpl) return true;
    }
    return false;
  };

  // Try to inline CPP builtin functions.
  // The NativeImpl opcode may appear later in the function because of Asserts
  // generated in hhbbc.
  if (callee->isCPPBuiltin() && containsNativeImpl()) {
    if (isInlinableCPPBuiltin(callee)) {
      return accept("inlinable CPP builtin");
    }
    return refuse("non-inlinable CPP builtin");
  }

  // If the function may use a VarEnv (which is stored in the ActRec) or may be
  // variadic, we restrict inlined callees to certain whitelisted instructions
  // which we know won't actually require these features.
  const bool needsCheckVVSafe = callee->attrs() & AttrMayUseVV;

  bool hasRet = false;

  // Iterate through the region, checking its suitability for inlining.
  for (auto const& block : region.blocks()) {
    sk = block->start();

    for (auto i = 0, n = block->length(); i < n; ++i, sk.advance()) {
      auto op = sk.op();

      // We don't allow inlined functions in the region.  The client is
      // expected to disable inlining for the region it gives us to peek.
      if (sk.func() != callee) {
        return refuse("got region with inlined calls");
      }

      // Restrict to VV-safe opcodes if necessary.
      if (needsCheckVVSafe && !isInliningVVSafe(op)) {
        return refuse(folly::format("{} may use dynamic environment",
                                    opcodeToName(op)).str().c_str());
      }

      // Count the returns.
      if (isReturnish(op)) {
        hasRet = true;
      }

      // We can't inline FCallArray.  XXX: Why?
      if (op == Op::FCallArray) {
        return refuse("can't inline FCallArray");
      }
    }
  }

  if (!hasRet) {
    return refuse("region has no returns");
  }

  // Refuse if the cost exceeds our thresholds.
  // We measure the cost of inlining each callstack and stop when it exceeds a
  // certain threshold.  (Note that we do not measure the total cost of all the
  // inlined calls for a given caller---just the cost of each nested stack.)
  const int maxCost = maxTotalCost - m_cost;
  const int cost = computeTranslationCost(callerSk, region);
  if (cost > maxCost) {
    return refuse("too expensive");
  }

  return accept("small region with return");
}
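A hedged sketch of how a caller might drive this decision; the driver function and its control flow are assumptions, and only the shouldInline signature comes from the code above:

// Illustrative driver (not from the original file): consult the decider
// and fall back to emitting a regular call when inlining is refused.
bool tryInlineCall(InliningDecider& decider, SrcKey callerSk,
                   const Func* callee, const RegionDesc& calleeRegion,
                   uint32_t maxTotalCost) {
  if (!decider.shouldInline(callerSk, callee, calleeRegion, maxTotalCost)) {
    return false;  // caller emits an ordinary FCall instead
  }
  // ... translate `calleeRegion` inline into the caller's IR ...
  return true;
}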
Example 6
void region_prune_arcs(RegionDesc& region) {
  FTRACE(4, "region_prune_arcs\n");

  region.sortBlocks();
  auto const sortedBlocks = region.blocks();

  // Maps region block ids to their RPO ids.
  auto blockToRPO = std::unordered_map<RegionDesc::BlockId,uint32_t>{};

  auto blockInfos = std::vector<BlockInfo>(sortedBlocks.size());
  auto workQ = dataflow_worklist<uint32_t>(sortedBlocks.size());
  for (auto rpoID = uint32_t{0}; rpoID < sortedBlocks.size(); ++rpoID) {
    auto const& b = sortedBlocks[rpoID];
    auto& binfo = blockInfos[rpoID];
    binfo.blockID = b->id();
    blockToRPO[binfo.blockID] = rpoID;
  }
  workQ.push(0);
  blockInfos[0].in = entry_state(region);

  FTRACE(4, "Iterating:\n");
  do {
    auto const rpoID = workQ.pop();
    auto& binfo = blockInfos[rpoID];
    FTRACE(4, "B{}\n", binfo.blockID);

    binfo.out = binfo.in;
    apply_transfer_function(
      binfo.out,
      region.block(binfo.blockID)->postConds()
    );

    for (auto& succ : region.succs(binfo.blockID)) {
      auto const succRPO = blockToRPO.find(succ);
      assertx(succRPO != end(blockToRPO));
      auto& succInfo = blockInfos[succRPO->second];
      if (preconds_may_pass(*region.block(succInfo.blockID), binfo.out)) {
        if (merge_into(succInfo.in, binfo.out)) {
          FTRACE(5, "  -> {}\n", succInfo.blockID);
          workQ.push(succRPO->second);
        }
      }
    }
  } while (!workQ.empty());

  FTRACE(2, "\nPostConds fixed point:\n{}\n",
    [&] () -> std::string {
      auto ret = std::string{};
      for (auto& s : blockInfos) {
        folly::format(&ret, "B{}:\n{}", s.blockID, show(s.in));
      }
      return ret;
    }()
  );

  // Now remove any edge that looks like it will unconditionally fail type
  // predictions, and completely remove any block that can't be reached.
  using ArcIDs = std::pair<RegionDesc::BlockId,RegionDesc::BlockId>;
  auto toRemove = std::vector<ArcIDs>{};
  for (auto rpoID = uint32_t{0}; rpoID < sortedBlocks.size(); ++rpoID) {
    auto const& binfo = blockInfos[rpoID];

    for (auto& succ : region.succs(binfo.blockID)) {
      auto const succRPO = blockToRPO.find(succ);
      assertx(succRPO != end(blockToRPO));
      auto const& succInfo = blockInfos[succRPO->second];
      if (!binfo.in.initialized ||
          !succInfo.in.initialized ||
          !preconds_may_pass(*region.block(succInfo.blockID), binfo.out)) {
        FTRACE(2, "Pruning arc: B{} -> B{}\n",
               binfo.blockID,
               succInfo.blockID);
        toRemove.emplace_back(binfo.blockID, succInfo.blockID);
      }
    }

    for (auto& r : toRemove) region.removeArc(r.first, r.second);
    toRemove.clear();
  }

  // Get rid of the completely unreachable blocks, now that any arcs to/from
  // them are gone.
  for (auto rpoID = uint32_t{0}; rpoID < sortedBlocks.size(); ++rpoID) {
    auto const& binfo = blockInfos[rpoID];
    if (!binfo.in.initialized) {
      FTRACE(2, "Pruning block: B{}\n", binfo.blockID);
      region.deleteBlock(binfo.blockID);
    }
  }
  FTRACE(2, "\n");
}
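The pass above is a standard forward dataflow fixed point: seed the entry state, apply a transfer function per block, and merge the result into each successor until nothing changes. A self-contained sketch of the same pattern over a generic CFG; every name here is invented for illustration, and a real merge would join type facts rather than just copy the state:

#include <cstdint>
#include <queue>
#include <vector>

struct State { bool initialized = false; /* type facts would live here */ };

// Returns true if `dst` changed, mirroring the merge_into contract above.
bool mergeInto(State& dst, const State& src) {
  if (!dst.initialized) { dst = src; return true; }
  return false;
}

// succs[b] lists the successor ids of block b; block 0 is the entry.
void fixedPoint(const std::vector<std::vector<uint32_t>>& succs,
                std::vector<State>& in) {
  std::queue<uint32_t> workQ;
  workQ.push(0);
  in[0].initialized = true;
  while (!workQ.empty()) {
    auto const b = workQ.front();
    workQ.pop();
    State out = in[b];  // a transfer function would refine `out` here
    for (auto s : succs[b]) {
      if (mergeInto(in[s], out)) workQ.push(s);
    }
  }
}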
Example 7
void region_prune_arcs(RegionDesc& region) {
  FTRACE(4, "region_prune_arcs\n");

  region.sortBlocks();
  auto const sortedBlocks = region.blocks();

  // Maps region block ids to their RPO ids.
  auto blockToRPO = std::unordered_map<RegionDesc::BlockId,uint32_t>{};

  auto blockInfos = std::vector<BlockInfo>(sortedBlocks.size());
  auto workQ = dataflow_worklist<uint32_t>(sortedBlocks.size());
  for (auto rpoID = uint32_t{0}; rpoID < sortedBlocks.size(); ++rpoID) {
    auto const& b = sortedBlocks[rpoID];
    auto& binfo = blockInfos[rpoID];
    binfo.blockID = b->id();
    blockToRPO[binfo.blockID] = rpoID;
  }
  workQ.push(0);
  blockInfos[0].in = entry_state(region);

  FTRACE(4, "Iterating:\n");
  do {
    auto const rpoID = workQ.pop();
    auto& binfo = blockInfos[rpoID];
    FTRACE(4, "B{}\n", binfo.blockID);

    /*
     * This code currently assumes inlined functions were entirely contained
     * within a single profiling translation, and will need updates if we
     * inline bigger things in a way visible to region selection.
     *
     * Note: inlined blocks /may/ have postConditions, if they are the last
     * blocks from profiling translations.  Currently any locations referred to
     * in postconditions for these blocks are for the outermost caller, so this
     * code handles that correctly.
     */
    if (region.block(binfo.blockID)->inlineLevel() != 0) {
      assertx(region.block(binfo.blockID)->typePreConditions().empty());
    }

    binfo.out = binfo.in;
    apply_transfer_function(
      binfo.out,
      region.block(binfo.blockID)->postConds()
    );

    for (auto& succ : region.succs(binfo.blockID)) {
      auto const succRPO = blockToRPO.find(succ);
      assertx(succRPO != end(blockToRPO));
      auto& succInfo = blockInfos[succRPO->second];
      if (preconds_may_pass(*region.block(succInfo.blockID), binfo.out)) {
        if (merge_into(succInfo.in, binfo.out)) {
          FTRACE(5, "  -> {}\n", succInfo.blockID);
          workQ.push(succRPO->second);
        }
      }
    }
  } while (!workQ.empty());

  FTRACE(2, "\nPostConds fixed point:\n{}\n",
    [&] () -> std::string {
      auto ret = std::string{};
      for (auto& s : blockInfos) {
        folly::format(&ret, "B{}:\n{}", s.blockID, show(s.in));
      }
      return ret;
    }()
  );

  // Now remove any edge that looks like it will unconditionally fail type
  // predictions, and completely remove any block that can't be reached.
  using ArcIDs = std::pair<RegionDesc::BlockId,RegionDesc::BlockId>;
  auto toRemove = std::vector<ArcIDs>{};
  for (auto rpoID = uint32_t{0}; rpoID < sortedBlocks.size(); ++rpoID) {
    auto const& binfo = blockInfos[rpoID];

    for (auto& succ : region.succs(binfo.blockID)) {
      auto const succRPO = blockToRPO.find(succ);
      assertx(succRPO != end(blockToRPO));
      auto const& succInfo = blockInfos[succRPO->second];
      if (!binfo.in.initialized ||
          !succInfo.in.initialized ||
          !preconds_may_pass(*region.block(succInfo.blockID), binfo.out)) {
        FTRACE(2, "Pruning arc: B{} -> B{}\n",
               binfo.blockID,
               succInfo.blockID);
        toRemove.emplace_back(binfo.blockID, succInfo.blockID);
      }
    }

    for (auto& r : toRemove) region.removeArc(r.first, r.second);
    toRemove.clear();
  }

  // Get rid of the completely unreachable blocks, now that any arcs to/from
  // them are gone.
  for (auto rpoID = uint32_t{0}; rpoID < sortedBlocks.size(); ++rpoID) {
    auto const& binfo = blockInfos[rpoID];
    if (!binfo.in.initialized) {
      FTRACE(2, "Pruning block: B{}\n", binfo.blockID);
      region.deleteBlock(binfo.blockID);
    }
  }
  FTRACE(2, "\n");
}
Example 8
bool InliningDecider::shouldInline(const Func* callee,
                                   const RegionDesc& region) {
  auto sk = region.empty() ? SrcKey() : region.start();
  assertx(callee);
  assertx(sk.func() == callee);

  int cost = 0;

  // Tracing return lambdas.
  auto refuse = [&] (const char* why) {
    return traceRefusal(m_topFunc, callee, why);
  };

  auto accept = [&, this] (const char* kind) {
    FTRACE(1, "InliningDecider: inlining {}() <- {}()\t<reason: {}>\n",
           m_topFunc->fullName()->data(), callee->fullName()->data(), kind);

    // Update our context.
    m_costStack.push_back(cost);
    m_cost += cost;
    m_callDepth += 1;
    m_stackDepth += callee->maxStackCells();

    return true;
  };

  // Check inlining depths.
  if (m_callDepth + 1 >= RuntimeOption::EvalHHIRInliningMaxDepth) {
    return refuse("inlining call depth limit exceeded");
  }
  if (m_stackDepth + callee->maxStackCells() >= kStackCheckLeafPadding) {
    return refuse("inlining stack depth limit exceeded");
  }

  // Even if the func contains NativeImpl we may have broken the trace before
  // we hit it.
  auto containsNativeImpl = [&] {
    for (auto block : region.blocks()) {
      if (!block->empty() && block->last().op() == OpNativeImpl) return true;
    }
    return false;
  };

  // Try to inline CPP builtin functions.
  // The NativeImpl opcode may appear later in the function because of Asserts
  // generated in hhbbc.
  if (callee->isCPPBuiltin() && containsNativeImpl()) {
    if (isInlinableCPPBuiltin(callee)) {
      return accept("inlinable CPP builtin");
    }
    return refuse("non-inlinable CPP builtin");
  }

  // If the function may use a VarEnv (which is stored in the ActRec) or may be
  // variadic, we restrict inlined callees to certain whitelisted instructions
  // which we know won't actually require these features.
  const bool needsCheckVVSafe = callee->attrs() & AttrMayUseVV;

  // We measure the cost of inlining each callstack and stop when it exceeds a
  // certain threshold.  (Note that we do not measure the total cost of all the
  // inlined calls for a given caller---just the cost of each nested stack.)
  const int maxCost = RuntimeOption::EvalHHIRInliningMaxCost - m_cost;

  // We only inline callee regions that have exactly one return.
  //
  // NOTE: Currently, the tracelet selector uses the first Ret in the child's
  // region to determine when to stop inlining.  However, the safety of this
  // behavior should not be considered guaranteed by InliningDecider; the
  // "right" way to decide when inlining ends is to inline all of `region'.
  int numRets = 0;

  // Iterate through the region, checking its suitability for inlining.
  for (auto const& block : region.blocks()) {
    sk = block->start();

    for (auto i = 0, n = block->length(); i < n; ++i, sk.advance()) {
      auto op = sk.op();

      // We don't allow inlined functions in the region.  The client is
      // expected to disable inlining for the region it gives us to peek.
      if (sk.func() != callee) {
        return refuse("got region with inlined calls");
      }

      // Restrict to VV-safe opcodes if necessary.
      if (needsCheckVVSafe && !isInliningVVSafe(op)) {
        return refuse(folly::format("{} may use dynamic environment",
                                    opcodeToName(op)).str().c_str());
      }

      // Count the returns.
      if (isRet(op) || op == Op::NativeImpl) {
        if (++numRets > 1) {
          return refuse("region has too many returns");
        }
        continue;
      }

      // We can't inline FCallArray.  XXX: Why?
      if (op == Op::FCallArray) {
        return refuse("can't inline FCallArray");
      }

      // Assert opcodes don't contribute to the inlining cost.
      if (op == Op::AssertRATL || op == Op::AssertRATStk) continue;

      cost += 1;

      // Add the size of immediate vectors to the cost.
      auto const pc = reinterpret_cast<const Op*>(sk.pc());
      if (hasMVector(op)) {
        cost += getMVector(pc).size();
      } else if (hasImmVector(op)) {
        cost += getImmVector(pc).size();
      }

      // Refuse if the cost exceeds our thresholds.
      if (cost > maxCost) {
        return refuse("too expensive");
      }
    }
  }

  if (numRets != 1) {
    return refuse("region has no returns");
  }
  return accept("small region with single return");
}
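Here accept() pushes cost and depth bookkeeping that must be unwound once the callee's region has been fully translated. A sketch of the matching teardown, reconstructed from the fields accept() updates; the method name and its existence are assumptions, not taken from the excerpt:

// Sketch only: undo the bookkeeping pushed by accept() above once the
// inlined translation of `callee` is complete.
void InliningDecider::registerEndInlining(const Func* callee) {
  auto const cost = m_costStack.back();
  m_costStack.pop_back();
  m_cost -= cost;
  m_callDepth -= 1;
  m_stackDepth -= callee->maxStackCells();
}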