Example #1
void lowerForARM(Vunit& unit) {
    assertx(check(unit));

    // block order doesn't matter, but only visit reachable blocks.
    auto blocks = sortBlocks(unit);

    for (auto b : blocks) {
        auto oldCode = std::move(unit.blocks[b].code);
        Vout v{unit, b};

        for (auto& inst : oldCode) {
            v.setOrigin(inst.origin);

            switch (inst.op) {
#define O(nm, imm, use, def) \
        case Vinstr::nm: \
          lower(inst.nm##_, v); \
          break;

                VASM_OPCODES
#undef O
            }
        }
    }

    assertx(check(unit));
    printUnit(kVasmARMFoldLevel, "after lowerForARM", unit);
}
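Note: the switch above uses the X-macro pattern: VASM_OPCODES expands the
O(...) macro once per opcode, generating one case per instruction that
forwards the payload to an overloaded lower(). A minimal, self-contained
sketch of the same pattern, with hypothetical Instr/add/jmp stand-ins rather
than the real vasm types:

#include <cstdio>

// Hypothetical payload structs; stand-ins for the real vasm instructions.
struct add { int d, s0, s1; };
struct jmp { int target; };

// The opcode list: one O(...) entry per instruction.
#define INSTR_OPCODES \
  O(add)              \
  O(jmp)

struct Instr {
  enum Opcode {
#define O(nm) nm##_op,
    INSTR_OPCODES
#undef O
  } op;
  union { add add_; jmp jmp_; };
};

// One overload per payload type; the generated switch picks the right one.
void lower(const add&) { std::puts("lower add"); }
void lower(const jmp&) { std::puts("lower jmp"); }

void lowerInstr(const Instr& inst) {
  switch (inst.op) {
#define O(nm) case Instr::nm##_op: lower(inst.nm##_); break;
    INSTR_OPCODES
#undef O
  }
}

int main() {
  Instr i;
  i.op = Instr::add_op;
  i.add_ = add{0, 1, 2};
  lowerInstr(i); // prints "lower add"
}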
Example #2
void SenMLRecord::fieldsToJson()
{
    int bnLength = this->_name.length();
    if(bnLength){
        printText("\"n\":\"", 5);
        printText(this->_name.c_str(), bnLength);
        printText("\"", 1);
    }
    if(!isnan(this->_time)){
        printText(",\"t\":", 5);
        printDouble(this->_time, SENML_MAX_DOUBLE_PRECISION);
    }
    if(this->_unit != SENML_UNIT_NONE){
        printText(",\"u\":\"", 6);
        printUnit(this->_unit);
        printText("\"", 1);
    }
    if(this->_updateTime != 0){
        printText(",\"ut\":", 5);
        #ifdef __MBED__
            char buf[16]; // enough for any 32-bit int, sign, and '\0'
            snprintf(buf, sizeof(buf), "%d", this->_updateTime);
            String val = buf;
        #else
            String val(this->_updateTime);
        #endif
        printText(val.c_str(), val.length());
    }
}
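Note: the literal lengths passed to printText are counted by hand, which
makes off-by-one slips easy (",\"ut\":" is six characters, not five). A
hypothetical helper (not part of the SenML library; printText stands in for
its output routine) can derive the length from the array size at compile
time so the counts can never drift out of sync:

#include <cstddef>

void printText(const char* text, int length); // the library's output routine

// Hypothetical wrapper: N includes the trailing '\0', so N - 1 is always the
// exact literal length.
template <size_t N>
inline void printLiteral(const char (&lit)[N]) {
    printText(lit, N - 1);
}

// Usage: printLiteral(",\"ut\":"); // passes the correct length (6)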
Example #3
// Remove dead instructions by doing a traditional liveness analysis.
// Instructions that mutate memory, physical registers, or status flags
// are considered useful. All branches are considered useful.
//
// Given SSA, there's a faster sparse version of this algorithm that marks
// useful instructions in one pass, then transitively marks pure instructions
// that define inputs to useful instructions. However, it requires a mapping
// from vreg numbers to the instruction that defines them, and a way to address
// individual instructions.
//
// We could remove useless branches by computing the post-dominator tree and
// RDF(b) for each block; then a branch is only useful if it controls whether
// or not a useful block executes, and useless branches can be forwarded to
// the nearest useful post-dominator.
void removeDeadCode(Vunit& unit) {
  auto blocks = sortBlocks(unit);
  jit::vector<LiveSet> livein(unit.blocks.size());
  LiveSet live(unit.next_vr);
  auto pass = [&](bool mutate) {
    bool changed = false;
    for (auto blockIt = blocks.end(); blockIt != blocks.begin();) {
      auto b = *--blockIt;
      auto& block = unit.blocks[b];
      live.reset();
      for (auto s : succs(block)) {
        if (!livein[s].empty()) {
          live |= livein[s];
        }
      }
      for (auto i = block.code.end(); i != block.code.begin();) {
        auto& inst = *--i;
        auto useful = effectful(inst);
        visitDefs(unit, inst, [&](Vreg r) {
          if (r.isPhys() || live.test(r)) {
            useful = true;
            live.reset(r);
          }
        });
        if (useful) {
          visitUses(unit, inst, [&](Vreg r) {
            live.set(r);
          });
        } else if (mutate) {
          inst = nop{};
          changed = true;
        }
      }
      if (mutate) {
        assert(live == livein[b]);
      } else {
        if (live != livein[b]) {
          livein[b] = live;
          changed = true;
        }
      }
    }
    return changed;
  };
  // analyze until livein reaches a fixed point
  while (pass(false)) {}
  // nop-out useless instructions
  if (pass(true)) {
    for (auto b : blocks) {
      auto& code = unit.blocks[b].code;
      auto end = std::remove_if(code.begin(), code.end(), [&](Vinstr& inst) {
        return inst.op == Vinstr::nop;
      });
      code.erase(end, code.end());
    }
    printUnit(kVasmDCELevel, "after vasm-dead", unit);
  }
}
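Note: the header comment describes a faster sparse variant for SSA, but the
code implements the dense one. A minimal sketch of that sparse mark phase on
a hypothetical toy IR (Inst and defInst are illustrative, not vasm types):
seed a worklist with effectful instructions, then transitively mark the
definers of every used vreg; anything unmarked is dead.

#include <cassert>
#include <vector>

// Toy SSA instruction: defines one vreg (-1 for none) and uses up to two.
struct Inst { int def; int use0, use1; bool effectful; };

std::vector<bool> markUseful(const std::vector<Inst>& code,
                             const std::vector<int>& defInst) {
  std::vector<bool> useful(code.size(), false);
  std::vector<int> work;
  for (int i = 0; i < (int)code.size(); ++i) {
    if (code[i].effectful) { useful[i] = true; work.push_back(i); }
  }
  // Mark the instruction defining vreg v, if it isn't marked yet.
  auto markDef = [&](int v) {
    if (v < 0) return;                       // -1 means "no operand"
    int d = defInst[v];
    if (d >= 0 && !useful[d]) { useful[d] = true; work.push_back(d); }
  };
  while (!work.empty()) {
    int i = work.back(); work.pop_back();
    markDef(code[i].use0);
    markDef(code[i].use1);
  }
  return useful;
}

int main() {
  std::vector<Inst> code = {
    {0, -1, -1, false},  // 0: def v0
    {1,  0, -1, false},  // 1: def v1, uses v0
    {-1, 1, -1, true },  // 2: store-like, uses v1, effectful
    {2, -1, -1, false},  // 3: def v2, never used -> dead
  };
  std::vector<int> defInst = {0, 1, 3};      // defInst[v] = defining instr
  auto useful = markUseful(code, defInst);
  assert(useful[0] && useful[1] && useful[2] && !useful[3]);
}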
Example #4
/*
 * Records any type/reffiness predictions we depend on in the region. Guards
 * for locals and stack cells that are not used will be eliminated by the call
 * to relaxGuards.
 */
void RegionFormer::recordDependencies() {
  // Record the incrementally constructed reffiness predictions.
  assertx(!m_region->empty());
  auto& frontBlock = *m_region->blocks().front();
  for (auto const& dep : m_refDeps.m_arMap) {
    frontBlock.addReffinessPred(m_startSk, {dep.second.m_mask,
                                            dep.second.m_vals,
                                            dep.first});
  }

  // Relax guards and record the ones that survived.
  auto& firstBlock = *m_region->blocks().front();
  auto blockStart = firstBlock.start();
  auto& unit = m_irgs.unit;
  auto const doRelax = RuntimeOption::EvalHHIRRelaxGuards;
  bool changed = false;
  if (doRelax) {
    Timer _t(Timer::selectTracelet_relaxGuards);
    // The IR is going to be discarded immediately, so skip reflowing
    // the types in relaxGuards to save JIT time.
    RelaxGuardsFlags flags = m_profiling ? RelaxSimple : RelaxNormal;
    changed = relaxGuards(unit, *m_irgs.irb->guards(), flags);
  }

  auto guardMap = std::map<RegionDesc::Location,Type>{};
  ITRACE(2, "Visiting guards\n");
  visitGuards(unit, [&](const RegionDesc::Location& loc, Type type) {
    Trace::Indent indent;
    ITRACE(3, "{}: {}\n", show(loc), type);
    if (type <= TCls) return;
    auto inret = guardMap.insert(std::make_pair(loc, type));
    if (inret.second) return;
    auto& oldTy = inret.first->second;
    if (oldTy == TGen) {
      // This is the case where we see an inner type prediction for a GuardLoc
      // that got relaxed to Gen.
      return;
    }
    oldTy &= type;
  });

  for (auto& kv : guardMap) {
    if (kv.second == TGen) {
      // Guard was relaxed to Gen---don't record it.
      continue;
    }
    auto const preCond = RegionDesc::TypedLocation { kv.first, kv.second };
    ITRACE(1, "selectTracelet adding guard {}\n", show(preCond));
    firstBlock.addPreCondition(blockStart, preCond);
  }

  if (changed) {
    printUnit(3, unit, " after guard relaxation ", nullptr,
              m_irgs.irb->guards());
  }

}
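Note: the visitGuards callback above relies on std::map::insert's return
value: the bool is false when the location was already present, in which case
the existing type is refined in place. A minimal sketch of that
insert-or-refine idiom, with plain ints standing in for Location and Type and
bitwise-and standing in for type intersection:

#include <cstdio>
#include <map>
#include <utility>

int main() {
  std::map<int, unsigned> guardMap;
  auto record = [&](int loc, unsigned type) {
    auto inret = guardMap.insert(std::make_pair(loc, type));
    if (!inret.second) inret.first->second &= type; // already present: refine
  };
  record(0, 0b1110);
  record(0, 0b0111);
  std::printf("loc 0 -> %u\n", guardMap[0]); // 6, the meet of both guards
}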
Example #5
/*
 * optimizeExits does two conversions to eliminate common branch-to-exit flows.
 *
 * 1. If we see a jcc that leads to two "identical" blocks ending with
 * bindjmp, then copy the identical part of the targets before the jcc,
 * and replace the jcc with a bindjcc1st instruction using the bytecode
 * destinations from the two original bindjmps. For the sake of this pass,
 * "identical" means matching lea & syncvmsp instructions, and both bindjmp's
 * are for the same function.
 *
 * This leads to more efficient code because the service request stubs will
 * patch jumps in the main trace instead of off-trace.
 *
 * 2. Otherwise, if we see a jcc where only one of the branches is
 * a normal exit, then convert the jcc to a bindjcc with the jcc's condition
 * and the original bindjmp's dest.
 */
void optimizeExits(Vunit& unit) {
    auto const pred_counts = count_predecessors(unit);

    PostorderWalker{unit}.dfs([&](Vlabel b) {
        auto& code = unit.blocks[b].code;
        assertx(!code.empty());
        if (code.back().op != Vinstr::jcc) return;

        auto const ijcc = code.back().jcc_;
        auto const t0 = ijcc.targets[0];
        auto const t1 = ijcc.targets[1];
        if (t0 == t1) {
            code.back() = jmp{t0};
            return;
        }
        if (pred_counts[t0] != 1 || pred_counts[t1] != 1) return;

        // copy all but the last instruction in blocks[t] to just before
        // the last instruction in code.
        auto hoist_sync = [&](Vlabel t) {
            const auto& tcode = unit.blocks[t].code;
            code.insert(std::prev(code.end()),
                        tcode.begin(), std::prev(tcode.end()));
        };

        if (match_bindjcc1st(unit, t0, t1)) {
            // hoist the sync instructions from t0 to before the jcc,
            // and replace the jcc with bindjcc1st.
            const auto& bj0 = unit.blocks[t0].code.back().bindjmp_;
            const auto& bj1 = unit.blocks[t1].code.back().bindjmp_;
            hoist_sync(t0);
            code.back() = bindjcc1st{ijcc.cc, ijcc.sf, {bj0.target, bj1.target},
                                     bj0.args | bj1.args};
            return;
        }

        auto fold_exit = [&](ConditionCode cc, Vlabel exit, Vlabel next) {
            const auto& bj = unit.blocks[exit].code.back().bindjmp_;
            auto origin = code.back().origin;
            hoist_sync(exit);
            code.back() = bindjcc{cc, ijcc.sf, bj.target, bj.trflags, bj.args};
            code.emplace_back(jmp{next});
            code.back().origin = origin;
        };

        // Try to replace a jcc to a normal exit with a bindjcc followed by a
        // jmp, as long as the sp adjustment is harmless to hoist (disp == 0).
        Vptr sp;
        if (match_bindjmp(unit, t1, &sp) && sp == sp.base[0]) {
            fold_exit(ijcc.cc, t1, t0);
        } else if (match_bindjmp(unit, t0, &sp) && sp == sp.base[0]) {
            fold_exit(ccNegate(ijcc.cc), t0, t1);
        }
    });
    printUnit(kVasmExitsLevel, "after vasm-exits", unit);
}
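Note: hoist_sync above leans on a small std::vector idiom: insert a range
just before the terminal instruction while dropping the source block's own
terminal. A toy sketch with ints standing in for Vinstr:

#include <cassert>
#include <iterator>
#include <vector>

int main() {
  std::vector<int> code  = {1, 2, 99};  // 99 is this block's terminal (jcc)
  std::vector<int> tcode = {7, 8, 42};  // 42 is the target block's bindjmp
  // Copy all but the target's last instruction to just before our terminal.
  code.insert(std::prev(code.end()), tcode.begin(), std::prev(tcode.end()));
  assert((code == std::vector<int>{1, 2, 7, 8, 99}));
}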
Example #6
File: opt.cpp Project: reeze/hhvm
void optimize(IRUnit& unit, IRBuilder& irBuilder, TransKind kind) {
  Timer timer(Timer::optimize);

  assertx(checkEverything(unit));

  fullDCE(unit);
  printUnit(6, unit, " after initial DCE ");
  assertx(checkEverything(unit));

  if (RuntimeOption::EvalHHIRTypeCheckHoisting) {
    doPass(unit, hoistTypeChecks, DCE::Minimal);
  }

  if (RuntimeOption::EvalHHIRPredictionOpts) {
    doPass(unit, optimizePredictions, DCE::None);
  }

  if (RuntimeOption::EvalHHIRSimplification) {
    doPass(unit, simplifyPass, DCE::Full);
    doPass(unit, cleanCfg, DCE::None);
  }

  if (RuntimeOption::EvalHHIRGlobalValueNumbering) {
    doPass(unit, gvn, DCE::Full);
  }

  if (kind != TransKind::Profile && RuntimeOption::EvalHHIRMemoryOpts) {
    doPass(unit, optimizeLoads, DCE::Full);
  }

  if (kind != TransKind::Profile && RuntimeOption::EvalHHIRMemoryOpts) {
    doPass(unit, optimizeStores, DCE::Full);
  }

  if (kind != TransKind::Profile && RuntimeOption::EvalHHIRRefcountOpts) {
    doPass(unit, optimizeRefcounts2, DCE::Full);
  }

  if (RuntimeOption::EvalHHIRLICM && RuntimeOption::EvalJitLoops &&
      cfgHasLoop(unit) && kind != TransKind::Profile) {
    doPass(unit, optimizeLoopInvariantCode, DCE::Minimal);
  }

  doPass(unit, removeExitPlaceholders, DCE::Full);

  if (RuntimeOption::EvalHHIRGenerateAsserts) {
    doPass(unit, insertAsserts, DCE::None);
  }

  // Perform final cleanup passes to collapse any critical edges that were
  // split, and simplify our instructions before shipping off to codegen.
  doPass(unit, cleanCfg, DCE::None);
  if (kind != TransKind::Profile && RuntimeOption::EvalHHIRSimplification) {
    doPass(unit, simplifyPass, DCE::Full);
  }
}
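Note: every optimization here funnels through doPass, which couples each pass
to a requested level of cleanup. The sketch below only illustrates that shape
with hypothetical stand-ins (IRUnit, DCE, and the pass bodies); it is not the
real opt.cpp helper:

#include <cstdio>
#include <cstdlib>

struct IRUnit {};
enum class DCE { None, Minimal, Full };

void fullDCE(IRUnit&)         { std::puts("fullDCE"); }
void minimalDCE(IRUnit&)      { std::puts("minimalDCE"); }
bool checkEverything(IRUnit&) { return true; }

// Run the pass, run the requested cleanup, then re-check invariants so a
// broken pass is caught right where it ran.
void doPass(IRUnit& unit, void (*fn)(IRUnit&), DCE dce) {
  fn(unit);
  if (dce == DCE::Full)         fullDCE(unit);
  else if (dce == DCE::Minimal) minimalDCE(unit);
  if (!checkEverything(unit)) std::abort();
}

void somePass(IRUnit&) { std::puts("somePass"); }

int main() {
  IRUnit unit;
  doPass(unit, somePass, DCE::Full);
}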
Example #7
// Remove dead instructions by doing a traditional liveness analysis.
// Instructions that mutate memory, physical registers, or status flags
// are considered useful. All branches are considered useful.
//
// Given SSA, there's a faster sparse version of this algorithm that marks
// useful instructions in one pass, then transitively marks pure instructions
// that define inputs to useful instructions. However, it requires a mapping
// from vreg numbers to the instruction that defines them, and a way to address
// individual instructions.
//
// We could remove useless branches by computing the post-dominator tree and
// RDF(b) for each block; then a branch is only useful if it controls whether
// or not a useful block executes, and useless branches can be forwarded to
// the nearest useful post-dominator.
void removeDeadCode(Vunit& unit) {
  Timer timer(Timer::vasm_dce);
  auto blocks = sortBlocks(unit);
  jit::vector<LiveSet> livein(unit.blocks.size());
  LiveSet live(unit.next_vr);

  auto pass = [&](bool mutate) {
    bool changed = false;
    for (auto blockIt = blocks.end(); blockIt != blocks.begin();) {
      auto b = *--blockIt;
      auto& block = unit.blocks[b];
      live.reset();
      for (auto s : succs(block)) {
        if (!livein[s].empty()) {
          live |= livein[s];
        }
      }
      for (auto i = block.code.end(); i != block.code.begin();) {
        auto& inst = *--i;
        auto useful = effectful(inst);
        visitDefs(unit, inst, [&](Vreg r) {
          if (r.isPhys() || live.test(r)) {
            useful = true;
            live.reset(r);
          }
        });
        if (useful) {
          visitUses(unit, inst, [&](Vreg r) {
            live.set(r);
          });
        } else if (mutate) {
          inst = nop{};
          changed = true;
        }
      }
      if (mutate) {
        assertx(live == livein[b]);
      } else {
        if (live != livein[b]) {
          livein[b] = live;
          changed = true;
        }
      }
    }
    return changed;
  };

  // analyze until livein reaches a fixed point
  while (pass(false)) {}
  auto const changed = pass(true);
  removeTrivialNops(unit);
  if (changed) {
    printUnit(kVasmDCELevel, "after vasm-dead", unit);
  }
}
Example #8
void genCode(CodeBlock& main, CodeBlock& stubs, IRUnit& unit,
             std::vector<TransBCMapping>* bcMap,
             JIT::MCGenerator* mcg,
             const RegAllocInfo& regs) {
  Timer _t(Timer::codeGen);

  if (dumpIREnabled()) {
    AsmInfo ai(unit);
    genCodeImpl(main, stubs, unit, bcMap, mcg, regs, &ai);
    printUnit(kCodeGenLevel, unit, " after code gen ", &regs, &ai);
  } else {
    genCodeImpl(main, stubs, unit, bcMap, mcg, regs, nullptr);
  }
}
Example #9
/*
 * Branch fusion:
 * Analyze blocks one at a time, looking for the sequence:
 *
 *   setcc cc, f1 => b
 *   ...
 *   testb b, b => f2
 *   ...
 *   jcc E|NE, f2
 *
 * If found, and f2 is only used by the jcc, then change the code to:
 *
 *   setcc cc, f1 => b
 *   ...
 *   nop
 *   ...
 *   jcc !cc|cc, f1
 *
 * Later, vasm-dead will clean up the nop, and the setcc if b became dead.
 *
 * During the search, any other instruction that has a status flag result
 * will reset the pattern matcher. No instruction can "kill" flags,
 * since flags are SSA variables. However, the transformation we want to
 * make extends the setcc flags' lifetime, and we don't want it to overlap
 * another flag's lifetime.
 */
void fuseBranches(Vunit& unit) {
  auto blocks = sortBlocks(unit);
  jit::vector<unsigned> uses(unit.next_vr);
  for (auto b : blocks) {
    for (auto& inst : unit.blocks[b].code) {
      visitUses(unit, inst, [&](Vreg r) {
        uses[r]++;
      });
    }
  }
  bool should_print = false;
  for (auto b : blocks) {
    auto& code = unit.blocks[b].code;
    ConditionCode cc;
    Vreg setcc_flags, setcc_dest, testb_flags;
    unsigned testb_index;
    for (unsigned i = 0, n = code.size(); i < n; ++i) {
      if (code[i].op == Vinstr::setcc) {
        cc = code[i].setcc_.cc;
        setcc_flags = code[i].setcc_.sf;
        setcc_dest = code[i].setcc_.d;
        continue;
      }
      if (setcc_flags.isValid() &&
          match_testb(code[i], setcc_dest) &&
          uses[code[i].testb_.sf] == 1) {
        testb_flags = code[i].testb_.sf;
        testb_index = i;
        continue;
      }
      if (match_jcc(code[i], testb_flags)) {
        code[testb_index] = nop{}; // erase the testb
        auto& jcc = code[i].jcc_;
        jcc.cc = jcc.cc == CC_NE ? cc : ccNegate(cc);
        jcc.sf = setcc_flags;
        should_print = true;
        continue;
      }
      if (setcc_flags.isValid() && sets_flags(code[i])) {
        setcc_flags = testb_flags = Vreg{};
      }
    }
  }
  if (should_print) {
    printUnit(kVasmFusionLevel, "after vasm-fusion", unit);
  }
}
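Note: the key step above is the condition rewrite: "testb b, b; jcc NE" is
taken exactly when b != 0, i.e. when the original setcc condition held, so
the fused jcc reuses cc; "jcc E" fires when b == 0, so it takes the negated
condition. A small self-contained sketch of just that rewrite (the enum and
ccNegate are simplified stand-ins, not the real definitions):

#include <cassert>

enum CC { CC_E, CC_NE, CC_L, CC_GE };

CC ccNegate(CC cc) {
  switch (cc) {
    case CC_E:  return CC_NE;
    case CC_NE: return CC_E;
    case CC_L:  return CC_GE;
    case CC_GE: return CC_L;
  }
  return cc;
}

// Fuse "setcc cc => b; testb b, b; jcc jccCond" into a single jcc condition.
CC fuse(CC setccCond, CC jccCond) {
  return jccCond == CC_NE ? setccCond : ccNegate(setccCond);
}

int main() {
  assert(fuse(CC_L, CC_NE) == CC_L);  // jcc NE keeps the original condition
  assert(fuse(CC_L, CC_E)  == CC_GE); // jcc E takes the negation
}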
Example #10
TCA genFuncPrologue(TransID transID, TransKind kind, Func* func, int argc,
                    CodeCache::View code, CGMeta& fixups) {
  auto context = prologue_context(transID, kind, func,
                                  func->getEntryForNumArgs(argc));
  IRUnit unit{context};
  irgen::IRGS env{unit, nullptr};

  irgen::emitFuncPrologue(env, argc, transID);
  irgen::sealUnit(env);

  printUnit(2, unit, "After initial prologue generation");

  auto vunit = irlower::lowerUnit(env.unit, CodeKind::CrossTrace);
  emitVunit(*vunit, env.unit, code, fixups);

  return unit.prologueStart;
}
Example #11
void SenMLPack::fieldsToJson()
{
    int bnLength = this->_bn.length();
    if(bnLength > 0){
        printText("\"bn\":\"", 6);
        printText(this->_bn.c_str(), bnLength);
        printText("\"", 1);
    }
    if(this->_bu){
        printText(",\"bu\":\"", 7);
        printUnit(this->_bu);
        printText("\"", 1);
    }
    if(!isnan(this->_bt)){
        printText(",\"bt\":", 6);
        printDouble(this->_bt, SENML_MAX_DOUBLE_PRECISION);
    }
}
Example #12
void foldImms(Vunit& unit) {
    assertx(check(unit)); // especially, SSA
    // block order doesn't matter, but only visit reachable blocks.
    auto blocks = sortBlocks(unit);

    // Track a "used" flag for each register. If an SR is used, then
    // certain optimizations will not fire, since they do not
    // set the condition codes as the original instruction(s)
    // would.
    jit::vector<bool> used(unit.next_vr);
    for (auto b : blocks) {
        for (auto& inst : unit.blocks[b].code) {
            visitUses(unit, inst, [&](Vreg r) {
                used[r] = true;
            });
        }
    }

    Folder folder(std::move(used));
    folder.vals.resize(unit.next_vr);
    folder.valid.resize(unit.next_vr);
    // figure out which Vregs are constants and stash their values.
    for (auto& entry : unit.constToReg) {
        folder.valid.set(entry.second);
        folder.vals[entry.second] = entry.first.val;
    }
    // now mutate instructions
    for (auto b : blocks) {
        for (auto& inst : unit.blocks[b].code) {
            switch (inst.op) {
#define O(name, imms, uses, defs)\
        case Vinstr::name: {\
          auto origin = inst.origin;\
          folder.fold(inst.name##_, inst);\
          inst.origin = origin;\
          break;\
        }
                VASM_OPCODES
#undef O
            }
        }
    }
    printUnit(kVasmImmsLevel, "after foldImms", unit);
}
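Note: the Folder above keeps two parallel tables: valid[r] says whether vreg
r is a known constant and vals[r] stashes its value, so a fold fires only
when every operand is known. A toy sketch of that bookkeeping (hypothetical,
not the HHVM Folder):

#include <cassert>
#include <cstdint>
#include <vector>

int main() {
  const int nvregs = 4;
  std::vector<bool>     valid(nvregs, false);
  std::vector<uint64_t> vals(nvregs, 0);

  // Pretend unit.constToReg told us r0 = 10 and r1 = 32.
  valid[0] = true; vals[0] = 10;
  valid[1] = true; vals[1] = 32;

  // Fold "add r0, r1 => r2" only when both sources are known constants.
  int s0 = 0, s1 = 1, d = 2;
  if (valid[s0] && valid[s1]) {
    valid[d] = true;
    vals[d]  = vals[s0] + vals[s1]; // the add becomes a load-immediate
  }
  assert(valid[2] && vals[2] == 42);
}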
Example #13
TCA genFuncPrologue(TransID transID, TransKind kind, Func* func, int argc,
                    CodeCache::View code, CGMeta& fixups) {
  auto context = prologue_context(transID, kind, func,
                                  func->getEntryForNumArgs(argc));
  IRUnit unit{context};
  irgen::IRGS env{unit};

  auto& cb = code.main();

  // Dump the func guard in the TC before anything else.
  emitFuncGuard(func, cb, fixups);
  auto const start = cb.frontier();

  irgen::emitFuncPrologue(env, argc, transID);
  irgen::sealUnit(env);

  printUnit(2, unit, "After initial prologue generation");

  auto vunit = irlower::lowerUnit(env.unit, CodeKind::CrossTrace);
  emitVunit(*vunit, env.unit, code, fixups);

  return start;
}
Example #14
void optimizeJmps(Vunit& unit) {
    auto isEmpty = [&](Vlabel b, Vinstr::Opcode op) {
        auto& code = unit.blocks[b].code;
        return code.size() == 1 && op == code[0].op;
    };
    bool changed = false;
    bool ever_changed = false;
    // The number of incoming edges from (reachable) predecessors for each block.
    // It is maintained as an upper bound of the actual value during the
    // transformation.
    jit::vector<int> npreds(unit.blocks.size(), 0);
    do {
        if (changed) {
            std::fill(begin(npreds), end(npreds), 0);
        }
        changed = false;
        PostorderWalker{unit}.dfs([&](Vlabel b) {
            for (auto s : succs(unit.blocks[b])) {
                npreds[s]++;
            }
        });
        // give entry an extra predecessor to prevent cloning it.
        npreds[unit.entry]++;

        PostorderWalker{unit}.dfs([&](Vlabel b) {
            auto& block = unit.blocks[b];
            auto& code = block.code;
            assertx(!code.empty());
            if (code.back().op == Vinstr::jcc) {
                auto ss = succs(block);
                if (ss[0] == ss[1]) {
                    // both edges have same target, change to jmp
                    code.back() = jmp{ss[0]};
                    --npreds[ss[0]];
                    changed = true;
                } else {
                    auto jcc_i = code.back().jcc_;
                    if (isEmpty(jcc_i.targets[0], Vinstr::fallback)) {
                        jcc_i = jcc{ccNegate(jcc_i.cc), jcc_i.sf,
                            {jcc_i.targets[1], jcc_i.targets[0]}};
                    }
                    if (isEmpty(jcc_i.targets[1], Vinstr::fallback)) {
                        // replace jcc with fallbackcc and jmp
                        const auto& fb_i = unit.blocks[jcc_i.targets[1]].code[0].fallback_;
                        const auto t0 = jcc_i.targets[0];
                        const auto jcc_origin = code.back().origin;
                        code.pop_back();
                        code.emplace_back(
                            fallbackcc{jcc_i.cc, jcc_i.sf, fb_i.dest, fb_i.trflags});
                        code.back().origin = jcc_origin;
                        code.emplace_back(jmp{t0});
                        code.back().origin = jcc_origin;
                        changed = true;
                    }
                }
            }

            for (auto& s : succs(block)) {
                if (isEmpty(s, Vinstr::jmp)) {
                    // skip over s
                    --npreds[s];
                    s = unit.blocks[s].code.back().jmp_.target;
                    ++npreds[s];
                    changed = true;
                }
            }

            if (code.back().op == Vinstr::jmp) {
                auto s = code.back().jmp_.target;
                if (npreds[s] == 1 || isEmpty(s, Vinstr::jcc)) {
                    // overwrite jmp with copy of s
                    auto& code2 = unit.blocks[s].code;
                    code.pop_back();
                    code.insert(code.end(), code2.begin(), code2.end());
                    if (--npreds[s]) {
                        for (auto ss : succs(block)) {
                            ++npreds[ss];
                        }
                    }
                    changed = true;
                }
            }
        });
        ever_changed |= changed;
    } while (changed);
    if (ever_changed) {
        printUnit(kVasmJumpsLevel, "after vasm-jumps", unit);
    }
}
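Note: the "skip over s" step above is classic jump threading: when a
successor block is nothing but a jmp, retarget the edge to that jmp's target.
A toy sketch where each block is just its successor list and a
single-successor block models "only a jmp":

#include <cassert>
#include <vector>

int main() {
  // Block 0 -> 1; block 1 -> 2 (empty: only a jmp); block 2 is a real block.
  std::vector<std::vector<int>> succs = {{1}, {2}, {}};
  auto isEmptyJmp = [&](int b) { return succs[b].size() == 1; };

  for (int& s : succs[0]) {
    while (isEmptyJmp(s)) s = succs[s][0]; // thread through chains of jmps
  }
  assert(succs[0][0] == 2);                // the edge now bypasses block 1
}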
Example #15
File: opt.cpp Project: bjori/hhvm
void optimize(IRUnit& unit, IRBuilder& irBuilder, TransKind kind) {
  Timer _t(Timer::optimize);

  auto finishPass = [&](const char* msg) {
    if (msg) {
      printUnit(6, unit, folly::format("after {}", msg).str().c_str());
    }
    assert(checkCfg(unit));
    assert(checkTmpsSpanningCalls(unit));
    if (debug) {
      forEachInst(rpoSortCfg(unit), [&](IRInstruction* inst) {
        assert(checkOperandTypes(inst, &unit));
      });
    }
  };

  auto doPass = [&](void (*fn)(IRUnit&), const char* msg = nullptr) {
    fn(unit);
    finishPass(msg);
  };

  auto dce = [&](const char* which) {
    if (!RuntimeOption::EvalHHIRDeadCodeElim) return;
    eliminateDeadCode(unit);
    finishPass(folly::format("{} DCE", which).str().c_str());
  };

  auto const doReoptimize = RuntimeOption::EvalHHIRExtraOptPass &&
    (RuntimeOption::EvalHHIRCse || RuntimeOption::EvalHHIRSimplification);

  auto const hasLoop = RuntimeOption::EvalJitLoops && cfgHasLoop(unit);

  // TODO(#5792564): Guard relaxation doesn't work with loops.
  if (shouldHHIRRelaxGuards() && !hasLoop) {
    Timer _t(Timer::optimize_relaxGuards);
    const bool simple = kind == TransKind::Profile &&
                        (RuntimeOption::EvalJitRegionSelector == "tracelet" ||
                         RuntimeOption::EvalJitRegionSelector == "method");
    RelaxGuardsFlags flags = (RelaxGuardsFlags)
      (RelaxReflow | (simple ? RelaxSimple : RelaxNormal));
    auto changed = relaxGuards(unit, *irBuilder.guards(), flags);
    if (changed) finishPass("guard relaxation");

    if (doReoptimize) {
      irBuilder.reoptimize();
      finishPass("guard relaxation reoptimize");
    }
  }

  if (RuntimeOption::EvalHHIRRefcountOpts) {
    optimizeRefcounts(unit, FrameStateMgr{unit.entry()->front().marker()});
    finishPass("refcount opts");
  }

  dce("initial");

  if (RuntimeOption::EvalHHIRPredictionOpts) {
    doPass(optimizePredictions, "prediction opts");
  }

  if (doReoptimize) {
    irBuilder.reoptimize();
    finishPass("reoptimize");
    dce("reoptimize");
  }

  if (RuntimeOption::EvalHHIRGlobalValueNumbering) {
    doPass(gvn);
    dce("gvn");
  }

  if (kind != TransKind::Profile && RuntimeOption::EvalHHIRMemoryOpts) {
    doPass(optimizeLoads);
    dce("loadelim");
  }

  /*
   * Note: doing this pass this late might not be ideal, in particular because
   * we've already turned some StLoc instructions into StLocNT.
   *
   * But right now there are assumptions preventing us from doing it before
   * refcount opts.  (Refcount opts needs to see all the StLocs explicitly
   * because it makes assumptions about whether references are consumed based
   * on that.)
   */
  if (kind != TransKind::Profile && RuntimeOption::EvalHHIRMemoryOpts) {
    doPass(optimizeStores);
    dce("storeelim");
  }

  if (RuntimeOption::EvalHHIRGenerateAsserts) {
    doPass(insertAsserts, "RefCnt asserts");
  }
}
Example #16
void BackEnd::genCodeImpl(IRUnit& unit, AsmInfo* asmInfo) {
  ctr++;
  auto regs = allocateRegs(unit);
  assert(checkRegisters(unit, regs)); // calls checkCfg internally.
  Timer _t(Timer::codeGen);
  LiveRegs live_regs = computeLiveRegs(unit, regs);
  CodegenState state(unit, regs, live_regs, asmInfo);

  CodeBlock& mainCodeIn   = mcg->code.main();
  CodeBlock& coldCodeIn   = mcg->code.cold();
  CodeBlock* frozenCode   = &mcg->code.frozen();

  CodeBlock mainCode;
  CodeBlock coldCode;
  bool relocate = false;
  if (RuntimeOption::EvalJitRelocationSize &&
      supportsRelocation() &&
      coldCodeIn.canEmit(RuntimeOption::EvalJitRelocationSize * 3)) {
    /*
     * This is mainly to exercise the relocator, and ensure that it's
     * not broken by new non-relocatable code. Later, it will be
     * used to do some peephole optimizations, such as reducing branch
     * sizes.
     * Allocate enough space that the relocated cold code doesn't
     * overlap the emitted cold code.
     */

    static unsigned seed = 42;
    auto off = rand_r(&seed) & (cacheLineSize() - 1);
    coldCode.init(coldCodeIn.frontier() +
                   RuntimeOption::EvalJitRelocationSize + off,
                   RuntimeOption::EvalJitRelocationSize - off, "cgRelocCold");

    mainCode.init(coldCode.frontier() +
                  RuntimeOption::EvalJitRelocationSize + off,
                  RuntimeOption::EvalJitRelocationSize - off, "cgRelocMain");

    relocate = true;
  } else {
    /*
     * Use separate code blocks, so that attempts to use the mcg's
     * code blocks directly will fail (eg by overwriting the same
     * memory being written through these locals).
     */
    coldCode.init(coldCodeIn.frontier(), coldCodeIn.available(),
                  coldCodeIn.name().c_str());
    mainCode.init(mainCodeIn.frontier(), mainCodeIn.available(),
                  mainCodeIn.name().c_str());
  }

  if (frozenCode == &coldCodeIn) {
    frozenCode = &coldCode;
  }
  auto frozenStart = frozenCode->frontier();
  auto coldStart DEBUG_ONLY = coldCodeIn.frontier();
  auto mainStart DEBUG_ONLY = mainCodeIn.frontier();
  size_t hhir_count{0};
  {
    mcg->code.lock();
    mcg->cgFixups().setBlocks(&mainCode, &coldCode, frozenCode);

    SCOPE_EXIT {
      mcg->cgFixups().setBlocks(nullptr, nullptr, nullptr);
      mcg->code.unlock();
    };

    if (RuntimeOption::EvalHHIRGenerateAsserts) {
      emitTraceCall(mainCode, unit.bcOff());
    }

    auto const linfo = layoutBlocks(unit);
    auto main_start = mainCode.frontier();
    auto cold_start = coldCode.frontier();
    auto frozen_start = frozenCode->frontier();
    Vasm vasm(&state.meta);
    auto& vunit = vasm.unit();
    // create the initial set of vasm blocks, numbered the same as hhir blocks.
    for (uint32_t i = 0, n = unit.numBlocks(); i < n; ++i) {
      state.labels[i] = vunit.makeBlock(AreaIndex::Main);
    }
    vunit.roots.push_back(state.labels[unit.entry()]);
    vasm.main(mainCode);
    vasm.cold(coldCode);
    vasm.frozen(*frozenCode);
    for (auto it = linfo.blocks.begin(); it != linfo.blocks.end(); ++it) {
      auto block = *it;
      auto v = block->hint() == Block::Hint::Unlikely ? vasm.cold() :
               block->hint() == Block::Hint::Unused ? vasm.frozen() :
               vasm.main();
      FTRACE(6, "genBlock {} on {}\n", block->id(),
             area_names[(unsigned)v.area()]);
      auto b = state.labels[block];
      vunit.blocks[b].area = v.area();
      v.use(b);
      hhir_count += genBlock(unit, v, vasm, state, block);
      assert(v.closed());
      assert(vasm.main().empty() || vasm.main().closed());
      assert(vasm.cold().empty() || vasm.cold().closed());
      assert(vasm.frozen().empty() || vasm.frozen().closed());
    }
    printUnit("after code-gen", vasm.unit());
    vasm.finish(vasm_abi);
    if (state.asmInfo) {
      auto block = unit.entry();
      state.asmInfo->asmRanges[block] = {main_start, mainCode.frontier()};
      if (mainCode.base() != coldCode.base() && frozenCode != &coldCode) {
        state.asmInfo->acoldRanges[block] = {cold_start, coldCode.frontier()};
      }
      if (mainCode.base() != frozenCode->base()) {
        state.asmInfo->afrozenRanges[block] = {frozen_start,
                                               frozenCode->frontier()};
      }
    }
  }
  auto bcMap = &mcg->cgFixups().m_bcMap;
  if (!bcMap->empty()) {
    TRACE(1, "BCMAPS before relocation\n");
    for (UNUSED auto& map : *bcMap) {
      TRACE(1, "%s %-6d %p %p %p\n", map.md5.toString().c_str(),
             map.bcStart, map.aStart, map.acoldStart, map.afrozenStart);
    }
  }

  assert(coldCodeIn.frontier() == coldStart);
  assert(mainCodeIn.frontier() == mainStart);

  if (relocate) {
    if (asmInfo) {
      printUnit(kRelocationLevel, unit, " before relocation ", &regs, asmInfo);
    }

    auto& be = mcg->backEnd();
    RelocationInfo rel;
    size_t asm_count{0};
    asm_count += be.relocate(rel, mainCodeIn,
                mainCode.base(), mainCode.frontier(),
                mcg->cgFixups());

    asm_count += be.relocate(rel, coldCodeIn,
                coldCode.base(), coldCode.frontier(),
                mcg->cgFixups());
    TRACE(1, "hhir-inst-count %ld asm %ld\n", hhir_count, asm_count);

    if (frozenCode != &coldCode) {
      rel.recordRange(frozenStart, frozenCode->frontier(),
                      frozenStart, frozenCode->frontier());
    }
    be.adjustForRelocation(rel, mcg->cgFixups());
    be.adjustForRelocation(rel, asmInfo, mcg->cgFixups());

    if (asmInfo) {
      static int64_t mainDeltaTot = 0, coldDeltaTot = 0;
      int64_t mainDelta =
        (mainCodeIn.frontier() - mainStart) -
        (mainCode.frontier() - mainCode.base());
      int64_t coldDelta =
        (coldCodeIn.frontier() - coldStart) -
        (coldCode.frontier() - coldCode.base());

      mainDeltaTot += mainDelta;
      HPHP::Trace::traceRelease("main delta after relocation: %" PRId64
                                " (%" PRId64 ")\n",
                                mainDelta, mainDeltaTot);
      coldDeltaTot += coldDelta;
      HPHP::Trace::traceRelease("cold delta after relocation: %" PRId64
                                " (%" PRId64 ")\n",
                                coldDelta, coldDeltaTot);
    }
#ifndef NDEBUG
    auto& ip = mcg->cgFixups().m_inProgressTailJumps;
    for (size_t i = 0; i < ip.size(); ++i) {
      const auto& ib = ip[i];
      assert(!mainCode.contains(ib.toSmash()));
      assert(!coldCode.contains(ib.toSmash()));
    }
    memset(mainCode.base(), 0xcc, mainCode.frontier() - mainCode.base());
    memset(coldCode.base(), 0xcc, coldCode.frontier() - coldCode.base());
#endif
  } else {
    coldCodeIn.skip(coldCode.frontier() - coldCodeIn.frontier());
    mainCodeIn.skip(mainCode.frontier() - mainCodeIn.frontier());
  }

  if (asmInfo) {
    printUnit(kCodeGenLevel, unit, " after code gen ", &regs, asmInfo);
  }
}
Example #17
File: opt.cpp Project: BillHu/hhvm
void optimize(IRUnit& unit, IRBuilder& irBuilder, TransKind kind) {
  Timer _t(Timer::optimize);

  auto const finishPass = [&] (const char* msg) {
    if (msg) {
      printUnit(6, unit, folly::format("after {}", msg).str().c_str());
    }
    assertx(checkCfg(unit));
    assertx(checkTmpsSpanningCalls(unit));
    if (debug) {
      forEachInst(rpoSortCfg(unit), [&](IRInstruction* inst) {
        assertx(checkOperandTypes(inst, &unit));
      });
    }
  };

  auto const doPass = [&] (void (*fn)(IRUnit&), const char* msg = nullptr) {
    fn(unit);
    finishPass(msg);
  };

  auto const dce = [&] (const char* which) {
    if (!RuntimeOption::EvalHHIRDeadCodeElim) return;
    eliminateDeadCode(unit);
    finishPass(folly::format("{} DCE", which).str().c_str());
  };

  auto const simplifyPass = [] (IRUnit& unit) {
    boost::dynamic_bitset<> reachable(unit.numBlocks());
    reachable.set(unit.entry()->id());

    auto const blocks = rpoSortCfg(unit);

    for (auto block : blocks) {
      // Skip unreachable blocks, or simplify() cries.
      if (!reachable.test(block->id())) continue;

      for (auto& inst : *block) simplify(unit, &inst);

      if (auto const b = block->back().next())  reachable.set(b->id());
      if (auto const b = block->back().taken()) reachable.set(b->id());
    }
  };

  auto const doSimplify = RuntimeOption::EvalHHIRExtraOptPass &&
                          RuntimeOption::EvalHHIRSimplification;
  auto const hasLoop = RuntimeOption::EvalJitLoops && cfgHasLoop(unit);

  auto const traceMode = kind != TransKind::Optimize ||
                         RuntimeOption::EvalJitPGORegionSelector == "hottrace";

  // TODO (#5792564): Guard relaxation doesn't work with loops.
  // TODO (#6599498): Guard relaxation is broken in wholecfg mode.
  if (shouldHHIRRelaxGuards() && !hasLoop && traceMode) {
    Timer _t(Timer::optimize_relaxGuards);
    const bool simple = kind == TransKind::Profile &&
                        (RuntimeOption::EvalJitRegionSelector == "tracelet" ||
                         RuntimeOption::EvalJitRegionSelector == "method");
    RelaxGuardsFlags flags = (RelaxGuardsFlags)
      (RelaxReflow | (simple ? RelaxSimple : RelaxNormal));
    auto changed = relaxGuards(unit, *irBuilder.guards(), flags);
    if (changed) finishPass("guard relaxation");

    if (doSimplify) {
      doPass(simplifyPass, "guard relaxation simplify");
    }
  }

  // This is vestigial (it removes some instructions needed by the old refcount
  // opts pass), and will be removed soon.
  eliminateTakes(unit);

  dce("initial");

  if (RuntimeOption::EvalHHIRPredictionOpts) {
    doPass(optimizePredictions, "prediction opts");
  }

  if (doSimplify) {
    doPass(simplifyPass, "simplify");
    dce("simplify");
  }

  if (RuntimeOption::EvalHHIRGlobalValueNumbering) {
    doPass(gvn);
    dce("gvn");
  }

  if (kind != TransKind::Profile && RuntimeOption::EvalHHIRMemoryOpts) {
    doPass(optimizeLoads);
    dce("loadelim");
  }

  /*
   * Note: doing this pass this late might not be ideal, in particular because
   * we've already turned some StLoc instructions into StLocNT.
   *
   * But right now there are assumptions preventing us from doing it before
   * refcount opts.  (Refcount opts needs to see all the StLocs explicitly
   * because it makes assumptions about whether references are consumed based
   * on that.)
   */
  if (kind != TransKind::Profile && RuntimeOption::EvalHHIRMemoryOpts) {
    doPass(optimizeStores);
    dce("storeelim");
  }

  if (kind != TransKind::Profile && RuntimeOption::EvalHHIRRefcountOpts) {
    doPass(optimizeRefcounts2);
    dce("refcount");
  }

  if (RuntimeOption::EvalHHIRGenerateAsserts) {
    doPass(insertAsserts);
  }
}
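Note: simplifyPass above walks blocks in RPO but only processes a block once
a processed predecessor has marked it reachable, so simplify() never sees
dead blocks. A toy sketch of that guard (Block and the RPO assumption are
illustrative only):

#include <cassert>
#include <vector>

struct Block { int next, taken; bool simplified = false; };

int main() {
  // 0 -> (1, 2); 1 -> 3; 2 -> 3; block 4 is unreachable. -1 means no edge.
  std::vector<Block> blocks = {
    {1, 2}, {3, -1}, {3, -1}, {-1, -1}, {3, -1}
  };
  std::vector<bool> reachable(blocks.size(), false);
  reachable[0] = true;                 // the entry is always reachable

  // Indices 0..n-1 happen to be an RPO order for this toy CFG.
  for (size_t i = 0; i < blocks.size(); ++i) {
    if (!reachable[i]) continue;       // skip dead blocks
    blocks[i].simplified = true;       // stand-in for simplify(unit, &inst)
    if (blocks[i].next  >= 0) reachable[blocks[i].next]  = true;
    if (blocks[i].taken >= 0) reachable[blocks[i].taken] = true;
  }
  assert(blocks[3].simplified && !blocks[4].simplified);
}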
Example #18
void optimize(IRUnit& unit, IRBuilder& irBuilder, TransKind kind) {
  Timer timer(Timer::optimize);

  assertx(checkEverything(unit));

  auto const hasLoop = RuntimeOption::EvalJitLoops && cfgHasLoop(unit);
  auto const func = unit.entry()->front().marker().func();
  auto const regionMode = pgoRegionMode(*func);
  auto const traceMode = kind != TransKind::Optimize ||
                         regionMode == PGORegionMode::Hottrace;

  // TODO (#5792564): Guard relaxation doesn't work with loops.
  // TODO (#6599498): Guard relaxation is broken in wholecfg mode.
  if (shouldHHIRRelaxGuards() && !hasLoop && traceMode) {
    Timer _t(Timer::optimize_relaxGuards);
    const bool simple = kind == TransKind::Profile &&
                        (RuntimeOption::EvalJitRegionSelector == "tracelet" ||
                         RuntimeOption::EvalJitRegionSelector == "method");
    RelaxGuardsFlags flags = (RelaxGuardsFlags)
      (RelaxReflow | (simple ? RelaxSimple : RelaxNormal));
    auto changed = relaxGuards(unit, *irBuilder.guards(), flags);
    if (changed) {
      printUnit(6, unit, "after guard relaxation");
      mandatoryDCE(unit);  // relaxGuards can leave unreachable preds.
    }

    if (RuntimeOption::EvalHHIRSimplification) {
      doPass(unit, simplifyPass, DCE::Minimal);
      doPass(unit, cleanCfg, DCE::None);
    }
  }

  fullDCE(unit);
  printUnit(6, unit, " after initial DCE ");
  assertx(checkEverything(unit));

  if (RuntimeOption::EvalHHIRTypeCheckHoisting) {
    doPass(unit, hoistTypeChecks, DCE::None);
  }

  if (RuntimeOption::EvalHHIRPredictionOpts) {
    doPass(unit, optimizePredictions, DCE::None);
  }

  if (RuntimeOption::EvalHHIRSimplification) {
    doPass(unit, simplifyPass, DCE::Full);
    doPass(unit, cleanCfg, DCE::None);
  }

  if (RuntimeOption::EvalHHIRGlobalValueNumbering) {
    doPass(unit, gvn, DCE::Full);
  }

  if (kind != TransKind::Profile && RuntimeOption::EvalHHIRMemoryOpts) {
    doPass(unit, optimizeLoads, DCE::Full);
  }

  if (kind != TransKind::Profile && RuntimeOption::EvalHHIRMemoryOpts) {
    doPass(unit, optimizeStores, DCE::Full);
  }

  if (kind != TransKind::Profile && RuntimeOption::EvalHHIRRefcountOpts) {
    doPass(unit, optimizeRefcounts2, DCE::Full);
  }

  if (RuntimeOption::EvalHHIRLICM) {
    if (kind != TransKind::Profile && hasLoop) {
      // The clean pass is just to stress lack of pre_headers for now, since
      // LICM is a disabled prototype pass.
      doPass(unit, cleanCfg, DCE::None);
      doPass(unit, optimizeLoopInvariantCode, DCE::Minimal);
    }
  }
  doPass(unit, removeExitPlaceholders, DCE::Full);

  if (RuntimeOption::EvalHHIRGenerateAsserts) {
    doPass(unit, insertAsserts, DCE::None);
  }
}
Example #19
void optimize(IRUnit& unit, TransKind kind) {
  Timer timer(Timer::optimize, unit.logEntry().get_pointer());

  assertx(checkEverything(unit));

  fullDCE(unit);
  printUnit(6, unit, " after initial DCE ");
  assertx(checkEverything(unit));

  if (RuntimeOption::EvalHHIRPredictionOpts) {
    doPass(unit, optimizePredictions, DCE::None);
  }

  if (RuntimeOption::EvalHHIRSimplification) {
    doPass(unit, simplifyPass, DCE::Full);
    doPass(unit, cleanCfg, DCE::None);
  }

  if (RuntimeOption::EvalHHIRGlobalValueNumbering) {
    doPass(unit, gvn, DCE::Full);
  }

  if (kind != TransKind::Profile && RuntimeOption::EvalHHIRMemoryOpts) {
    doPass(unit, optimizeLoads, DCE::Full);
  }

  if (kind != TransKind::Profile && RuntimeOption::EvalHHIRMemoryOpts) {
    doPass(unit, optimizeStores, DCE::Full);
  }

  if (RuntimeOption::EvalHHIRPartialInlineFrameOpts) {
    doPass(unit, optimizeInlineReturns, DCE::Full);
  }

  doPass(unit, optimizePhis, DCE::Full);

  if (kind != TransKind::Profile && RuntimeOption::EvalHHIRRefcountOpts) {
    doPass(unit, optimizeRefcounts, DCE::Full);
  }

  if (RuntimeOption::EvalHHIRLICM && cfgHasLoop(unit) &&
      kind != TransKind::Profile) {
    doPass(unit, optimizeLoopInvariantCode, DCE::Minimal);
  }

  doPass(unit, simplifyOrdStrIdx, DCE::Minimal);

  doPass(unit, removeExitPlaceholders, DCE::Full);

  if (RuntimeOption::EvalHHIRGenerateAsserts) {
    doPass(unit, insertAsserts, DCE::None);
  }

  // Perform final cleanup passes to collapse any critical edges that were
  // split, and simplify our instructions before shipping off to codegen.
  doPass(unit, cleanCfg, DCE::None);

  if (kind != TransKind::Profile &&
      RuntimeOption::EvalHHIRGlobalValueNumbering) {
    doPass(unit, gvn, DCE::Full);
  }

  if (kind != TransKind::Profile && RuntimeOption::EvalHHIRSimplification) {
    doPass(unit, simplifyPass, DCE::Full);
  }

  doPass(unit, fixBlockHints, DCE::None);
}
Example #20
void optimizeJmps(Vunit& unit) {
  auto isEmpty = [&](Vlabel b, Vinstr::Opcode op) {
    auto& code = unit.blocks[b].code;
    return code.size() == 1 && op == code[0].op;
  };
  bool changed = false;
  bool ever_changed = false;
  jit::vector<int> npreds(unit.blocks.size(), 0);
  do {
    if (changed) {
      fill(npreds.begin(), npreds.end(), 0);
    }
    changed = false;
    PostorderWalker{unit}.dfs([&](Vlabel b) {
      for (auto s : succs(unit.blocks[b])) {
        npreds[s]++;
      }
    });
    // give roots an extra predecessor to prevent cloning them.
    for (auto b : unit.roots) {
      npreds[b]++;
    }
    PostorderWalker{unit}.dfs([&](Vlabel b) {
      auto& block = unit.blocks[b];
      auto& code = block.code;
      assert(!code.empty());
      if (code.back().op == Vinstr::jcc) {
        auto ss = succs(block);
        if (ss[0] == ss[1]) {
          // both edges have same target, change to jmp
          code.back() = jmp{ss[0]};
          changed = true;
        }
      }
      if (code.back().op == Vinstr::jmp) {
        auto& s = code.back().jmp_.target;
        if (isEmpty(s, Vinstr::jmp)) {
          // skip over s
          s = unit.blocks[s].code.back().jmp_.target;
          changed = true;
        } else if (npreds[s] == 1 || isEmpty(s, Vinstr::jcc)) {
          // overwrite jmp with copy of s
          auto& code2 = unit.blocks[s].code;
          code.pop_back();
          code.insert(code.end(), code2.begin(), code2.end());
          changed = true;
        }
      } else {
        for (auto& s : succs(block)) {
          if (isEmpty(s, Vinstr::jmp)) {
            // skip over s
            s = unit.blocks[s].code.back().jmp_.target;
            changed = true;
          }
        }
      }
    });
    ever_changed |= changed;
  } while (changed);
  if (ever_changed) {
    printUnit(kVasmJumpsLevel, "after vasm-jumps", unit);
  }
}
Example #21
static void genCodeImpl(IRUnit& unit, AsmInfo* asmInfo) {
  auto regs = allocateRegs(unit);
  assert(checkRegisters(unit, regs)); // calls checkCfg internally.
  Timer _t(Timer::codeGen);
  LiveRegs live_regs = computeLiveRegs(unit, regs);
  CodegenState state(unit, regs, live_regs, asmInfo);

  // Returns: whether a block has already been emitted.
  DEBUG_ONLY auto isEmitted = [&](Block* block) {
    return state.addresses[block];
  };

  CodeBlock& mainCodeIn   = mcg->code.main();
  CodeBlock& coldCodeIn   = mcg->code.cold();
  CodeBlock* frozenCode   = &mcg->code.frozen();

  CodeBlock mainCode;
  CodeBlock coldCode;
  bool relocate = false;
  if (RuntimeOption::EvalJitRelocationSize &&
      mcg->backEnd().supportsRelocation() &&
      coldCodeIn.canEmit(RuntimeOption::EvalJitRelocationSize * 3)) {
    /*
     * This is mainly to exercise the relocator, and ensure that it's
     * not broken by new non-relocatable code. Later, it will be
     * used to do some peephole optimizations, such as reducing branch
     * sizes.
     * Allocate enough space that the relocated cold code doesn't
     * overlap the emitted cold code.
     */

    static unsigned seed = 42;
    auto off = rand_r(&seed) & (mcg->backEnd().cacheLineSize() - 1);
    coldCode.init(coldCodeIn.frontier() +
                   RuntimeOption::EvalJitRelocationSize + off,
                   RuntimeOption::EvalJitRelocationSize - off, "cgRelocCold");

    mainCode.init(coldCode.frontier() +
                  RuntimeOption::EvalJitRelocationSize + off,
                  RuntimeOption::EvalJitRelocationSize - off, "cgRelocMain");

    relocate = true;
  } else {
    /*
     * Use separate code blocks, so that attempts to use the mcg's
     * code blocks directly will fail (eg by overwriting the same
     * memory being written through these locals).
     */
    coldCode.init(coldCodeIn.frontier(), coldCodeIn.available(),
                  coldCodeIn.name().c_str());
    mainCode.init(mainCodeIn.frontier(), mainCodeIn.available(),
                  mainCodeIn.name().c_str());
  }

  if (frozenCode == &coldCodeIn) {
    frozenCode = &coldCode;
  }
  auto frozenStart = frozenCode->frontier();
  auto coldStart DEBUG_ONLY = coldCodeIn.frontier();
  auto mainStart DEBUG_ONLY = mainCodeIn.frontier();
  auto bcMap = &mcg->cgFixups().m_bcMap;

  {
    mcg->code.lock();
    mcg->cgFixups().setBlocks(&mainCode, &coldCode, frozenCode);

    SCOPE_EXIT {
      mcg->cgFixups().setBlocks(nullptr, nullptr, nullptr);
      mcg->code.unlock();
    };

    /*
     * Emit the given block on the supplied assembler.  The `nextLinear'
     * is the next block that will be emitted on this assembler.  If it is
     * not the next block in control flow order, then emit a patchable jump
     * to the next flow block.
     */
    auto emitBlock = [&](CodeBlock& cb, Block* block, Block* nextLinear) {
      assert(!isEmitted(block));

      FTRACE(6, "genBlock {} on {}\n", block->id(),
             cb.base() == coldCode.base() ? "acold" : "a");

      auto const aStart       = cb.frontier();
      auto const acoldStart   = coldCode.frontier();
      auto const afrozenStart = frozenCode->frontier();
      mcg->backEnd().patchJumps(cb, state, block);
      state.addresses[block] = aStart;

      // If the block ends with a Jmp and the next block is going to be
      // its target, we don't need to actually emit it.
      IRInstruction* last = &block->back();
      state.noTerminalJmp = last->op() == Jmp && nextLinear == last->taken();

      if (state.asmInfo) {
        state.asmInfo->asmRanges[block] = TcaRange(aStart, cb.frontier());
      }

      genBlock(unit, cb, coldCode, *frozenCode, state, block, bcMap);
      auto nextFlow = block->next();
      if (nextFlow && nextFlow != nextLinear) {
        mcg->backEnd().emitFwdJmp(cb, nextFlow, state);
      }

      if (state.asmInfo) {
        state.asmInfo->asmRanges[block] = TcaRange(aStart, cb.frontier());
        if (cb.base() != coldCode.base() && frozenCode != &coldCode) {
          state.asmInfo->acoldRanges[block] = TcaRange(acoldStart,
                                                       coldCode.frontier());
        }
        if (cb.base() != frozenCode->base()) {
          state.asmInfo->afrozenRanges[block] = TcaRange(afrozenStart,
                                                        frozenCode->frontier());
        }
      }
    };

    if (RuntimeOption::EvalHHIRGenerateAsserts) {
      mcg->backEnd().emitTraceCall(mainCode, unit.bcOff());
    }

    auto const linfo = layoutBlocks(unit);

    for (auto it = linfo.blocks.begin(); it != linfo.acoldIt; ++it) {
      Block* nextLinear = boost::next(it) != linfo.acoldIt
        ? *boost::next(it) : nullptr;
      emitBlock(mainCode, *it, nextLinear);
    }
    for (auto it = linfo.acoldIt; it != linfo.afrozenIt; ++it) {
      Block* nextLinear = boost::next(it) != linfo.afrozenIt
        ? *boost::next(it) : nullptr;
      emitBlock(coldCode, *it, nextLinear);
    }
    for (auto it = linfo.afrozenIt; it != linfo.blocks.end(); ++it) {
      Block* nextLinear = boost::next(it) != linfo.blocks.end()
        ? *boost::next(it) : nullptr;
      emitBlock(*frozenCode, *it, nextLinear);
    }

    if (debug) {
      for (Block* UNUSED block : linfo.blocks) {
        assert(isEmitted(block));
      }
    }
  }

  assert(coldCodeIn.frontier() == coldStart);
  assert(mainCodeIn.frontier() == mainStart);

  if (relocate) {
    if (asmInfo) {
      printUnit(kRelocationLevel, unit, " before relocation ", &regs, asmInfo);
    }

    auto& be = mcg->backEnd();
    RelocationInfo rel;
    be.relocate(rel, mainCodeIn,
                mainCode.base(), mainCode.frontier(),
                mcg->cgFixups());

    be.relocate(rel, coldCodeIn,
                coldCode.base(), coldCode.frontier(),
                mcg->cgFixups());

    if (frozenCode != &coldCode) {
      rel.recordRange(frozenStart, frozenCode->frontier(),
                      frozenStart, frozenCode->frontier());
    }
    be.adjustForRelocation(rel, mcg->cgFixups());
    be.adjustForRelocation(rel, asmInfo, mcg->cgFixups());

    if (asmInfo) {
      static int64_t mainDeltaTot = 0, coldDeltaTot = 0;
      int64_t mainDelta =
        (mainCodeIn.frontier() - mainStart) -
        (mainCode.frontier() - mainCode.base());
      int64_t coldDelta =
        (coldCodeIn.frontier() - coldStart) -
        (coldCode.frontier() - coldCode.base());

      mainDeltaTot += mainDelta;
      HPHP::Trace::traceRelease("main delta after relocation: %" PRId64
                                " (%" PRId64 ")\n",
                                mainDelta, mainDeltaTot);
      coldDeltaTot += coldDelta;
      HPHP::Trace::traceRelease("cold delta after relocation: %" PRId64
                                " (%" PRId64 ")\n",
                                coldDelta, coldDeltaTot);
    }
#ifndef NDEBUG
    auto& ip = mcg->cgFixups().m_inProgressTailJumps;
    for (size_t i = 0; i < ip.size(); ++i) {
      const auto& ib = ip[i];
      assert(!mainCode.contains(ib.toSmash()));
      assert(!coldCode.contains(ib.toSmash()));
    }
    memset(mainCode.base(), 0xcc, mainCode.frontier() - mainCode.base());
    memset(coldCode.base(), 0xcc, coldCode.frontier() - coldCode.base());
#endif
  } else {
    coldCodeIn.skip(coldCode.frontier() - coldCodeIn.frontier());
    mainCodeIn.skip(mainCode.frontier() - mainCodeIn.frontier());
  }
  if (asmInfo) {
    printUnit(kCodeGenLevel, unit, " after code gen ", &regs, asmInfo);
  }
}