Пример #1
0
void populate_block(ParseUnitState& puState,
                    const FuncEmitter& fe,
                    php::Func& func,
                    php::Block& blk,
                    PC pc,
                    PC const past,
                    FindBlock findBlock) {
  auto const& ue = fe.ue();

  auto decode_minstr = [&] {
    auto const immVec = ImmVector::createFromStream(pc);
    pc += immVec.size() + sizeof(int32_t) + sizeof(int32_t);

    auto ret = MVector {};
    auto vec = immVec.vec();

    ret.lcode = static_cast<LocationCode>(*vec++);
    if (numLocationCodeImms(ret.lcode)) {
      assert(numLocationCodeImms(ret.lcode) == 1);
      ret.locBase = borrow(func.locals[decodeVariableSizeImm(&vec)]);
    }

    while (vec < pc) {
      auto elm = MElem {};
      elm.mcode = static_cast<MemberCode>(*vec++);
      switch (memberCodeImmType(elm.mcode)) {
      case MCodeImm::None: break;
      case MCodeImm::Local:
        elm.immLoc = borrow(func.locals[decodeMemberCodeImm(&vec, elm.mcode)]);
        break;
      case MCodeImm::String:
        elm.immStr = ue.lookupLitstr(decodeMemberCodeImm(&vec, elm.mcode));
        break;
      case MCodeImm::Int:
        elm.immInt = decodeMemberCodeImm(&vec, elm.mcode);
        break;
      }
      ret.mcodes.push_back(elm);
    }
    assert(vec == pc);

    return ret;
  };

  auto decode_stringvec = [&] {
    auto const vecLen = decode<int32_t>(pc);
    std::vector<SString> keys;
    for (auto i = size_t{0}; i < vecLen; ++i) {
      keys.push_back(ue.lookupLitstr(decode<int32_t>(pc)));
    }
    return keys;
  };

  auto decode_switch = [&] (PC opPC) {
    SwitchTab ret;
    auto const vecLen = decode<int32_t>(pc);
    for (int32_t i = 0; i < vecLen; ++i) {
      ret.push_back(findBlock(
        opPC + decode<Offset>(pc) - ue.bc()
      ));
    }
    return ret;
  };

  auto decode_sswitch = [&] (PC opPC) {
    SSwitchTab ret;

    auto const vecLen = decode<int32_t>(pc);
    for (int32_t i = 0; i < vecLen - 1; ++i) {
      auto const id = decode<Id>(pc);
      auto const offset = decode<Offset>(pc);
      ret.emplace_back(
        ue.lookupLitstr(id),
        findBlock(opPC + offset - ue.bc())
      );
    }

    // Final case is the default, and must have a litstr id of -1.
    DEBUG_ONLY auto const defId = decode<Id>(pc);
    auto const defOff = decode<Offset>(pc);
    assert(defId == -1);
    ret.emplace_back(nullptr, findBlock(opPC + defOff - ue.bc()));
    return ret;
  };

  auto decode_itertab = [&] {
    IterTab ret;
    auto const vecLen = decode<int32_t>(pc);
    for (int32_t i = 0; i < vecLen; ++i) {
      auto const kind = static_cast<IterKind>(decode<int32_t>(pc));
      auto const id = decode<int32_t>(pc);
      ret.emplace_back(kind, borrow(func.iters[id]));
    }
    return ret;
  };

  auto defcls = [&] (const Bytecode& b) {
    puState.defClsMap[b.DefCls.arg1] = &func;
  };
  auto nopdefcls = [&] (const Bytecode& b) {
    puState.defClsMap[b.NopDefCls.arg1] = &func;
  };
  auto createcl = [&] (const Bytecode& b) {
    puState.createClMap[b.CreateCl.str2].insert(&func);
  };

#define IMM_MA(n)      auto mvec = decode_minstr();
#define IMM_BLA(n)     auto targets = decode_switch(opPC);
#define IMM_SLA(n)     auto targets = decode_sswitch(opPC);
#define IMM_ILA(n)     auto iterTab = decode_itertab();
#define IMM_IVA(n)     auto arg##n = decodeVariableSizeImm(&pc);
#define IMM_I64A(n)    auto arg##n = decode<int64_t>(pc);
#define IMM_LA(n)      auto loc##n = [&] {                       \
                         auto id = decodeVariableSizeImm(&pc);   \
                         always_assert(id < func.locals.size()); \
                         return borrow(func.locals[id]);         \
                       }();
#define IMM_IA(n)      auto iter##n = [&] {                      \
                         auto id = decodeVariableSizeImm(&pc);   \
                         always_assert(id < func.iters.size());  \
                         return borrow(func.iters[id]);          \
                       }();
#define IMM_DA(n)      auto dbl##n = decode<double>(pc);
#define IMM_SA(n)      auto str##n = ue.lookupLitstr(decode<Id>(pc));
#define IMM_AA(n)      auto arr##n = ue.lookupArray(decode<Id>(pc));
#define IMM_BA(n)      assert(next == past); \
                       auto target = findBlock(  \
                         opPC + decode<Offset>(pc) - ue.bc());
#define IMM_OA_IMPL(n) decode<uint8_t>(pc);
#define IMM_OA(type)   auto subop = (type)IMM_OA_IMPL
#define IMM_VSA(n)     auto keys = decode_stringvec();

#define IMM_NA
#define IMM_ONE(x)           IMM_##x(1)
#define IMM_TWO(x, y)        IMM_##x(1)          IMM_##y(2)
#define IMM_THREE(x, y, z)   IMM_TWO(x, y)       IMM_##z(3)
#define IMM_FOUR(x, y, z, n) IMM_THREE(x, y, z)  IMM_##n(4)

#define IMM_ARG(which, n)         IMM_NAME_##which(n)
#define IMM_ARG_NA
#define IMM_ARG_ONE(x)            IMM_ARG(x, 1)
#define IMM_ARG_TWO(x, y)         IMM_ARG(x, 1), IMM_ARG(y, 2)
#define IMM_ARG_THREE(x, y, z)    IMM_ARG(x, 1), IMM_ARG(y, 2), \
                                    IMM_ARG(z, 3)
#define IMM_ARG_FOUR(x, y, z, l)  IMM_ARG(x, 1), IMM_ARG(y, 2), \
                                   IMM_ARG(z, 3), IMM_ARG(l, 4)


#define O(opcode, imms, inputs, outputs, flags)       \
  case Op::opcode:                                    \
    {                                                 \
      ++pc;                                           \
      auto b = Bytecode {};                           \
      b.op = Op::opcode;                              \
      b.srcLoc = srcLoc;                              \
      IMM_##imms                                      \
      new (&b.opcode) bc::opcode { IMM_ARG_##imms };  \
      if (Op::opcode == Op::DefCls)    defcls(b);     \
      if (Op::opcode == Op::NopDefCls) nopdefcls(b);  \
      if (Op::opcode == Op::CreateCl)  createcl(b);   \
      blk.hhbcs.push_back(std::move(b));              \
      assert(pc == next);                             \
    }                                                 \
    break;

  assert(pc != past);
  do {
    auto const opPC = pc;
    auto const pop  = reinterpret_cast<const Op*>(pc);
    auto const next = pc + instrLen(pop);
    assert(next <= past);

    auto const srcLoc = [&] {
      SourceLoc sloc;
      if (getSourceLoc(puState.srcLocTable, opPC - ue.bc(), sloc)) {
        return php::SrcLoc {
          { static_cast<uint32_t>(sloc.line0),
            static_cast<uint32_t>(sloc.char0) },
          { static_cast<uint32_t>(sloc.line1),
            static_cast<uint32_t>(sloc.char1) }
        };
      }
      return php::SrcLoc{};
    }();

    switch (*pop) { OPCODES }

    if (next == past) {
      if (instrAllowsFallThru(*pop)) {
        blk.fallthrough = findBlock(next - ue.bc());
      }
    }

    pc = next;
  } while (pc != past);

#undef O

#undef IMM_MA
#undef IMM_BLA
#undef IMM_SLA
#undef IMM_ILA
#undef IMM_IVA
#undef IMM_I64A
#undef IMM_LA
#undef IMM_IA
#undef IMM_DA
#undef IMM_SA
#undef IMM_AA
#undef IMM_BA
#undef IMM_OA_IMPL
#undef IMM_OA
#undef IMM_VSA

#undef IMM_NA
#undef IMM_ONE
#undef IMM_TWO
#undef IMM_THREE
#undef IMM_FOUR

#undef IMM_ARG
#undef IMM_ARG_NA
#undef IMM_ARG_ONE
#undef IMM_ARG_TWO
#undef IMM_ARG_THREE
#undef IMM_ARG_FOUR

  /*
   * If a block ends with an unconditional jump, change it to a
   * fallthrough edge.
   *
   * Just convert the opcode to a Nop, because this could create an
   * empty block and we have an invariant that no blocks are empty.
   */

  auto make_fallthrough = [&] {
    blk.fallthrough = blk.hhbcs.back().Jmp.target;
    blk.hhbcs.back() = bc_with_loc(blk.hhbcs.back().srcLoc, bc::Nop{});
  };

  switch (blk.hhbcs.back().op) {
  case Op::Jmp:   make_fallthrough();                           break;
  case Op::JmpNS: make_fallthrough(); blk.fallthroughNS = true; break;
  default:                                                      break;
  }
}
Пример #2
0
void populate_block(ParseUnitState& puState,
                    const FuncEmitter& fe,
                    php::Func& func,
                    php::Block& blk,
                    PC pc,
                    PC const past,
                    FindBlock findBlock) {
  auto const& ue = fe.ue();

  auto decode_stringvec = [&] {
    auto const vecLen = decode<int32_t>(pc);
    CompactVector<LSString> keys;
    for (auto i = size_t{0}; i < vecLen; ++i) {
      keys.push_back(ue.lookupLitstr(decode<int32_t>(pc)));
    }
    return keys;
  };

  auto decode_switch = [&] (PC opPC) {
    SwitchTab ret;
    auto const vecLen = decode<int32_t>(pc);
    for (int32_t i = 0; i < vecLen; ++i) {
      ret.push_back(findBlock(
        opPC + decode<Offset>(pc) - ue.bc()
      )->id);
    }
    return ret;
  };

  auto decode_sswitch = [&] (PC opPC) {
    SSwitchTab ret;

    auto const vecLen = decode<int32_t>(pc);
    for (int32_t i = 0; i < vecLen - 1; ++i) {
      auto const id = decode<Id>(pc);
      auto const offset = decode<Offset>(pc);
      ret.emplace_back(ue.lookupLitstr(id),
                       findBlock(opPC + offset - ue.bc())->id);
    }

    // Final case is the default, and must have a litstr id of -1.
    DEBUG_ONLY auto const defId = decode<Id>(pc);
    auto const defOff = decode<Offset>(pc);
    assert(defId == -1);
    ret.emplace_back(nullptr, findBlock(opPC + defOff - ue.bc())->id);
    return ret;
  };

  auto decode_itertab = [&] {
    IterTab ret;
    auto const vecLen = decode<int32_t>(pc);
    for (int32_t i = 0; i < vecLen; ++i) {
      auto const kind = static_cast<IterKind>(decode<int32_t>(pc));
      auto const id = decode<int32_t>(pc);
      ret.emplace_back(kind, id);
    }
    return ret;
  };

  auto defcls = [&] (const Bytecode& b) {
    puState.defClsMap[b.DefCls.arg1] = &func;
  };
  auto defclsnop = [&] (const Bytecode& b) {
    puState.defClsMap[b.DefClsNop.arg1] = &func;
  };
  auto createcl = [&] (const Bytecode& b) {
    puState.createClMap[b.CreateCl.arg2].insert(&func);
  };
  auto has_call_unpack = [&] {
    auto const fpi = Func::findFPI(&*fe.fpitab.begin(),
                                   &*fe.fpitab.end(), pc - ue.bc());
    auto const op = peek_op(ue.bc() + fpi->m_fpiEndOff);
    return op == OpFCallArray || op == OpFCallUnpack;
  };

#define IMM_BLA(n)     auto targets = decode_switch(opPC);
#define IMM_SLA(n)     auto targets = decode_sswitch(opPC);
#define IMM_ILA(n)     auto iterTab = decode_itertab();
#define IMM_IVA(n)     auto arg##n = decode_iva(pc);
#define IMM_I64A(n)    auto arg##n = decode<int64_t>(pc);
#define IMM_LA(n)      auto loc##n = [&] {                       \
                         LocalId id = decode_iva(pc);            \
                         always_assert(id < func.locals.size()); \
                         return id;                              \
                       }();
#define IMM_IA(n)      auto iter##n = [&] {                      \
                         IterId id = decode_iva(pc);             \
                         always_assert(id < func.numIters);      \
                         return id;                              \
                       }();
#define IMM_DA(n)      auto dbl##n = decode<double>(pc);
#define IMM_SA(n)      auto str##n = ue.lookupLitstr(decode<Id>(pc));
#define IMM_RATA(n)    auto rat = decodeRAT(ue, pc);
#define IMM_AA(n)      auto arr##n = ue.lookupArray(decode<Id>(pc));
#define IMM_BA(n)      assert(next == past);     \
                       auto target = findBlock(  \
                         opPC + decode<Offset>(pc) - ue.bc())->id;
#define IMM_OA_IMPL(n) subop##n; decode(pc, subop##n);
#define IMM_OA(type)   type IMM_OA_IMPL
#define IMM_VSA(n)     auto keys = decode_stringvec();
#define IMM_KA(n)      auto mkey = make_mkey(func, decode_member_key(pc, &ue));
#define IMM_LAR(n)     auto locrange = [&] {                                 \
                         auto const range = decodeLocalRange(pc);            \
                         always_assert(range.first + range.restCount         \
                                       < func.locals.size());                \
                         return LocalRange { range.first, range.restCount }; \
                       }();

#define IMM_NA
#define IMM_ONE(x)           IMM_##x(1)
#define IMM_TWO(x, y)        IMM_##x(1)          IMM_##y(2)
#define IMM_THREE(x, y, z)   IMM_TWO(x, y)       IMM_##z(3)
#define IMM_FOUR(x, y, z, n) IMM_THREE(x, y, z)  IMM_##n(4)

#define IMM_ARG(which, n)         IMM_NAME_##which(n)
#define IMM_ARG_NA
#define IMM_ARG_ONE(x)            IMM_ARG(x, 1)
#define IMM_ARG_TWO(x, y)         IMM_ARG(x, 1), IMM_ARG(y, 2)
#define IMM_ARG_THREE(x, y, z)    IMM_ARG(x, 1), IMM_ARG(y, 2), \
                                    IMM_ARG(z, 3)
#define IMM_ARG_FOUR(x, y, z, l)  IMM_ARG(x, 1), IMM_ARG(y, 2), \
                                   IMM_ARG(z, 3), IMM_ARG(l, 4)

#define FLAGS_NF
#define FLAGS_TF
#define FLAGS_CF
#define FLAGS_FF
#define FLAGS_PF auto hu = has_call_unpack();
#define FLAGS_CF_TF
#define FLAGS_CF_FF

#define FLAGS_ARG_NF
#define FLAGS_ARG_TF
#define FLAGS_ARG_CF
#define FLAGS_ARG_FF
#define FLAGS_ARG_PF ,hu
#define FLAGS_ARG_CF_TF
#define FLAGS_ARG_CF_FF

#define O(opcode, imms, inputs, outputs, flags)         \
  case Op::opcode:                                      \
    {                                                   \
      auto b = Bytecode {};                             \
      b.op = Op::opcode;                                \
      b.srcLoc = srcLocIx;                              \
      IMM_##imms                                        \
      FLAGS_##flags                                     \
      new (&b.opcode) bc::opcode { IMM_ARG_##imms       \
                                   FLAGS_ARG_##flags }; \
      if (Op::opcode == Op::DefCls)    defcls(b);       \
      if (Op::opcode == Op::DefClsNop) defclsnop(b);    \
      if (Op::opcode == Op::CreateCl)  createcl(b);     \
      blk.hhbcs.push_back(std::move(b));                \
      assert(pc == next);                               \
    }                                                   \
    break;

  assert(pc != past);
  do {
    auto const opPC = pc;
    auto const next = pc + instrLen(opPC);
    assert(next <= past);

    auto const srcLoc = match<php::SrcLoc>(
      puState.srcLocInfo,
      [&] (const SourceLocTable& tab) {
        SourceLoc sloc;
        if (getSourceLoc(tab, opPC - ue.bc(), sloc)) {
          return php::SrcLoc {
            { static_cast<uint32_t>(sloc.line0),
              static_cast<uint32_t>(sloc.char0) },
            { static_cast<uint32_t>(sloc.line1),
              static_cast<uint32_t>(sloc.char1) }
          };
        }
        return php::SrcLoc{};
      },
      [&] (const LineTable& tab) {
        auto const line = getLineNumber(tab, opPC - ue.bc());
        if (line != -1) {
          return php::SrcLoc {
            { static_cast<uint32_t>(line), 0 },
            { static_cast<uint32_t>(line), 0 },
          };
        };
        return php::SrcLoc{};
      }
    );

    auto const srcLocIx = puState.srcLocs.emplace(
      srcLoc, puState.srcLocs.size()).first->second;

    auto const op = decode_op(pc);
    switch (op) { OPCODES }

    if (next == past) {
      if (instrAllowsFallThru(op)) {
        blk.fallthrough = findBlock(next - ue.bc())->id;
      }
    }

    pc = next;
  } while (pc != past);

#undef O

#undef FLAGS_NF
#undef FLAGS_TF
#undef FLAGS_CF
#undef FLAGS_FF
#undef FLAGS_PF
#undef FLAGS_CF_TF
#undef FLAGS_CF_FF

#undef FLAGS_ARG_NF
#undef FLAGS_ARG_TF
#undef FLAGS_ARG_CF
#undef FLAGS_ARG_FF
#undef FLAGS_ARG_PF
#undef FLAGS_ARG_CF_TF
#undef FLAGS_ARG_CF_FF

#undef IMM_BLA
#undef IMM_SLA
#undef IMM_ILA
#undef IMM_IVA
#undef IMM_I64A
#undef IMM_LA
#undef IMM_IA
#undef IMM_DA
#undef IMM_SA
#undef IMM_RATA
#undef IMM_AA
#undef IMM_BA
#undef IMM_OA_IMPL
#undef IMM_OA
#undef IMM_VSA
#undef IMM_LAR

#undef IMM_NA
#undef IMM_ONE
#undef IMM_TWO
#undef IMM_THREE
#undef IMM_FOUR

#undef IMM_ARG
#undef IMM_ARG_NA
#undef IMM_ARG_ONE
#undef IMM_ARG_TWO
#undef IMM_ARG_THREE
#undef IMM_ARG_FOUR

  /*
   * If a block ends with an unconditional jump, change it to a
   * fallthrough edge.
   *
   * Just convert the opcode to a Nop, because this could create an
   * empty block and we have an invariant that no blocks are empty.
   */

  auto make_fallthrough = [&] {
    blk.fallthrough = blk.hhbcs.back().Jmp.target;
    blk.hhbcs.back() = bc_with_loc(blk.hhbcs.back().srcLoc, bc::Nop{});
  };

  switch (blk.hhbcs.back().op) {
  case Op::Jmp:   make_fallthrough();                           break;
  case Op::JmpNS: make_fallthrough(); blk.fallthroughNS = true; break;
  default:                                                      break;
  }
}