void
ParserOpSet<ParserLookAhead>::operator()(Packet *pkt, const char *data,
                                         size_t *bytes_parsed) const {
  (void) bytes_parsed;
  // Per-thread scratch container, reused from one call to the next; it is
  // only touched when the lookahead width and the field width differ.
  static thread_local ByteContainer scratch;
  Field &target = pkt->get_phv()->get_field(dst.header, dst.offset);
  const int target_bits = target.get_nbits();
  /* Two cases. When the widths match (expected to be the common case), the
     packet bytes are extracted straight into the field. Otherwise the bytes
     are first peeked into the scratch ByteContainer and the field is then set
     from that container. A more general extract function handling a size
     difference between source and destination would be an alternative. */
  // TODO(antonin)
  if (src.bitwidth != target_bits) {
    scratch.clear();
    src.peek(data, &scratch);
    target.set(scratch);
  } else {
    target.extract(data + src.byte_offset, src.bit_offset);
  }
  BMLOG_DEBUG_PKT(
    *pkt,
    "Parser set: setting field ({}, {}) from lookahead ({}, {}), "
    "new value is {}",
    dst.header, dst.offset, src.bit_offset, src.bitwidth, target);
}
// Example #2
// 0
// Writes every valid header of the packet's PHV, in the order given by
// `headers`, into space prepended at the front of the packet buffer.
void
Deparser::deparse(Packet *pkt) const {
  PHV *phv = pkt->get_phv();
  BMELOG(deparser_start, *pkt, *this);
  // TODO(antonin)
  // this is temporary while we experiment with the debugger
  DEBUGGER_NOTIFY_CTR(
      Debugger::PacketId::make(pkt->get_packet_id(), pkt->get_copy_id()),
      DBG_CTR_DEPARSER | get_id());
  update_checksums(pkt);
  // reserve room for all the valid headers in one go
  char *out = pkt->prepend(get_headers_size(*phv));
  int offset = 0;
  // Headers are neither invalidated nor are header stacks reset here: that is
  // done in the Packet destructor, when the PHV is released.
  for (const auto &header_id : headers) {
    Header &header = phv->get_header(header_id);
    if (!header.is_valid()) continue;
    BMELOG(deparser_emit, *pkt, header_id);
    header.deparse(out + offset);
    offset += header.get_nbytes_packet();
  }
  BMELOG(deparser_done, *pkt, *this);
  DEBUGGER_NOTIFY_CTR(
      Debugger::PacketId::make(pkt->get_packet_id(), pkt->get_copy_id()),
      DBG_CTR_EXIT(DBG_CTR_DEPARSER) | get_id());
}
void
SimpleSwitch::enqueue(int egress_port, std::unique_ptr<Packet> &&packet) {
    packet->set_egress_port(egress_port);

    PHV *phv = packet->get_phv();

    if (with_queueing_metadata) {
      phv->get_field("queueing_metadata.enq_timestamp").set(get_ts().count());
      phv->get_field("queueing_metadata.enq_qdepth")
          .set(egress_buffers.size(egress_port));
    }

#ifdef SSWITCH_PRIORITY_QUEUEING_ON
    size_t priority =
        phv->get_field(SSWITCH_PRIORITY_QUEUEING_SRC).get<size_t>();
    if (priority >= SSWITCH_PRIORITY_QUEUEING_NB_QUEUES) {
      bm::Logger::get()->error("Priority out of range, dropping packet");
      return;
    }
    egress_buffers.push_front(
        egress_port, SSWITCH_PRIORITY_QUEUEING_NB_QUEUES - 1 - priority,
        std::move(packet));
#else
    egress_buffers.push_front(egress_port, std::move(packet));
#endif
}
// Parser "set" operation whose source is a Data value: the destination field
// is assigned from `src`; the packet bytes are not consulted.
void
ParserOpSet<Data>::operator()(Packet *pkt, const char *data,
                              size_t *bytes_parsed) const {
  (void) data;
  (void) bytes_parsed;
  Field &target = pkt->get_phv()->get_field(dst.header, dst.offset);
  target.set(src);
  BMLOG_DEBUG_PKT(*pkt, "Parser set: setting field ({}, {}) to {}",
                  dst.header, dst.offset, target);
}
// Parser "set" operation whose source is an arithmetic expression: the
// expression is evaluated against the packet's PHV and the result is written
// into the destination field. The packet bytes are not consulted.
void
ParserOpSet<ArithExpression>::operator()(Packet *pkt, const char *data,
                                         size_t *bytes_parsed) const {
  (void) data;
  (void) bytes_parsed;
  PHV *phv = pkt->get_phv();
  Field &target = phv->get_field(dst.header, dst.offset);
  src.eval(*phv, &target);
  BMLOG_DEBUG_PKT(
    *pkt,
    "Parser set: setting field ({}, {}) from expression, new value is {}",
    dst.header, dst.offset, target);
}
// Recomputes the checksum of the IPv4 header identified by `header_id` and
// stores it in the header's checksum field. Does nothing when the header is
// not valid.
void IPv4Checksum::update_(Packet *pkt) const {
  Header &hdr = pkt->get_phv()->get_header(header_id);
  if (!hdr.is_valid()) return;
  Field &cksum_field = hdr[field_offset];

  // NOTE(review): fixed 60-byte scratch area, presumably the largest possible
  // IPv4 header; nothing here checks that the deparsed header fits -- confirm.
  char raw[60];
  hdr.deparse(raw);
  // the checksum bytes are zeroed before the sum is computed
  raw[IPV4_CKSUM_OFFSET] = 0;
  raw[IPV4_CKSUM_OFFSET + 1] = 0;

  // cksum is in network byte order
  uint16_t cksum = cksum16(raw, hdr.get_nbytes_packet());
  cksum_field.set_bytes(reinterpret_cast<char *>(&cksum), 2);
}
// Example #7
// 0
size_t
Deparser::get_headers_size(const PHV &phv) const {
  size_t headers_size = 0;
  for (auto it = headers.begin(); it != headers.end(); ++it) {
    const Header &header = phv.get_header(*it);
    if (header.is_valid()) {
      headers_size += header.get_nbytes_packet();
    }
  }
  return headers_size;
}
// Builds a learn sample by appending, for each entry in `entries` and in
// order, either the bytes of the referenced PHV field or the bytes of the
// referenced constant to `sample`.
void LearnEngine::LearnSampleBuilder::operator()(
    const PHV &phv, ByteContainer *sample
) const
{
  for (const LearnSampleEntry &item : entries) {
    const ByteContainer *chunk = nullptr;
    switch (item.tag) {
      case LearnSampleEntry::FIELD: {
        const Field &f = phv.get_field(item.field.header, item.field.offset);
        chunk = &f.get_bytes();
        break;
      }
      case LearnSampleEntry::CONSTANT: {
        chunk = &constants[item.constant.offset];
        break;
      }
    }
    // NOTE(review): chunk stays null if the tag matches neither case above;
    // presumably those are the only two tag values -- confirm.
    sample->append(*chunk);
  }
}
int
SimpleSwitch::receive(int port_num, const char *buffer, int len) {
  static int pkt_id = 0;

  // this is a good place to call this, because blocking this thread will not
  // block the processing of existing packet instances, which is a requirement
  if (do_swap() == 0) {
    check_queueing_metadata();
  }

  // we limit the packet buffer to original size + 512 bytes, which means we
  // cannot add more than 512 bytes of header data to the packet, which should
  // be more than enough
  auto packet = new_packet_ptr(port_num, pkt_id++, len,
                               bm::PacketBuffer(len + 512, buffer, len));

  BMELOG(packet_in, *packet);

  PHV *phv = packet->get_phv();
  // many current P4 programs assume this
  // it is also part of the original P4 spec
  phv->reset_metadata();

  // setting standard metadata

  phv->get_field("standard_metadata.ingress_port").set(port_num);
  // using packet register 0 to store length, this register will be updated for
  // each add_header / remove_header primitive call
  packet->set_register(PACKET_LENGTH_REG_IDX, len);
  phv->get_field("standard_metadata.packet_length").set(len);
  Field &f_instance_type = phv->get_field("standard_metadata.instance_type");
  f_instance_type.set(PKT_INSTANCE_TYPE_NORMAL);

  if (phv->has_field("intrinsic_metadata.ingress_global_timestamp")) {
    phv->get_field("intrinsic_metadata.ingress_global_timestamp")
        .set(get_ts().count());
  }

  input_buffer.push_front(std::move(packet));
  return 0;
}
// Packet processing loop: pops packets from `input_buffer`, runs parser and
// ingress pipeline, then for each resulting copy (one per multicast output, or
// the packet itself for unicast) runs the egress pipeline and the deparser and
// pushes the result to `output_buffer`. Never returns.
void SimpleSwitch::pipeline_thread() {
  Pipeline *ingress_mau = this->get_pipeline("ingress");
  Pipeline *egress_mau = this->get_pipeline("egress");
  Parser *parser = this->get_parser("parser");
  Deparser *deparser = this->get_deparser("deparser");
  PHV *phv;

  while (1) {
    std::unique_ptr<Packet> packet;
    input_buffer.pop_back(&packet);
    phv = packet->get_phv();

    int ingress_port = packet->get_ingress_port();
    BMLOG_DEBUG_PKT(*packet, "Processing packet received on port {}",
                    ingress_port);

    // Store the port in the metadata field, then read it back so that the
    // value used for pruning below is the one held by the field.
    phv->get_field("standard_metadata.ingress_port").set(ingress_port);
    ingress_port = phv->get_field("standard_metadata.ingress_port").get_int();

    parser->parse(packet.get());
    ingress_mau->apply(packet.get());

    int egress_port = phv->get_field("standard_metadata.egress_port").get_int();
    BMLOG_DEBUG_PKT(*packet, "Egress port is {}", egress_port);

    int learn_id = phv->get_field("intrinsic_metadata.learn_id").get_int();
    BMLOG_DEBUG_PKT(*packet, "Learn id is {}", learn_id);

    unsigned int mgid = phv->get_field("intrinsic_metadata.mgid").get_uint();
    BMLOG_DEBUG_PKT(*packet, "Mgid is {}", mgid);

    if (learn_id > 0) {
      get_learn_engine()->learn(learn_id, *packet.get());
      // clear the request so it is not seen again further down the pipeline
      phv->get_field("intrinsic_metadata.learn_id").set(0);
    }

    // egress port 511 without a multicast group means "drop"
    if (egress_port == 511 && mgid == 0) {
      BMLOG_DEBUG_PKT(*packet, "Dropping packet");
      continue;
    }

    if (mgid != 0) {
      // multicast: one copy per replication output, the original is discarded
      assert(mgid == 1);
      phv->get_field("intrinsic_metadata.mgid").set(0);
      const auto pre_out = pre->replicate({mgid});
      for (const auto &out : pre_out) {
        egress_port = out.egress_port;
        if (ingress_port == egress_port) continue;  // pruning
        BMLOG_DEBUG_PKT(*packet, "Replicating packet on port {}", egress_port);
        std::unique_ptr<Packet> packet_copy = packet->clone_with_phv_ptr();
        packet_copy->set_egress_port(egress_port);
        egress_mau->apply(packet_copy.get());
        deparser->deparse(packet_copy.get());
        output_buffer.push_front(std::move(packet_copy));
      }
    } else {
      // unicast: the packet itself goes through egress and out
      packet->set_egress_port(egress_port);
      egress_mau->apply(packet.get());
      deparser->deparse(packet.get());
      output_buffer.push_front(std::move(packet));
    }
  }
}
/* I have made this function more efficient by using thread_local variables
   instead of dynamic allocation at each call. Maybe it would be better to just
   try to use a stack allocator */
void
Expression::eval_(const PHV &phv, ExprType expr_type,
                  const std::vector<Data> &locals,
                  bool *b_res, Data *d_res) const {
  assert(built);

  static thread_local int data_temps_size = 4;
  // std::vector<Data> data_temps(data_registers_cnt);
  static thread_local std::vector<Data> data_temps(data_temps_size);
  while (data_temps_size < data_registers_cnt) {
    data_temps.emplace_back();
    data_temps_size++;
  }

  /* Logically, I am using these as stacks but experiments showed that using
     vectors directly was more efficient (also I can call reserve to avoid
     multiple calls to malloc */

  /* 4 is arbitrary, it is possible to do an analysis on the Expression to find
     the exact number needed, but I don't think it is worth it... */

  static thread_local std::vector<bool> bool_temps_stack;
  bool_temps_stack.clear();
  // bool_temps_stack.reserve(4);

  static thread_local std::vector<const Data *> data_temps_stack;
  data_temps_stack.clear();
  // data_temps_stack.reserve(4);

  static thread_local std::vector<const Header *> header_temps_stack;
  header_temps_stack.clear();
  // header_temps_stack.reserve(4);

  bool lb, rb;
  const Data *ld, *rd;

  for (size_t i = 0; i < ops.size(); i++) {
    const auto &op = ops[i];
    switch (op.opcode) {
      case ExprOpcode::LOAD_FIELD:
        data_temps_stack.push_back(
            &(phv.get_field(op.field.header, op.field.field_offset)));
        break;

      case ExprOpcode::LOAD_HEADER:
        header_temps_stack.push_back(&(phv.get_header(op.header)));
        break;

      case ExprOpcode::LOAD_BOOL:
        bool_temps_stack.push_back(op.bool_value);
        break;

      case ExprOpcode::LOAD_CONST:
        data_temps_stack.push_back(&const_values[op.const_offset]);
        break;

      case ExprOpcode::LOAD_LOCAL:
        data_temps_stack.push_back(&locals[op.local_offset]);
        break;

      case ExprOpcode::LOAD_REGISTER_REF:
        data_temps_stack.push_back(
            &op.register_ref.array->at(op.register_ref.idx));
        break;

      case ExprOpcode::LOAD_REGISTER_GEN:
        rd = data_temps_stack.back(); data_temps_stack.pop_back();
        data_temps_stack.push_back(&op.register_array->at(rd->get<size_t>()));
        break;

      case ExprOpcode::ADD:
        rd = data_temps_stack.back(); data_temps_stack.pop_back();
        ld = data_temps_stack.back(); data_temps_stack.pop_back();
        data_temps[op.data_dest_index].add(*ld, *rd);
        data_temps_stack.push_back(&data_temps[op.data_dest_index]);
        break;

      case ExprOpcode::SUB:
        rd = data_temps_stack.back(); data_temps_stack.pop_back();
        ld = data_temps_stack.back(); data_temps_stack.pop_back();
        data_temps[op.data_dest_index].sub(*ld, *rd);
        data_temps_stack.push_back(&data_temps[op.data_dest_index]);
        break;

      case ExprOpcode::MUL:
        rd = data_temps_stack.back(); data_temps_stack.pop_back();
        ld = data_temps_stack.back(); data_temps_stack.pop_back();
        data_temps[op.data_dest_index].multiply(*ld, *rd);
        data_temps_stack.push_back(&data_temps[op.data_dest_index]);
        break;

      case ExprOpcode::SHIFT_LEFT:
        rd = data_temps_stack.back(); data_temps_stack.pop_back();
        ld = data_temps_stack.back(); data_temps_stack.pop_back();
        data_temps[op.data_dest_index].shift_left(*ld, *rd);
        data_temps_stack.push_back(&data_temps[op.data_dest_index]);
        break;

      case ExprOpcode::SHIFT_RIGHT:
        rd = data_temps_stack.back(); data_temps_stack.pop_back();
        ld = data_temps_stack.back(); data_temps_stack.pop_back();
        data_temps[op.data_dest_index].shift_right(*ld, *rd);
        data_temps_stack.push_back(&data_temps[op.data_dest_index]);
        break;

      case ExprOpcode::EQ_DATA:
        rd = data_temps_stack.back(); data_temps_stack.pop_back();
        ld = data_temps_stack.back(); data_temps_stack.pop_back();
        bool_temps_stack.push_back(*ld == *rd);
        break;

      case ExprOpcode::NEQ_DATA:
        rd = data_temps_stack.back(); data_temps_stack.pop_back();
        ld = data_temps_stack.back(); data_temps_stack.pop_back();
        bool_temps_stack.push_back(*ld != *rd);
        break;

      case ExprOpcode::GT_DATA:
        rd = data_temps_stack.back(); data_temps_stack.pop_back();
        ld = data_temps_stack.back(); data_temps_stack.pop_back();
        bool_temps_stack.push_back(*ld > *rd);
        break;

      case ExprOpcode::LT_DATA:
        rd = data_temps_stack.back(); data_temps_stack.pop_back();
        ld = data_temps_stack.back(); data_temps_stack.pop_back();
        bool_temps_stack.push_back(*ld < *rd);
        break;

      case ExprOpcode::GET_DATA:
        rd = data_temps_stack.back(); data_temps_stack.pop_back();
        ld = data_temps_stack.back(); data_temps_stack.pop_back();
        bool_temps_stack.push_back(*ld >= *rd);
        break;

      case ExprOpcode::LET_DATA:
        rd = data_temps_stack.back(); data_temps_stack.pop_back();
        ld = data_temps_stack.back(); data_temps_stack.pop_back();
        bool_temps_stack.push_back(*ld <= *rd);
        break;

      case ExprOpcode::AND:
        rb = bool_temps_stack.back(); bool_temps_stack.pop_back();
        lb = bool_temps_stack.back(); bool_temps_stack.pop_back();
        bool_temps_stack.push_back(lb && rb);
        break;

      case ExprOpcode::OR:
        rb = bool_temps_stack.back(); bool_temps_stack.pop_back();
        lb = bool_temps_stack.back(); bool_temps_stack.pop_back();
        bool_temps_stack.push_back(lb || rb);
        break;

      case ExprOpcode::NOT:
        rb = bool_temps_stack.back(); bool_temps_stack.pop_back();
        bool_temps_stack.push_back(!rb);
        break;

      case ExprOpcode::BIT_AND:
        rd = data_temps_stack.back(); data_temps_stack.pop_back();
        ld = data_temps_stack.back(); data_temps_stack.pop_back();
        data_temps[op.data_dest_index].bit_and(*ld, *rd);
        data_temps_stack.push_back(&data_temps[op.data_dest_index]);
        break;

      case ExprOpcode::BIT_OR:
        rd = data_temps_stack.back(); data_temps_stack.pop_back();
        ld = data_temps_stack.back(); data_temps_stack.pop_back();
        data_temps[op.data_dest_index].bit_or(*ld, *rd);
        data_temps_stack.push_back(&data_temps[op.data_dest_index]);
        break;

      case ExprOpcode::BIT_XOR:
        rd = data_temps_stack.back(); data_temps_stack.pop_back();
        ld = data_temps_stack.back(); data_temps_stack.pop_back();
        data_temps[op.data_dest_index].bit_xor(*ld, *rd);
        data_temps_stack.push_back(&data_temps[op.data_dest_index]);
        break;

      case ExprOpcode::BIT_NEG:
        rd = data_temps_stack.back(); data_temps_stack.pop_back();
        data_temps[op.data_dest_index].bit_neg(*rd);
        data_temps_stack.push_back(&data_temps[op.data_dest_index]);
        break;

      case ExprOpcode::VALID_HEADER:
        bool_temps_stack.push_back(header_temps_stack.back()->is_valid());
        header_temps_stack.pop_back();
        break;

      case ExprOpcode::TERNARY_OP:
        if (bool_temps_stack.back())
          i += 1;
        bool_temps_stack.pop_back();
        break;

      case ExprOpcode::SKIP:
        i += op.skip_num;
        break;

      default:
        assert(0 && "invalid operand");
        break;
    }
  }

  switch (expr_type) {
    case ExprType::EXPR_BOOL:
      *b_res = bool_temps_stack.back();
      break;
    case ExprType::EXPR_DATA:
      d_res->set(*(data_temps_stack.back()));
      break;
  }
}
// Egress worker loop for `worker_id`: pops packets from the egress buffers,
// fills in egress metadata, runs the egress pipeline, then handles egress
// cloning, drop, deparsing and recirculation before pushing the packet to
// `output_buffer`. Never returns.
void
SimpleSwitch::egress_thread(size_t worker_id) {
  PHV *phv;

  while (1) {
    std::unique_ptr<Packet> packet;
    size_t port;
    egress_buffers.pop_back(worker_id, &port, &packet);

    // looked up again for every packet
    Deparser *deparser = this->get_deparser("deparser");
    Pipeline *egress_mau = this->get_pipeline("egress");

    phv = packet->get_phv();

    if (with_queueing_metadata) {
      // time spent in the queue, and queue depth at dequeue time
      auto enq_timestamp =
          phv->get_field("queueing_metadata.enq_timestamp").get<ts_res::rep>();
      phv->get_field("queueing_metadata.deq_timedelta").set(
          get_ts().count() - enq_timestamp);
      phv->get_field("queueing_metadata.deq_qdepth").set(
          egress_buffers.size(port));
    }

    phv->get_field("standard_metadata.egress_port").set(port);

    // egress_spec is cleared before the egress pipeline runs; it is read back
    // below to detect a drop request (511)
    Field &f_egress_spec = phv->get_field("standard_metadata.egress_spec");
    f_egress_spec.set(0);

    phv->get_field("standard_metadata.packet_length").set(
        packet->get_register(PACKET_LENGTH_REG_IDX));

    egress_mau->apply(packet.get());

    Field &f_clone_spec = phv->get_field("standard_metadata.clone_spec");
    unsigned int clone_spec = f_clone_spec.get_uint();

    // EGRESS CLONING
    if (clone_spec) {
      BMLOG_DEBUG_PKT(*packet, "Cloning packet at egress");
      // low 16 bits: mirroring session, upper bits: field list id
      int egress_port = get_mirroring_mapping(clone_spec & 0xFFFF);
      if (egress_port >= 0) {
        f_clone_spec.set(0);
        p4object_id_t field_list_id = clone_spec >> 16;
        std::unique_ptr<Packet> packet_copy =
            packet->clone_with_phv_reset_metadata_ptr();
        PHV *phv_copy = packet_copy->get_phv();
        FieldList *field_list = this->get_field_list(field_list_id);
        // carry over only the fields named in the field list
        for (const auto &p : *field_list) {
          phv_copy->get_field(p.header, p.offset)
            .set(phv->get_field(p.header, p.offset));
        }
        phv_copy->get_field("standard_metadata.instance_type")
            .set(PKT_INSTANCE_TYPE_EGRESS_CLONE);
        enqueue(egress_port, std::move(packet_copy));
      }
    }

    // TODO(antonin): should not be done like this in egress pipeline
    int egress_spec = f_egress_spec.get_int();
    if (egress_spec == 511) {  // drop packet
      BMLOG_DEBUG_PKT(*packet, "Dropping packet at the end of egress");
      continue;
    }

    deparser->deparse(packet.get());

    // RECIRCULATE
    if (phv->has_field("intrinsic_metadata.recirculate_flag")) {
      Field &f_recirc = phv->get_field("intrinsic_metadata.recirculate_flag");
      if (f_recirc.get_int()) {
        BMLOG_DEBUG_PKT(*packet, "Recirculating packet");
        // the flag value doubles as the id of the field list to preserve
        p4object_id_t field_list_id = f_recirc.get_int();
        f_recirc.set(0);
        FieldList *field_list = this->get_field_list(field_list_id);
        // TODO(antonin): just like for resubmit, there is no need for a copy
        // here, but it is more convenient for this first prototype
        std::unique_ptr<Packet> packet_copy = packet->clone_no_phv_ptr();
        PHV *phv_copy = packet_copy->get_phv();
        phv_copy->reset_metadata();
        for (const auto &p : *field_list) {
          phv_copy->get_field(p.header, p.offset)
              .set(phv->get_field(p.header, p.offset));
        }
        phv_copy->get_field("standard_metadata.instance_type")
            .set(PKT_INSTANCE_TYPE_RECIRC);
        // the recirculated packet's length is that of the deparsed data
        size_t packet_size = packet_copy->get_data_size();
        packet_copy->set_register(PACKET_LENGTH_REG_IDX, packet_size);
        phv_copy->get_field("standard_metadata.packet_length").set(packet_size);
        input_buffer.push_front(std::move(packet_copy));
        continue;
      }
    }

    output_buffer.push_front(std::move(packet));
  }
}
// Ingress worker loop: pops packets from `input_buffer`, runs the parser and
// the ingress pipeline, then handles, in this order, ingress cloning,
// learning, resubmit, multicast replication and drop, before enqueueing the
// packet for egress. Never returns.
void
SimpleSwitch::ingress_thread() {
  PHV *phv;

  while (1) {
    std::unique_ptr<Packet> packet;
    input_buffer.pop_back(&packet);

    // TODO(antonin): only update these if swapping actually happened?
    Parser *parser = this->get_parser("parser");
    Pipeline *ingress_mau = this->get_pipeline("ingress");

    phv = packet->get_phv();

    int ingress_port = packet->get_ingress_port();
    // ingress_port is otherwise only used by the logging macro below
    (void) ingress_port;
    BMLOG_DEBUG_PKT(*packet, "Processing packet received on port {}",
                    ingress_port);

    /* This looks like it comes out of the blue. However this is needed for
       ingress cloning. The parser updates the buffer state (pops the parsed
       headers) to make the deparser's job easier (the same buffer is
       re-used). But for ingress cloning, the original packet is needed. This
       kind of looks hacky though. Maybe a better solution would be to have the
       parser leave the buffer unchanged, and move the pop logic to the
       deparser. TODO? */
    const Packet::buffer_state_t packet_in_state = packet->save_buffer_state();
    parser->parse(packet.get());

    ingress_mau->apply(packet.get());

    packet->reset_exit();

    // Read the pipeline's decisions out of the metadata fields.
    Field &f_egress_spec = phv->get_field("standard_metadata.egress_spec");
    int egress_spec = f_egress_spec.get_int();

    Field &f_clone_spec = phv->get_field("standard_metadata.clone_spec");
    unsigned int clone_spec = f_clone_spec.get_uint();

    // Both stay 0 when the corresponding intrinsic field is not present.
    int learn_id = 0;
    unsigned int mgid = 0u;

    if (phv->has_field("intrinsic_metadata.lf_field_list")) {
      Field &f_learn_id = phv->get_field("intrinsic_metadata.lf_field_list");
      learn_id = f_learn_id.get_int();
    }

    // detect mcast support, if this is true we assume that other fields needed
    // for mcast are also defined
    if (phv->has_field("intrinsic_metadata.mcast_grp")) {
      Field &f_mgid = phv->get_field("intrinsic_metadata.mcast_grp");
      mgid = f_mgid.get_uint();
    }

    int egress_port;

    // INGRESS CLONING
    if (clone_spec) {
      BMLOG_DEBUG_PKT(*packet, "Cloning packet at ingress");
      // low 16 bits: mirroring session, upper bits: field list id
      egress_port = get_mirroring_mapping(clone_spec & 0xFFFF);
      f_clone_spec.set(0);
      if (egress_port >= 0) {
        // Temporarily rewind the buffer to its pre-parse state so the copy is
        // made from the packet as received; the post-ingress state is put
        // back once the clone has been enqueued.
        const Packet::buffer_state_t packet_out_state =
            packet->save_buffer_state();
        packet->restore_buffer_state(packet_in_state);
        p4object_id_t field_list_id = clone_spec >> 16;
        auto packet_copy = copy_ingress_pkt(
            packet, PKT_INSTANCE_TYPE_INGRESS_CLONE, field_list_id);
        // we need to parse again
        // the alternative would be to pay the (huge) price of PHV copy for
        // every ingress packet
        parser->parse(packet_copy.get());
        enqueue(egress_port, std::move(packet_copy));
        packet->restore_buffer_state(packet_out_state);
      }
    }

    // LEARNING
    if (learn_id > 0) {
      get_learn_engine()->learn(learn_id, *packet.get());
    }

    // RESUBMIT
    if (phv->has_field("intrinsic_metadata.resubmit_flag")) {
      Field &f_resubmit = phv->get_field("intrinsic_metadata.resubmit_flag");
      if (f_resubmit.get_int()) {
        BMLOG_DEBUG_PKT(*packet, "Resubmitting packet");
        // get the packet ready for being parsed again at the beginning of
        // ingress
        packet->restore_buffer_state(packet_in_state);
        // the flag value doubles as the id of the field list to preserve
        p4object_id_t field_list_id = f_resubmit.get_int();
        f_resubmit.set(0);
        // TODO(antonin): a copy is not needed here, but I don't yet have an
        // optimized way of doing this
        auto packet_copy = copy_ingress_pkt(
            packet, PKT_INSTANCE_TYPE_RESUBMIT, field_list_id);
        input_buffer.push_front(std::move(packet_copy));
        // the original packet is not processed any further
        continue;
      }
    }

    Field &f_instance_type = phv->get_field("standard_metadata.instance_type");

    // MULTICAST
    // remember the instance type so it can be restored after replication
    int instance_type = f_instance_type.get_int();
    if (mgid != 0) {
      BMLOG_DEBUG_PKT(*packet, "Multicast requested for packet");
      Field &f_rid = phv->get_field("intrinsic_metadata.egress_rid");
      const auto pre_out = pre->replicate({mgid});
      auto packet_size = packet->get_register(PACKET_LENGTH_REG_IDX);
      for (const auto &out : pre_out) {
        egress_port = out.egress_port;
        // if (ingress_port == egress_port) continue; // pruning
        BMLOG_DEBUG_PKT(*packet, "Replicating packet on port {}", egress_port);
        // rid and instance type are set on the original before cloning, so
        // each copy is created with them
        f_rid.set(out.rid);
        f_instance_type.set(PKT_INSTANCE_TYPE_REPLICATION);
        std::unique_ptr<Packet> packet_copy = packet->clone_with_phv_ptr();
        packet_copy->set_register(PACKET_LENGTH_REG_IDX, packet_size);
        enqueue(egress_port, std::move(packet_copy));
      }
      f_instance_type.set(instance_type);

      // when doing multicast, we discard the original packet
      continue;
    }

    // unicast: the egress port is the egress_spec read after ingress
    egress_port = egress_spec;
    BMLOG_DEBUG_PKT(*packet, "Egress port is {}", egress_port);

    if (egress_port == 511) {  // drop packet
      BMLOG_DEBUG_PKT(*packet, "Dropping packet at the end of ingress");
      continue;
    }

    enqueue(egress_port, std::move(packet));
  }
}