Beispiel #1
0
static pj_bool_t on_rx_msg(pjsip_rx_data* rdata)
{
  // Before we start, get a timestamp.  This will track the time from
  // receiving a message to forwarding it on (or rejecting it).
  struct rx_msg_qe qe;
  if (clock_gettime(CLOCK_MONOTONIC, &qe.rx_time) != 0)
  {
    LOG_ERROR("Failed to get receive timestamp: %s", strerror(errno));
    return PJ_TRUE;
  }

  // Do logging.
  local_log_rx_msg(rdata);
  sas_log_rx_msg(rdata);

  // Clone the message and queue it to a scheduler thread.
  pjsip_rx_data* clone_rdata;
  pj_status_t status = pjsip_rx_data_clone(rdata, 0, &clone_rdata);

  if (status != PJ_SUCCESS)
  {
    // Failed to clone the message, so drop it.
    LOG_ERROR("Failed to clone incoming message (%s)", PJUtils::pj_status_to_string(status).c_str());
    return PJ_TRUE;
  }

  // Make sure the trail identifier is passed across.
  set_trail(clone_rdata, get_trail(rdata));

  // @TODO - need to think about back-pressure mechanisms.  For example,
  // should we have a maximum depth of queue and drop messages after that?
  // May be better to hold on to the message until the queue has space - this
  // will force back pressure on the particular TCP connection.  Or should we
  // have a queue per transport and round-robin them?

  LOG_DEBUG("Queuing cloned received message %p for worker threads", clone_rdata);
  qe.rdata = clone_rdata;
  rx_msg_q.push(qe);

  // return TRUE to flag that we have absorbed the incoming message.
  return PJ_TRUE;
}
static pj_bool_t process_on_rx_msg(pjsip_rx_data* rdata)
{
  // Do logging.
  local_log_rx_msg(rdata);
  sas_log_rx_msg(rdata);
  SAS::TrailId trail = get_trail(rdata);

  requests_counter->increment();

  // Check whether the request should be processed
  if (!(load_monitor->admit_request(trail)) &&
      (rdata->msg_info.msg->type == PJSIP_REQUEST_MSG) &&
      (rdata->msg_info.msg->line.req.method.id != PJSIP_ACK_METHOD))
  {
    // Discard non-ACK requests if there are no available tokens.
    // Respond statelessly with a 503 Service Unavailable, including a
    // Retry-After header with a zero length timeout.
    TRC_DEBUG("Rejected request due to overload");

    // LCOV_EXCL_START - can't meaningfully verify SAS in UT
    SAS::Marker start_marker(trail, MARKER_ID_START, 1u);
    SAS::report_marker(start_marker);

    SAS::Event event(trail, SASEvent::SIP_OVERLOAD, 0);
    event.add_static_param(load_monitor->get_target_latency());
    event.add_static_param(load_monitor->get_current_latency());
    event.add_static_param(load_monitor->get_rate_limit());
    SAS::report_event(event);

    SAS::Marker end_marker(trail, MARKER_ID_END, 1u);
    SAS::report_marker(end_marker);

    // LCOV_EXCL_STOP

    pjsip_retry_after_hdr* retry_after = pjsip_retry_after_hdr_create(rdata->tp_info.pool, 0);
    PJUtils::respond_stateless(stack_data.endpt,
                               rdata,
                               PJSIP_SC_SERVICE_UNAVAILABLE,
                               NULL,
                               (pjsip_hdr*)retry_after,
                               NULL);

    // We no longer terminate TCP connections on overload as the shutdown has
    // to wait for existing transactions to end and therefore it takes too
    // long to get feedback to the downstream node.  We expect downstream nodes
    // to rebalance load if possible triggered by receipt of the 503 responses.

    overload_counter->increment();
    return PJ_TRUE;
  }

  // If a message has parse errors, reject it (if it's a request other than ACK)
  // or drop it (if it's a response or an ACK request).
  if (!pj_list_empty((pj_list_type*)&rdata->msg_info.parse_err))
  {
    SAS::TrailId trail = get_trail(rdata);
    TRC_DEBUG("Report SAS start marker - trail (%llx)", trail);
    SAS::Marker start_marker(trail, MARKER_ID_START, 1u);
    SAS::report_marker(start_marker);

    pjsip_parser_err_report *err = rdata->msg_info.parse_err.next;
    while (err != &rdata->msg_info.parse_err)
    {
      TRC_VERBOSE("Error parsing header %.*s", (int)err->hname.slen, err->hname.ptr);
      SAS::Event event(trail, SASEvent::UNPARSEABLE_HEADER, 0);
      event.add_var_param((int)err->hname.slen, err->hname.ptr);
      SAS::report_event(event);
      err = err->next;
    }

    if (rdata->msg_info.msg->type == PJSIP_REQUEST_MSG)
    {
      if (rdata->msg_info.msg->line.req.method.id == PJSIP_ACK_METHOD)
      {
        TRC_WARNING("Dropping malformed ACK request");
      }
      else
      {
        TRC_WARNING("Rejecting malformed request with a 400 error");
        PJUtils::respond_stateless(stack_data.endpt,
                                   rdata,
                                   PJSIP_SC_BAD_REQUEST,
                                   NULL,
                                   NULL,
                                   NULL);
      }
    }
    else
    {
      TRC_WARNING("Dropping malformed response");
    }

    // As this message is malformed, return PJ_TRUE to absorb it and
    // stop later modules from processing it.
    return PJ_TRUE;
  }
  return PJ_FALSE;
}
Beispiel #3
0
static pj_bool_t on_rx_msg(pjsip_rx_data* rdata)
{
  // Do logging.
  local_log_rx_msg(rdata);
  sas_log_rx_msg(rdata);

  requests_counter->increment();

  // Check whether the request should be processed
  if (!(load_monitor->admit_request()) &&
      (rdata->msg_info.msg->type == PJSIP_REQUEST_MSG) &&
      (rdata->msg_info.msg->line.req.method.id != PJSIP_ACK_METHOD))
  {
    // Discard non-ACK requests if there are no available tokens.
    // Respond statelessly with a 503 Service Unavailable, including a
    // Retry-After header with a zero length timeout.
    LOG_DEBUG("Rejected request due to overload");

    pjsip_cid_hdr* cid = (pjsip_cid_hdr*)rdata->msg_info.cid;

    SAS::TrailId trail = get_trail(rdata);

    SAS::Marker start_marker(trail, MARKER_ID_START, 1u);
    SAS::report_marker(start_marker);

    SAS::Event event(trail, SASEvent::SIP_OVERLOAD, 0);
    event.add_static_param(load_monitor->get_target_latency());
    event.add_static_param(load_monitor->get_current_latency());
    event.add_static_param(load_monitor->get_rate_limit());
    SAS::report_event(event);

    PJUtils::report_sas_to_from_markers(trail, rdata->msg_info.msg);

    if ((rdata->msg_info.msg->line.req.method.id == PJSIP_REGISTER_METHOD) ||
        ((pjsip_method_cmp(&rdata->msg_info.msg->line.req.method, pjsip_get_subscribe_method())) == 0) ||
        ((pjsip_method_cmp(&rdata->msg_info.msg->line.req.method, pjsip_get_notify_method())) == 0))
    {
      // Omit the Call-ID for these requests, as the same Call-ID can be
      // reused over a long period of time and produce huge SAS trails.
      PJUtils::mark_sas_call_branch_ids(trail, NULL, rdata->msg_info.msg);
    }
    else
    {
      PJUtils::mark_sas_call_branch_ids(trail, cid, rdata->msg_info.msg);
    }

    SAS::Marker end_marker(trail, MARKER_ID_END, 1u);
    SAS::report_marker(end_marker);

    pjsip_retry_after_hdr* retry_after = pjsip_retry_after_hdr_create(rdata->tp_info.pool, 0);
    PJUtils::respond_stateless(stack_data.endpt,
                               rdata,
                               PJSIP_SC_SERVICE_UNAVAILABLE,
                               NULL,
                               (pjsip_hdr*)retry_after,
                               NULL);

    // We no longer terminate TCP connections on overload as the shutdown has
    // to wait for existing transactions to end and therefore it takes too
    // long to get feedback to the downstream node.  We expect downstream nodes
    // to rebalance load if possible triggered by receipt of the 503 responses.

    overload_counter->increment();
    return PJ_TRUE;
  }

  // Check that the worker threads are not all deadlocked.
  if (rx_msg_q.is_deadlocked())
  {
    // The queue has not been serviced for sufficiently long to imply that
    // all the worker threads are deadlock, so exit the process so it will be
    // restarted.
    LOG_ERROR("Detected worker thread deadlock - exiting");
    abort();
  }

  // Before we start, get a timestamp.  This will track the time from
  // receiving a message to forwarding it on (or rejecting it).
  struct rx_msg_qe qe;
  qe.stop_watch.start();

  // Notify the connection tracker that the transport is active.
  connection_tracker->connection_active(rdata->tp_info.transport);

  // Clone the message and queue it to a scheduler thread.
  pjsip_rx_data* clone_rdata;
  pj_status_t status = pjsip_rx_data_clone(rdata, 0, &clone_rdata);

  if (status != PJ_SUCCESS)
  {
    // Failed to clone the message, so drop it.
    LOG_ERROR("Failed to clone incoming message (%s)", PJUtils::pj_status_to_string(status).c_str());
    return PJ_TRUE;
  }

  // Make sure the trail identifier is passed across.
  set_trail(clone_rdata, get_trail(rdata));

  // @TODO - need to think about back-pressure mechanisms.  For example,
  // should we have a maximum depth of queue and drop messages after that?
  // May be better to hold on to the message until the queue has space - this
  // will force back pressure on the particular TCP connection.  Or should we
  // have a queue per transport and round-robin them?

  LOG_DEBUG("Queuing cloned received message %p for worker threads", clone_rdata);
  qe.rdata = clone_rdata;

  // Track the current queue size
  queue_size_accumulator->accumulate(rx_msg_q.size());
  rx_msg_q.push(qe);

  // return TRUE to flag that we have absorbed the incoming message.
  return PJ_TRUE;
}