Example #1
void stop_stack()
{
  // Terminate the PJSIP threads and the worker threads.  We kill the
  // PJSIP threads first - if we killed the worker threads first, the
  // rx_msg_q would stop being serviced and could fill up, blocking the
  // PJSIP threads and causing a deadlock.

  // Set the quit flag to signal the PJSIP threads to exit, then wait
  // for them to exit.
  quit_flag = PJ_TRUE;

  for (std::vector<pj_thread_t*>::iterator i = pjsip_threads.begin();
       i != pjsip_threads.end();
       ++i)
  {
    pj_thread_join(*i);
  }

  // Now it is safe to signal the worker threads to exit via the queue and to
  // wait for them to terminate.
  rx_msg_q.terminate();
  for (std::vector<pj_thread_t*>::iterator i = worker_threads.begin();
       i != worker_threads.end();
       ++i)
  {
    pj_thread_join(*i);
  }
}
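The shutdown ordering above only works if rx_msg_q.terminate() wakes every worker blocked in pop() and makes subsequent pops return false once the queue is drained, so the pj_thread_join() calls can complete. A minimal sketch of a queue with that contract, assuming a hypothetical condition-variable implementation (the real rx_msg_q may differ):

#include <queue>
#include <mutex>
#include <condition_variable>

template <typename T>
class BlockingQueue
{
public:
  BlockingQueue() : _terminated(false) {}

  void push(const T& item)
  {
    std::lock_guard<std::mutex> lock(_mutex);
    _queue.push(item);
    _cond.notify_one();
  }

  // Blocks until an item is available or the queue is terminated.
  // Returns false once terminate() has been called and the queue is
  // drained - this is what lets the "while (rx_msg_q.pop(qe))" worker
  // loop exit cleanly.
  bool pop(T& item)
  {
    std::unique_lock<std::mutex> lock(_mutex);
    _cond.wait(lock, [this] { return _terminated || !_queue.empty(); });
    if (_queue.empty())
    {
      return false;  // Terminated and drained.
    }
    item = _queue.front();
    _queue.pop();
    return true;
  }

  void terminate()
  {
    std::lock_guard<std::mutex> lock(_mutex);
    _terminated = true;
    _cond.notify_all();  // Wake every blocked worker thread.
  }

private:
  std::queue<T> _queue;
  std::mutex _mutex;
  std::condition_variable _cond;
  bool _terminated;
};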
Example #2
/// Worker threads handle most SIP message processing.
static int worker_thread(void* p)
{
  // Set up data to always process incoming messages at the first PJSIP
  // module after our module.
  pjsip_process_rdata_param rp;
  pjsip_process_rdata_param_default(&rp);
  rp.start_mod = &mod_stack;
  rp.idx_after_start = 1;

  LOG_DEBUG("Worker thread started");

  struct rx_msg_qe qe = {0};

  while (rx_msg_q.pop(qe))
  {
    pjsip_rx_data* rdata = qe.rdata;
    if (rdata)
    {
      LOG_DEBUG("Worker thread dequeue message %p", rdata);
      pjsip_endpt_process_rx_data(stack_data.endpt, rdata, &rp, NULL);
      LOG_DEBUG("Worker thread completed processing message %p", rdata);
      pjsip_rx_data_free_cloned(rdata);

      struct timespec done_time;
      if (clock_gettime(CLOCK_MONOTONIC, &done_time) == 0)
      {
        long latency_us = (done_time.tv_nsec - qe.rx_time.tv_nsec) / 1000L +
                          (done_time.tv_sec - qe.rx_time.tv_sec) * 1000000L;
        LOG_DEBUG("Request latency = %ldus", latency_us);
        latency_accumulator->accumulate(latency_us);
        latency_accumulator->refresh();
      }
      else
      {
        LOG_ERROR("Failed to get done timestamp: %s", strerror(errno));
      }
    }
  }

  LOG_DEBUG("Worker thread ended");

  return 0;
}
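The latency arithmetic above is worth a second look: the nanosecond difference can be negative when the done timestamp has a smaller tv_nsec field than the receive timestamp, but the seconds term compensates, so the sum is still correct. A minimal standalone sketch of the same conversion:

#include <ctime>

// Convert the difference between two CLOCK_MONOTONIC timestamps to
// microseconds.  The tv_nsec difference may be negative; the tv_sec
// term compensates, so the sum is still the correct elapsed time.
static long elapsed_us(const struct timespec& rx, const struct timespec& done)
{
  return (done.tv_sec - rx.tv_sec) * 1000000L +
         (done.tv_nsec - rx.tv_nsec) / 1000L;
}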
Example #3
static pj_bool_t on_rx_msg(pjsip_rx_data* rdata)
{
  // Before we start, get a timestamp.  This will track the time from
  // receiving a message to forwarding it on (or rejecting it).
  struct rx_msg_qe qe;
  if (clock_gettime(CLOCK_MONOTONIC, &qe.rx_time) != 0)
  {
    LOG_ERROR("Failed to get receive timestamp: %s", strerror(errno));
    return PJ_TRUE;
  }

  // Do logging.
  local_log_rx_msg(rdata);
  sas_log_rx_msg(rdata);

  // Clone the message and queue it to a worker thread.
  pjsip_rx_data* clone_rdata;
  pj_status_t status = pjsip_rx_data_clone(rdata, 0, &clone_rdata);

  if (status != PJ_SUCCESS)
  {
    // Failed to clone the message, so drop it.
    LOG_ERROR("Failed to clone incoming message (%s)", PJUtils::pj_status_to_string(status).c_str());
    return PJ_TRUE;
  }

  // Make sure the trail identifier is passed across.
  set_trail(clone_rdata, get_trail(rdata));

  // @TODO - need to think about back-pressure mechanisms.  For example,
  // should we have a maximum depth of queue and drop messages after that?
  // May be better to hold on to the message until the queue has space - this
  // will force back pressure on the particular TCP connection.  Or should we
  // have a queue per transport and round-robin them?

  LOG_DEBUG("Queuing cloned received message %p for worker threads", clone_rdata);
  qe.rdata = clone_rdata;
  rx_msg_q.push(qe);

  // Return PJ_TRUE to flag that we have absorbed the incoming message.
  return PJ_TRUE;
}
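The queue entry is the handoff contract between the transport thread and the workers. A minimal sketch of what the rx_msg_qe struct above might contain (the real definition lives elsewhere in this file, and the later revision in Examples #4 and #6 swaps rx_time for a stop_watch):

// Hypothetical sketch of the queue entry used above: the cloned rdata,
// which the worker thread frees with pjsip_rx_data_free_cloned(), and
// the receive timestamp used to compute per-request latency.
struct rx_msg_qe
{
  pjsip_rx_data* rdata;     // Cloned message - owned by the worker.
  struct timespec rx_time;  // CLOCK_MONOTONIC receive timestamp.
};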
Example #4
/// Worker threads handle most SIP message processing.
static int worker_thread(void* p)
{
  // Set up data to always process incoming messages at the first PJSIP
  // module after our module.
  pjsip_process_rdata_param rp;
  pjsip_process_rdata_param_default(&rp);
  rp.start_mod = &mod_stack;
  rp.idx_after_start = 1;

  LOG_DEBUG("Worker thread started");

  struct rx_msg_qe qe = {0};

  while (rx_msg_q.pop(qe))
  {
    pjsip_rx_data* rdata = qe.rdata;
    if (rdata)
    {
      LOG_DEBUG("Worker thread dequeue message %p", rdata);
      pjsip_endpt_process_rx_data(stack_data.endpt, rdata, &rp, NULL);
      LOG_DEBUG("Worker thread completed processing message %p", rdata);
      pjsip_rx_data_free_cloned(rdata);

      unsigned long latency_us;
      if (qe.stop_watch.read(latency_us))
      {
        LOG_DEBUG("Request latency = %ldus", latency_us);
        latency_accumulator->accumulate(latency_us);
        load_monitor->request_complete(latency_us);
      }
      else
      {
        LOG_ERROR("Failed to get done timestamp: %s", strerror(errno));
      }
    }
  }

  LOG_DEBUG("Worker thread ended");

  return 0;
}
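Example #4 differs from Example #2 in that the raw clock_gettime() arithmetic has moved behind a stop_watch member on the queue entry, and the latency now also feeds a load monitor. A minimal sketch of a stop watch with this interface, assuming it wraps CLOCK_MONOTONIC as Example #2 did (the real class may do more, e.g. cache clock failures):

#include <ctime>

class StopWatch
{
public:
  // Record the start time; returns false if the clock read fails.
  bool start()
  {
    return (clock_gettime(CLOCK_MONOTONIC, &_start) == 0);
  }

  // Read the elapsed time in microseconds since start().
  bool read(unsigned long& elapsed_us)
  {
    struct timespec now;
    if (clock_gettime(CLOCK_MONOTONIC, &now) != 0)
    {
      return false;
    }
    long us = (now.tv_sec - _start.tv_sec) * 1000000L +
              (now.tv_nsec - _start.tv_nsec) / 1000L;
    elapsed_us = (unsigned long)us;
    return true;
  }

private:
  struct timespec _start;
};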
Example #5
pj_status_t init_pjsip()
{
  pj_status_t status;

  // Must init PJLIB first:
  status = pj_init();
  PJ_ASSERT_RETURN(status == PJ_SUCCESS, status);

  // Dump PJLIB config to log file.
  pj_dump_config();

  // Then init PJLIB-UTIL:
  status = pjlib_util_init();
  PJ_ASSERT_RETURN(status == PJ_SUCCESS, status);

  // Must create a pool factory before we can allocate any memory.
  pj_caching_pool_init(&stack_data.cp, &pj_pool_factory_default_policy, 0);
  // Create the endpoint.
  status = pjsip_endpt_create(&stack_data.cp.factory, NULL, &stack_data.endpt);
  PJ_ASSERT_RETURN(status == PJ_SUCCESS, status);

  // Init transaction layer.
  status = pjsip_tsx_layer_init_module(stack_data.endpt);
  PJ_ASSERT_RETURN(status == PJ_SUCCESS, status);

  // Create pool for the application
  stack_data.pool = pj_pool_create(&stack_data.cp.factory,
                                   "sprout-bono",
                                   4000,
                                   4000,
                                   NULL);

  status = register_custom_headers();
  PJ_ASSERT_RETURN(status == PJ_SUCCESS, status);

  // Enable deadlock detection on the message queue.
  rx_msg_q.set_deadlock_threshold(MSG_Q_DEADLOCK_TIME);

  return PJ_SUCCESS;
}
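The initialisation order here matters: PJLIB must be initialised before PJLIB-UTIL, and a pool factory must exist before the endpoint (or anything else) can allocate memory. A hedged sketch of the matching teardown, releasing resources in the reverse of the order init_pjsip() acquired them (the real stack's shutdown path may also unregister modules first):

void destroy_pjsip()
{
  // Release in reverse order of creation: application pool, endpoint,
  // pool factory, then PJLIB itself.
  pj_pool_release(stack_data.pool);
  pjsip_endpt_destroy(stack_data.endpt);
  pj_caching_pool_destroy(&stack_data.cp);
  pj_shutdown();
}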
Example #6
static pj_bool_t on_rx_msg(pjsip_rx_data* rdata)
{
  // Do logging.
  local_log_rx_msg(rdata);
  sas_log_rx_msg(rdata);

  requests_counter->increment();

  // Check whether the request should be processed
  if (!(load_monitor->admit_request()) &&
      (rdata->msg_info.msg->type == PJSIP_REQUEST_MSG) &&
      (rdata->msg_info.msg->line.req.method.id != PJSIP_ACK_METHOD))
  {
    // Discard non-ACK requests if there are no available tokens.
    // Respond statelessly with a 503 Service Unavailable, including a
    // Retry-After header with a timeout of zero.
    LOG_DEBUG("Rejected request due to overload");

    pjsip_cid_hdr* cid = (pjsip_cid_hdr*)rdata->msg_info.cid;

    SAS::TrailId trail = get_trail(rdata);

    SAS::Marker start_marker(trail, MARKER_ID_START, 1u);
    SAS::report_marker(start_marker);

    SAS::Event event(trail, SASEvent::SIP_OVERLOAD, 0);
    event.add_static_param(load_monitor->get_target_latency());
    event.add_static_param(load_monitor->get_current_latency());
    event.add_static_param(load_monitor->get_rate_limit());
    SAS::report_event(event);

    PJUtils::report_sas_to_from_markers(trail, rdata->msg_info.msg);

    if ((rdata->msg_info.msg->line.req.method.id == PJSIP_REGISTER_METHOD) ||
        ((pjsip_method_cmp(&rdata->msg_info.msg->line.req.method, pjsip_get_subscribe_method())) == 0) ||
        ((pjsip_method_cmp(&rdata->msg_info.msg->line.req.method, pjsip_get_notify_method())) == 0))
    {
      // Omit the Call-ID for these requests, as the same Call-ID can be
      // reused over a long period of time and produce huge SAS trails.
      PJUtils::mark_sas_call_branch_ids(trail, NULL, rdata->msg_info.msg);
    }
    else
    {
      PJUtils::mark_sas_call_branch_ids(trail, cid, rdata->msg_info.msg);
    }

    SAS::Marker end_marker(trail, MARKER_ID_END, 1u);
    SAS::report_marker(end_marker);

    pjsip_retry_after_hdr* retry_after = pjsip_retry_after_hdr_create(rdata->tp_info.pool, 0);
    PJUtils::respond_stateless(stack_data.endpt,
                               rdata,
                               PJSIP_SC_SERVICE_UNAVAILABLE,
                               NULL,
                               (pjsip_hdr*)retry_after,
                               NULL);

    // We no longer terminate TCP connections on overload as the shutdown has
    // to wait for existing transactions to end and therefore it takes too
    // long to get feedback to the downstream node.  We expect downstream nodes
    // to rebalance load if possible triggered by receipt of the 503 responses.

    overload_counter->increment();
    return PJ_TRUE;
  }

  // Check that the worker threads are not all deadlocked.
  if (rx_msg_q.is_deadlocked())
  {
    // The queue has not been serviced for long enough to imply that all
    // the worker threads are deadlocked, so exit the process so it will
    // be restarted.
    LOG_ERROR("Detected worker thread deadlock - exiting");
    abort();
  }

  // Before we start, get a timestamp.  This will track the time from
  // receiving a message to forwarding it on (or rejecting it).
  struct rx_msg_qe qe;
  qe.stop_watch.start();

  // Notify the connection tracker that the transport is active.
  connection_tracker->connection_active(rdata->tp_info.transport);

  // Clone the message and queue it to a worker thread.
  pjsip_rx_data* clone_rdata;
  pj_status_t status = pjsip_rx_data_clone(rdata, 0, &clone_rdata);

  if (status != PJ_SUCCESS)
  {
    // Failed to clone the message, so drop it.
    LOG_ERROR("Failed to clone incoming message (%s)", PJUtils::pj_status_to_string(status).c_str());
    return PJ_TRUE;
  }

  // Make sure the trail identifier is passed across.
  set_trail(clone_rdata, get_trail(rdata));

  // @TODO - need to think about back-pressure mechanisms.  For example,
  // should we have a maximum depth of queue and drop messages after that?
  // May be better to hold on to the message until the queue has space - this
  // will force back pressure on the particular TCP connection.  Or should we
  // have a queue per transport and round-robin them?

  LOG_DEBUG("Queuing cloned received message %p for worker threads", clone_rdata);
  qe.rdata = clone_rdata;

  // Track the current queue size
  queue_size_accumulator->accumulate(rx_msg_q.size());
  rx_msg_q.push(qe);

  // Return PJ_TRUE to flag that we have absorbed the incoming message.
  return PJ_TRUE;
}
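The load_monitor->admit_request() call is the overload gate: non-ACK requests are only admitted if a token is available, and everything else gets the stateless 503. A hypothetical token-bucket sketch of that style of check (the real LoadMonitor also adapts its rate from the latencies fed in by the worker threads in Example #4, which this omits):

#include <ctime>
#include <algorithm>

class TokenBucket
{
public:
  TokenBucket(double rate, double max_tokens) :
    _rate(rate), _max_tokens(max_tokens), _tokens(max_tokens)
  {
    clock_gettime(CLOCK_MONOTONIC, &_last);
  }

  bool admit_request()
  {
    struct timespec now;
    clock_gettime(CLOCK_MONOTONIC, &now);
    double elapsed = (now.tv_sec - _last.tv_sec) +
                     (now.tv_nsec - _last.tv_nsec) / 1e9;
    _last = now;

    // Refill the bucket at the configured rate, capped at the maximum.
    _tokens = std::min(_max_tokens, _tokens + elapsed * _rate);

    if (_tokens >= 1.0)
    {
      _tokens -= 1.0;  // Spend one token on this request.
      return true;
    }
    return false;      // No tokens left - reject with a 503.
  }

private:
  double _rate;        // Tokens added per second.
  double _max_tokens;  // Bucket capacity (burst size).
  double _tokens;      // Tokens currently available.
  struct timespec _last;
};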