Beispiel #1
0
// Receive callback: timestamps the incoming message, logs it, and hands a
// clone of it to the worker threads through rx_msg_q.
//
// @param rdata  The received message as supplied by the caller.
// @returns      PJ_TRUE on every path (including the paths where the message
//               is dropped because the timestamp or the clone failed).
static pj_bool_t on_rx_msg(pjsip_rx_data* rdata)
{
  // Stamp the queue entry with the arrival time first, so that the measured
  // interval runs from receipt until the message is forwarded or rejected.
  struct rx_msg_qe entry;
  const int clock_rc = clock_gettime(CLOCK_MONOTONIC, &entry.rx_time);
  if (clock_rc != 0)
  {
    LOG_ERROR("Failed to get receive timestamp: %s", strerror(errno));
    return PJ_TRUE;
  }

  // Record the arrival in the local log and in SAS.
  local_log_rx_msg(rdata);
  sas_log_rx_msg(rdata);

  // Take a copy of the message; it is the copy that gets queued.
  pjsip_rx_data* queued_copy = NULL;
  const pj_status_t clone_rc = pjsip_rx_data_clone(rdata, 0, &queued_copy);

  if (clone_rc == PJ_SUCCESS)
  {
    // Carry the trail identifier over from the original to the copy.
    set_trail(queued_copy, get_trail(rdata));

    // @TODO - back-pressure is still an open question.  Options include a
    // maximum queue depth with drops beyond it, holding the message until
    // the queue has room (which pushes back on that TCP connection), or a
    // queue per transport serviced round-robin.

    LOG_DEBUG("Queuing cloned received message %p for worker threads", queued_copy);
    entry.rdata = queued_copy;
    rx_msg_q.push(entry);
  }
  else
  {
    // Without a copy there is nothing to queue, so the message is dropped.
    LOG_ERROR("Failed to clone incoming message (%s)", PJUtils::pj_status_to_string(clone_rc).c_str());
  }

  // PJ_TRUE flags that the incoming message has been absorbed here.
  return PJ_TRUE;
}
Beispiel #2
0
/*!
 * \brief Build the task payload for a received message.
 *
 * Clones \a rdata and stores the clone, together with \a endpoint and \a aor,
 * in a newly allocated rx_task_data object.  ao2_ref(+1) is applied to both
 * \a endpoint and \a aor on behalf of the new object.
 *
 * The clone is made before the object is allocated so that a failure of
 * either step can be unwound locally, without handing a partially
 * initialized object to rx_task_data_destroy.
 *
 * \param rdata     Received message to clone.
 * \param endpoint  Endpoint to store in the task data; must not be NULL.
 * \param aor       AOR to store in the task data; must not be NULL.
 *
 * \return The new task data object, or NULL if the message could not be
 *         cloned or the object could not be allocated.
 */
static struct rx_task_data *rx_task_data_create(pjsip_rx_data *rdata,
						struct ast_sip_endpoint *endpoint,
						struct ast_sip_aor *aor)
{
	struct rx_task_data *task_data;
	pjsip_rx_data *clone = NULL;

	/* A failed clone must not produce a task object with no message. */
	if (pjsip_rx_data_clone(rdata, 0, &clone) != PJ_SUCCESS) {
		return NULL;
	}

	task_data = ao2_alloc(sizeof(*task_data), rx_task_data_destroy);
	if (!task_data) {
		/* Nothing owns the clone yet, so release it here. */
		pjsip_rx_data_free_cloned(clone);
		return NULL;
	}

	task_data->rdata = clone;

	task_data->endpoint = endpoint;
	ao2_ref(task_data->endpoint, +1);

	task_data->aor = aor;
	ao2_ref(task_data->aor, +1);

	return task_data;
}
Beispiel #3
0
/*
 * Receive callback that chooses a serializer for an incoming message and
 * pushes a clone of the message to it with ast_sip_push_task(), using
 * distribute as the task callback.
 *
 * Nothing is processed until AST_OPT_FLAG_FULLY_BOOTED is set.
 *
 * Serializer selection, in order:
 *  - the serializer held by the dialog_associations entry found for the
 *    message's dialog, if any;
 *  - for responses, find_request_serializer(), falling back to
 *    ast_sip_get_distributor_serializer();
 *  - for BYE and CANCEL requests, none: they are answered statelessly with
 *    PJSIP_SC_CALL_TSX_DOES_NOT_EXIST;
 *  - for any other request, ast_sip_get_distributor_serializer(), unless
 *    ast_taskprocessor_alert_get() is set, in which case the request is
 *    ignored.
 *
 * rdata is the received message.  Every path returns PJ_TRUE.
 */
static pj_bool_t distributor(pjsip_rx_data *rdata)
{
	pjsip_dialog *dlg;
	struct distributor_dialog_data *dist = NULL;
	struct ast_taskprocessor *serializer = NULL;
	pjsip_rx_data *clone;

	if (!ast_test_flag(&ast_options, AST_OPT_FLAG_FULLY_BOOTED)) {
		/*
		 * Ignore everything until we are fully booted.  Let the
		 * peer retransmit messages until we are ready.
		 */
		return PJ_TRUE;
	}

	dlg = find_dialog(rdata);
	if (dlg) {
		ast_debug(3, "Searching for serializer associated with dialog %s for %s\n",
			dlg->obj_name, pjsip_rx_data_get_info(rdata));
		/* dist is released with ao2_cleanup() on every exit path below. */
		dist = ao2_find(dialog_associations, dlg, OBJ_SEARCH_KEY);
		if (dist) {
			/* dist->serializer is only read while dist is locked. */
			ao2_lock(dist);
			serializer = ao2_bump(dist->serializer);
			ao2_unlock(dist);
			if (serializer) {
				ast_debug(3, "Found serializer %s associated with dialog %s\n",
					ast_taskprocessor_name(serializer), dlg->obj_name);
			}
		}
	}

	if (serializer) {
		/* We have a serializer so we know where to send the message. */
	} else if (rdata->msg_info.msg->type == PJSIP_RESPONSE_MSG) {
		ast_debug(3, "No dialog serializer for %s.  Using request transaction as basis.\n",
			pjsip_rx_data_get_info(rdata));
		serializer = find_request_serializer(rdata);
		if (!serializer) {
			/*
			 * Pick a serializer for the unmatched response.
			 * We couldn't determine what serializer originally
			 * sent the request or the serializer is gone.
			 */
			serializer = ast_sip_get_distributor_serializer(rdata);
		}
	} else if (!pjsip_method_cmp(&rdata->msg_info.msg->line.req.method, &pjsip_cancel_method)
		|| !pjsip_method_cmp(&rdata->msg_info.msg->line.req.method, &pjsip_bye_method)) {
		/* We have a BYE or CANCEL request without a serializer. */
		pjsip_endpt_respond_stateless(ast_sip_get_pjsip_endpoint(), rdata,
			PJSIP_SC_CALL_TSX_DOES_NOT_EXIST, NULL, NULL, NULL);
		/* serializer is still NULL on this branch, so only dist needs releasing. */
		ao2_cleanup(dist);
		return PJ_TRUE;
	} else {
		if (ast_taskprocessor_alert_get()) {
			/*
			 * When taskprocessors get backed up, there is a good chance that
			 * we are being overloaded and need to defer adding new work to
			 * the system.  To defer the work we will ignore the request and
			 * rely on the peer's transport layer to retransmit the message.
			 * We usually work off the overload within a few seconds.  The
			 * alternative is to send back a 503 response to these requests
			 * and be done with it.
			 */
			ast_debug(3, "Taskprocessor overload alert: Ignoring '%s'.\n",
				pjsip_rx_data_get_info(rdata));
			/* serializer is still NULL on this branch, so only dist needs releasing. */
			ao2_cleanup(dist);
			return PJ_TRUE;
		}

		/* Pick a serializer for the out-of-dialog request. */
		serializer = ast_sip_get_distributor_serializer(rdata);
	}

	/* The task is handed a clone of the message rather than rdata itself. */
	if (pjsip_rx_data_clone(rdata, 0, &clone) != PJ_SUCCESS) {
		/* No clone, nothing to queue: release the serializer and dist and drop the message. */
		ast_taskprocessor_unreference(serializer);
		ao2_cleanup(dist);
		return PJ_TRUE;
	}

	if (dist) {
		/*
		 * Store the dialog's endpoint in this module's mod_data slot on the
		 * clone.  dist is not used after this block.
		 */
		ao2_lock(dist);
		clone->endpt_info.mod_data[endpoint_mod.id] = ao2_bump(dist->endpoint);
		ao2_unlock(dist);
		ao2_cleanup(dist);
	}

	if (ast_sip_push_task(serializer, distribute, clone)) {
		/*
		 * A non-zero result is treated as failure to queue the task
		 * (NOTE(review): confirm against ast_sip_push_task), so the value in
		 * the clone's mod_data slot and the clone itself are released here.
		 */
		ao2_cleanup(clone->endpt_info.mod_data[endpoint_mod.id]);
		pjsip_rx_data_free_cloned(clone);
	}

	/* Done with the serializer in this function, whether or not the push succeeded. */
	ast_taskprocessor_unreference(serializer);

	return PJ_TRUE;
}
Beispiel #4
0
// Receive callback: logs and counts the incoming message, rejects non-ACK
// requests with a stateless 503 when the load monitor refuses admission,
// aborts the process if the worker queue reports a deadlock, and otherwise
// queues a clone of the message on rx_msg_q for the worker threads.
//
// @param rdata  The received message as supplied by the caller.
// @returns      PJ_TRUE on every path that returns.
static pj_bool_t on_rx_msg(pjsip_rx_data* rdata)
{
  // Record the arrival in the local log and in SAS, then count it.
  local_log_rx_msg(rdata);
  sas_log_rx_msg(rdata);

  requests_counter->increment();

  // Ask the load monitor for admission.  This is done for every message;
  // only non-ACK requests are actually rejected when it says no.
  const bool admitted = load_monitor->admit_request();
  pjsip_msg* msg = rdata->msg_info.msg;

  if (!admitted &&
      (msg->type == PJSIP_REQUEST_MSG) &&
      (msg->line.req.method.id != PJSIP_ACK_METHOD))
  {
    // No capacity for this request: answer statelessly with 503 Service
    // Unavailable carrying a Retry-After of zero.
    LOG_DEBUG("Rejected request due to overload");

    pjsip_cid_hdr* call_id = (pjsip_cid_hdr*)rdata->msg_info.cid;

    SAS::TrailId trail_id = get_trail(rdata);

    SAS::Marker begin_marker(trail_id, MARKER_ID_START, 1u);
    SAS::report_marker(begin_marker);

    SAS::Event overload_event(trail_id, SASEvent::SIP_OVERLOAD, 0);
    overload_event.add_static_param(load_monitor->get_target_latency());
    overload_event.add_static_param(load_monitor->get_current_latency());
    overload_event.add_static_param(load_monitor->get_rate_limit());
    SAS::report_event(overload_event);

    PJUtils::report_sas_to_from_markers(trail_id, msg);

    // REGISTER, SUBSCRIBE and NOTIFY can reuse one Call-ID for a long time,
    // which would produce huge SAS trails, so the Call-ID is left out of the
    // markers for those methods.
    pjsip_method* method = &msg->line.req.method;
    const bool omit_call_id =
      (method->id == PJSIP_REGISTER_METHOD) ||
      (pjsip_method_cmp(method, pjsip_get_subscribe_method()) == 0) ||
      (pjsip_method_cmp(method, pjsip_get_notify_method()) == 0);

    if (!omit_call_id)
    {
      PJUtils::mark_sas_call_branch_ids(trail_id, call_id, msg);
    }
    else
    {
      PJUtils::mark_sas_call_branch_ids(trail_id, NULL, msg);
    }

    SAS::Marker finish_marker(trail_id, MARKER_ID_END, 1u);
    SAS::report_marker(finish_marker);

    pjsip_retry_after_hdr* retry_hdr = pjsip_retry_after_hdr_create(rdata->tp_info.pool, 0);
    PJUtils::respond_stateless(stack_data.endpt,
                               rdata,
                               PJSIP_SC_SERVICE_UNAVAILABLE,
                               NULL,
                               (pjsip_hdr*)retry_hdr,
                               NULL);

    // TCP connections are deliberately not torn down on overload: shutdown
    // has to wait for existing transactions to finish, which is too slow a
    // signal for the downstream node.  The 503s are expected to prompt
    // downstream nodes to rebalance load where they can.

    overload_counter->increment();
    return PJ_TRUE;
  }

  // If the queue has gone unserviced for long enough to imply that every
  // worker thread is deadlocked, exit so that the process gets restarted.
  if (rx_msg_q.is_deadlocked())
  {
    LOG_ERROR("Detected worker thread deadlock - exiting");
    abort();
  }

  // Start timing now; the stopwatch tracks the interval from receipt until
  // the message is forwarded or rejected.
  struct rx_msg_qe entry;
  entry.stop_watch.start();

  // Tell the connection tracker that this transport has seen activity.
  connection_tracker->connection_active(rdata->tp_info.transport);

  // Take a copy of the message; it is the copy that gets queued.
  pjsip_rx_data* queued_copy = NULL;
  const pj_status_t clone_rc = pjsip_rx_data_clone(rdata, 0, &queued_copy);

  if (clone_rc == PJ_SUCCESS)
  {
    // Carry the trail identifier over from the original to the copy.
    set_trail(queued_copy, get_trail(rdata));

    // @TODO - back-pressure is still an open question.  Options include a
    // maximum queue depth with drops beyond it, holding the message until
    // the queue has room (which pushes back on that TCP connection), or a
    // queue per transport serviced round-robin.

    LOG_DEBUG("Queuing cloned received message %p for worker threads", queued_copy);
    entry.rdata = queued_copy;

    // Sample the queue depth as it stands before this entry is added.
    queue_size_accumulator->accumulate(rx_msg_q.size());
    rx_msg_q.push(entry);
  }
  else
  {
    // Without a copy there is nothing to queue, so the message is dropped.
    LOG_ERROR("Failed to clone incoming message (%s)", PJUtils::pj_status_to_string(clone_rc).c_str());
  }

  // PJ_TRUE flags that the incoming message has been absorbed here.
  return PJ_TRUE;
}