/// Receive callback: timestamps, logs, clones and queues an incoming message.
///
/// The message is cloned and the clone is pushed onto rx_msg_q together with
/// the time at which it was received.
///
/// @param rdata  The received message.
/// @returns      PJ_TRUE on every path - the message is either queued or
///               dropped here.
static pj_bool_t on_rx_msg(pjsip_rx_data* rdata)
{
  // Take the receive timestamp first so that it covers the whole interval
  // from receiving the message to forwarding it on (or rejecting it).
  struct rx_msg_qe entry;
  if (clock_gettime(CLOCK_MONOTONIC, &entry.rx_time) != 0)
  {
    LOG_ERROR("Failed to get receive timestamp: %s", strerror(errno));
    return PJ_TRUE;
  }

  // Log the message locally and to SAS.
  local_log_rx_msg(rdata);
  sas_log_rx_msg(rdata);

  // Take a clone of the message; it is the clone that gets queued.
  pjsip_rx_data* cloned;
  pj_status_t rc = pjsip_rx_data_clone(rdata, 0, &cloned);

  if (rc == PJ_SUCCESS)
  {
    // Carry the trail identifier over from the original to the clone.
    set_trail(cloned, get_trail(rdata));

    // @TODO - back-pressure is still an open question.  Options include a
    // maximum queue depth with messages dropped beyond it, holding on to the
    // message until the queue has space (which forces back pressure on the
    // particular TCP connection), or a queue per transport serviced
    // round-robin.
    LOG_DEBUG("Queuing cloned received message %p for worker threads", cloned);
    entry.rdata = cloned;
    rx_msg_q.push(entry);
  }
  else
  {
    // Without a clone there is nothing to queue, so the message is dropped.
    LOG_ERROR("Failed to clone incoming message (%s)",
              PJUtils::pj_status_to_string(rc).c_str());
  }

  // Returning PJ_TRUE flags that the incoming message has been absorbed.
  return PJ_TRUE;
}
/*!
 * \brief Build the task data handed to a task for a received message.
 *
 * Allocates an ao2 object (destructor: rx_task_data_destroy), stores a clone
 * of \a rdata in it and takes a reference on both \a endpoint and \a aor.
 *
 * \param rdata    The received message to clone.
 * \param endpoint The endpoint to reference from the task data.
 * \param aor      The AOR to reference from the task data.
 *
 * \return The new task data object.
 * \retval NULL if the allocation or the clone of \a rdata failed.
 */
static struct rx_task_data *rx_task_data_create(pjsip_rx_data *rdata,
	struct ast_sip_endpoint *endpoint, struct ast_sip_aor *aor)
{
	struct rx_task_data *task_data = ao2_alloc(
		sizeof(*task_data), rx_task_data_destroy);

	if (!task_data) {
		return NULL;
	}

	/*
	 * The clone can fail; handing back an object without a usable rdata
	 * would only defer the failure to whoever dereferences it.  The clone is
	 * attempted before any endpoint/aor reference is taken so that there is
	 * nothing else to unwind here.
	 *
	 * NOTE(review): this relies on rx_task_data_destroy coping with an object
	 * whose endpoint and aor have not been set yet - confirm against the
	 * destructor.
	 */
	if (pjsip_rx_data_clone(rdata, 0, &task_data->rdata) != PJ_SUCCESS) {
		ao2_cleanup(task_data);
		return NULL;
	}

	task_data->endpoint = endpoint;
	ao2_ref(task_data->endpoint, +1);
	task_data->aor = aor;
	ao2_ref(task_data->aor, +1);

	return task_data;
}
/*!
 * \brief Hand a received message to a serializer for processing.
 *
 * Picks a serializer for the message (from the dialog association if one
 * exists, otherwise from the request transaction for responses, otherwise a
 * distributor serializer), clones the message and pushes the clone to that
 * serializer as a \c distribute task.
 *
 * \param rdata The received message.
 *
 * \return PJ_TRUE on every path.
 */
static pj_bool_t distributor(pjsip_rx_data *rdata)
{
	pjsip_dialog *dialog;
	struct distributor_dialog_data *assoc = NULL;
	struct ast_taskprocessor *serializer = NULL;
	pjsip_rx_data *copy;

	if (!ast_test_flag(&ast_options, AST_OPT_FLAG_FULLY_BOOTED)) {
		/*
		 * Nothing is processed before the system is fully booted; the
		 * peer is left to retransmit until we are ready.
		 */
		return PJ_TRUE;
	}

	/* First choice: the serializer already associated with the dialog. */
	dialog = find_dialog(rdata);
	if (dialog) {
		ast_debug(3, "Searching for serializer associated with dialog %s for %s\n",
			dialog->obj_name, pjsip_rx_data_get_info(rdata));
		assoc = ao2_find(dialog_associations, dialog, OBJ_SEARCH_KEY);
		if (assoc) {
			ao2_lock(assoc);
			serializer = ao2_bump(assoc->serializer);
			ao2_unlock(assoc);
			if (serializer) {
				ast_debug(3, "Found serializer %s associated with dialog %s\n",
					ast_taskprocessor_name(serializer), dialog->obj_name);
			}
		}
	}

	if (!serializer) {
		/* No dialog serializer, so choose one based on the message itself. */
		if (rdata->msg_info.msg->type == PJSIP_RESPONSE_MSG) {
			ast_debug(3, "No dialog serializer for %s.  Using request transaction as basis.\n",
				pjsip_rx_data_get_info(rdata));
			serializer = find_request_serializer(rdata);
			if (!serializer) {
				/*
				 * The serializer that sent the request could not be
				 * determined or is gone, so pick one for the unmatched
				 * response.
				 */
				serializer = ast_sip_get_distributor_serializer(rdata);
			}
		} else if (pjsip_method_cmp(&rdata->msg_info.msg->line.req.method, &pjsip_cancel_method) == 0
			|| pjsip_method_cmp(&rdata->msg_info.msg->line.req.method, &pjsip_bye_method) == 0) {
			/* A BYE or CANCEL request without a serializer is rejected statelessly. */
			pjsip_endpt_respond_stateless(ast_sip_get_pjsip_endpoint(), rdata,
				PJSIP_SC_CALL_TSX_DOES_NOT_EXIST, NULL, NULL, NULL);
			goto drop;
		} else {
			if (ast_taskprocessor_alert_get()) {
				/*
				 * Backed up taskprocessors are a good sign that we are
				 * being overloaded and should defer taking on new work.
				 * The request is ignored and the peer's transport layer
				 * is relied upon to retransmit it; the overload is
				 * usually worked off within a few seconds.  The
				 * alternative would be to answer these requests with a
				 * 503 response and be done with it.
				 */
				ast_debug(3, "Taskprocessor overload alert: Ignoring '%s'.\n",
					pjsip_rx_data_get_info(rdata));
				goto drop;
			}

			/* Out-of-dialog request: pick a serializer for it. */
			serializer = ast_sip_get_distributor_serializer(rdata);
		}
	}

	if (pjsip_rx_data_clone(rdata, 0, &copy) != PJ_SUCCESS) {
		ast_taskprocessor_unreference(serializer);
		goto drop;
	}

	if (assoc) {
		/* Attach a reference to the associated endpoint to the clone. */
		ao2_lock(assoc);
		copy->endpt_info.mod_data[endpoint_mod.id] = ao2_bump(assoc->endpoint);
		ao2_unlock(assoc);
		ao2_cleanup(assoc);
	}

	if (ast_sip_push_task(serializer, distribute, copy)) {
		/* The task was not queued, so release what was attached to the clone. */
		ao2_cleanup(copy->endpt_info.mod_data[endpoint_mod.id]);
		pjsip_rx_data_free_cloned(copy);
	}

	ast_taskprocessor_unreference(serializer);

	return PJ_TRUE;

drop:
	/* Shared exit for the paths that give up on the message while still holding assoc. */
	ao2_cleanup(assoc);
	return PJ_TRUE;
}
/// Receive callback: applies overload control, then clones and queues the
/// incoming message.
///
/// Non-ACK requests that the load monitor does not admit are answered
/// statelessly with a 503.  Everything else is cloned and pushed onto
/// rx_msg_q.
///
/// @param rdata  The received message.
/// @returns      PJ_TRUE on every path that returns (the process is aborted
///               if the queue reports a deadlock).
static pj_bool_t on_rx_msg(pjsip_rx_data* rdata)
{
  // Log the message locally and to SAS, and count it.
  local_log_rx_msg(rdata);
  sas_log_rx_msg(rdata);
  requests_counter->increment();

  pjsip_msg* msg = rdata->msg_info.msg;

  // Ask the load monitor whether this message should be processed.  Only
  // non-ACK requests are ever rejected on its say-so.
  const bool admitted = load_monitor->admit_request();

  if (!admitted &&
      (msg->type == PJSIP_REQUEST_MSG) &&
      (msg->line.req.method.id != PJSIP_ACK_METHOD))
  {
    // Reject statelessly with a 503 Service Unavailable carrying a
    // Retry-After header whose value is zero.
    LOG_DEBUG("Rejected request due to overload");

    pjsip_cid_hdr* call_id = (pjsip_cid_hdr*)rdata->msg_info.cid;
    SAS::TrailId trail = get_trail(rdata);

    SAS::Marker start_marker(trail, MARKER_ID_START, 1u);
    SAS::report_marker(start_marker);

    SAS::Event overload_event(trail, SASEvent::SIP_OVERLOAD, 0);
    overload_event.add_static_param(load_monitor->get_target_latency());
    overload_event.add_static_param(load_monitor->get_current_latency());
    overload_event.add_static_param(load_monitor->get_rate_limit());
    SAS::report_event(overload_event);

    PJUtils::report_sas_to_from_markers(trail, msg);

    // REGISTER, SUBSCRIBE and NOTIFY can reuse the same Call-ID over a long
    // period of time, which would produce huge SAS trails, so the Call-ID is
    // left out for those methods.
    const bool omit_call_id =
      (msg->line.req.method.id == PJSIP_REGISTER_METHOD) ||
      (pjsip_method_cmp(&msg->line.req.method, pjsip_get_subscribe_method()) == 0) ||
      (pjsip_method_cmp(&msg->line.req.method, pjsip_get_notify_method()) == 0);

    PJUtils::mark_sas_call_branch_ids(trail, omit_call_id ? NULL : call_id, msg);

    SAS::Marker end_marker(trail, MARKER_ID_END, 1u);
    SAS::report_marker(end_marker);

    pjsip_retry_after_hdr* retry_after =
      pjsip_retry_after_hdr_create(rdata->tp_info.pool, 0);
    PJUtils::respond_stateless(stack_data.endpt,
                               rdata,
                               PJSIP_SC_SERVICE_UNAVAILABLE,
                               NULL,
                               (pjsip_hdr*)retry_after,
                               NULL);

    // TCP connections are no longer terminated on overload: the shutdown has
    // to wait for existing transactions to end, so feedback reaches the
    // downstream node too slowly.  Downstream nodes are instead expected to
    // rebalance load, if they can, when they receive the 503 responses.
    overload_counter->increment();
    return PJ_TRUE;
  }

  // If the queue has gone unserviced for long enough to imply that all the
  // worker threads are deadlocked, exit the process so it will be restarted.
  if (rx_msg_q.is_deadlocked())
  {
    LOG_ERROR("Detected worker thread deadlock - exiting");
    abort();
  }

  // Start timing now; this tracks the time from receiving the message to
  // forwarding it on (or rejecting it).
  struct rx_msg_qe entry;
  entry.stop_watch.start();

  // Tell the connection tracker that this transport is active.
  connection_tracker->connection_active(rdata->tp_info.transport);

  // Take a clone of the message; it is the clone that gets queued.
  pjsip_rx_data* cloned;
  pj_status_t rc = pjsip_rx_data_clone(rdata, 0, &cloned);

  if (rc != PJ_SUCCESS)
  {
    // Without a clone there is nothing to queue, so the message is dropped.
    LOG_ERROR("Failed to clone incoming message (%s)",
              PJUtils::pj_status_to_string(rc).c_str());
    return PJ_TRUE;
  }

  // Carry the trail identifier over from the original to the clone.
  set_trail(cloned, get_trail(rdata));

  // @TODO - back-pressure is still an open question.  Options include a
  // maximum queue depth with messages dropped beyond it, holding on to the
  // message until the queue has space (which forces back pressure on the
  // particular TCP connection), or a queue per transport serviced
  // round-robin.
  LOG_DEBUG("Queuing cloned received message %p for worker threads", cloned);
  entry.rdata = cloned;

  // Record the queue size as it stands before this message is added.
  queue_size_accumulator->accumulate(rx_msg_q.size());

  rx_msg_q.push(entry);

  // Returning PJ_TRUE flags that the incoming message has been absorbed.
  return PJ_TRUE;
}