static pj_bool_t on_rx_msg(pjsip_rx_data* rdata)
{
  // Before we start, get a timestamp.  This will track the time from
  // receiving a message to forwarding it on (or rejecting it).
  struct rx_msg_qe qe;
  if (clock_gettime(CLOCK_MONOTONIC, &qe.rx_time) != 0)
  {
    LOG_ERROR("Failed to get receive timestamp: %s", strerror(errno));
    return PJ_TRUE;
  }

  // Do logging.
  local_log_rx_msg(rdata);
  sas_log_rx_msg(rdata);

  // Clone the message and queue it to a scheduler thread.
  pjsip_rx_data* clone_rdata;
  pj_status_t status = pjsip_rx_data_clone(rdata, 0, &clone_rdata);

  if (status != PJ_SUCCESS)
  {
    // Failed to clone the message, so drop it.
    LOG_ERROR("Failed to clone incoming message (%s)",
              PJUtils::pj_status_to_string(status).c_str());
    return PJ_TRUE;
  }

  // Make sure the trail identifier is passed across.
  set_trail(clone_rdata, get_trail(rdata));

  // @TODO - need to think about back-pressure mechanisms.  For example,
  // should we have a maximum depth of queue and drop messages after that?
  // May be better to hold on to the message until the queue has space - this
  // will force back pressure on the particular TCP connection.  Or should we
  // have a queue per transport and round-robin them?
  LOG_DEBUG("Queuing cloned received message %p for worker threads", clone_rdata);
  qe.rdata = clone_rdata;
  rx_msg_q.push(qe);

  // Return PJ_TRUE to flag that we have absorbed the incoming message.
  return PJ_TRUE;
}
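For context, here is a minimal sketch (not part of the original source) of the consuming side of this queue: a worker thread that pops `rx_msg_qe` entries, measures how long each message sat on the queue using the CLOCK_MONOTONIC timestamp captured above, and runs the clone through the rest of the PJSIP module chain. The blocking `pop()` semantics and the `mod_worker` module are assumptions for illustration; `pjsip_endpt_process_rx_data()` and `pjsip_rx_data_free_cloned()` are standard PJSIP APIs.

static int worker_thread(void* p)
{
  struct rx_msg_qe qe;

  // pop() is assumed to block until a message is available and to return
  // false when the queue is terminated at shutdown.
  while (rx_msg_q.pop(qe))
  {
    // Work out how long the message spent on the queue, using the
    // CLOCK_MONOTONIC timestamp captured in on_rx_msg().
    struct timespec now;
    if (clock_gettime(CLOCK_MONOTONIC, &now) == 0)
    {
      long latency_us = (now.tv_sec - qe.rx_time.tv_sec) * 1000000L +
                        (now.tv_nsec - qe.rx_time.tv_nsec) / 1000L;
      LOG_DEBUG("Message %p was queued for %ldus", qe.rdata, latency_us);
    }

    // Run the cloned message through the remaining PJSIP modules, then
    // free the clone.
    pjsip_process_rdata_param param;
    pjsip_process_rdata_param_default(&param);
    param.start_mod = &mod_worker;  // hypothetical module marking where to resume
    pjsip_endpt_process_rx_data(stack_data.endpt, qe.rdata, &param, NULL);
    pjsip_rx_data_free_cloned(qe.rdata);
  }

  return 0;
}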
static pj_bool_t on_rx_msg(pjsip_rx_data* rdata)
{
  // Do logging.
  local_log_rx_msg(rdata);
  sas_log_rx_msg(rdata);

  requests_counter->increment();

  // Check whether the request should be processed.
  if (!(load_monitor->admit_request()) &&
      (rdata->msg_info.msg->type == PJSIP_REQUEST_MSG) &&
      (rdata->msg_info.msg->line.req.method.id != PJSIP_ACK_METHOD))
  {
    // Discard non-ACK requests if there are no available tokens.  Respond
    // statelessly with a 503 Service Unavailable, including a Retry-After
    // header with a zero timeout.
    LOG_DEBUG("Rejected request due to overload");

    pjsip_cid_hdr* cid = (pjsip_cid_hdr*)rdata->msg_info.cid;

    SAS::TrailId trail = get_trail(rdata);

    SAS::Marker start_marker(trail, MARKER_ID_START, 1u);
    SAS::report_marker(start_marker);

    SAS::Event event(trail, SASEvent::SIP_OVERLOAD, 0);
    event.add_static_param(load_monitor->get_target_latency());
    event.add_static_param(load_monitor->get_current_latency());
    event.add_static_param(load_monitor->get_rate_limit());
    SAS::report_event(event);

    PJUtils::report_sas_to_from_markers(trail, rdata->msg_info.msg);

    if ((rdata->msg_info.msg->line.req.method.id == PJSIP_REGISTER_METHOD) ||
        ((pjsip_method_cmp(&rdata->msg_info.msg->line.req.method,
                           pjsip_get_subscribe_method())) == 0) ||
        ((pjsip_method_cmp(&rdata->msg_info.msg->line.req.method,
                           pjsip_get_notify_method())) == 0))
    {
      // Omit the Call-ID for these requests, as the same Call-ID can be
      // reused over a long period of time and produce huge SAS trails.
      PJUtils::mark_sas_call_branch_ids(trail, NULL, rdata->msg_info.msg);
    }
    else
    {
      PJUtils::mark_sas_call_branch_ids(trail, cid, rdata->msg_info.msg);
    }

    SAS::Marker end_marker(trail, MARKER_ID_END, 1u);
    SAS::report_marker(end_marker);

    pjsip_retry_after_hdr* retry_after =
                           pjsip_retry_after_hdr_create(rdata->tp_info.pool, 0);
    PJUtils::respond_stateless(stack_data.endpt,
                               rdata,
                               PJSIP_SC_SERVICE_UNAVAILABLE,
                               NULL,
                               (pjsip_hdr*)retry_after,
                               NULL);

    // We no longer terminate TCP connections on overload, as the shutdown
    // has to wait for existing transactions to end and therefore takes too
    // long to give feedback to the downstream node.  Instead we expect
    // downstream nodes to rebalance load, where possible, when they receive
    // the 503 responses.
    overload_counter->increment();
    return PJ_TRUE;
  }

  // Check that the worker threads are not all deadlocked.
  if (rx_msg_q.is_deadlocked())
  {
    // The queue has not been serviced for long enough to imply that all the
    // worker threads are deadlocked, so exit the process so it will be
    // restarted.
    LOG_ERROR("Detected worker thread deadlock - exiting");
    abort();
  }

  // Before we start, get a timestamp.  This will track the time from
  // receiving a message to forwarding it on (or rejecting it).
  struct rx_msg_qe qe;
  qe.stop_watch.start();

  // Notify the connection tracker that the transport is active.
  connection_tracker->connection_active(rdata->tp_info.transport);

  // Clone the message and queue it to a scheduler thread.
  pjsip_rx_data* clone_rdata;
  pj_status_t status = pjsip_rx_data_clone(rdata, 0, &clone_rdata);

  if (status != PJ_SUCCESS)
  {
    // Failed to clone the message, so drop it.
    LOG_ERROR("Failed to clone incoming message (%s)",
              PJUtils::pj_status_to_string(status).c_str());
    return PJ_TRUE;
  }

  // Make sure the trail identifier is passed across.
  set_trail(clone_rdata, get_trail(rdata));

  // @TODO - need to think about back-pressure mechanisms.  For example,
  // should we have a maximum depth of queue and drop messages after that?
  // May be better to hold on to the message until the queue has space - this
  // will force back pressure on the particular TCP connection.  Or should we
  // have a queue per transport and round-robin them?
  LOG_DEBUG("Queuing cloned received message %p for worker threads", clone_rdata);
  qe.rdata = clone_rdata;

  // Track the current queue size.
  queue_size_accumulator->accumulate(rx_msg_q.size());
  rx_msg_q.push(qe);

  // Return PJ_TRUE to flag that we have absorbed the incoming message.
  return PJ_TRUE;
}
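The overload check above depends on `load_monitor->admit_request()` handing out tokens, as the "no available tokens" comment indicates. Below is a minimal token-bucket sketch of that behaviour, under assumed names (`TokenBucket`, `_rate`, `_max`): tokens refill at a fixed rate up to a cap, and a request is admitted only if a whole token is available. The real load monitor also adapts the rate to measured request latency (hence `get_target_latency()` and `get_current_latency()` above), and would need locking if called from multiple transport threads; neither is shown here.

#include <algorithm>
#include <ctime>

// Illustrative token bucket only - rate adaptation and thread safety omitted.
class TokenBucket
{
public:
  TokenBucket(double rate_per_sec, double max_tokens) :
    _rate(rate_per_sec), _max(max_tokens), _tokens(max_tokens)
  {
    clock_gettime(CLOCK_MONOTONIC, &_last_refill);
  }

  // Returns true (and consumes a token) if the request should be admitted.
  bool admit_request()
  {
    struct timespec now;
    clock_gettime(CLOCK_MONOTONIC, &now);

    // Top the bucket up in proportion to the time since the last refill,
    // without exceeding the maximum bucket size.
    double elapsed = (now.tv_sec - _last_refill.tv_sec) +
                     (now.tv_nsec - _last_refill.tv_nsec) / 1e9;
    _tokens = std::min(_max, _tokens + (elapsed * _rate));
    _last_refill = now;

    if (_tokens >= 1.0)
    {
      _tokens -= 1.0;
      return true;
    }
    return false;
  }

private:
  double _rate;            // refill rate (requests per second)
  double _max;             // maximum bucket depth
  double _tokens;          // tokens currently available
  struct timespec _last_refill;
};

Capping the bucket depth bounds the burst a node will accept after an idle period, which is why rejected requests get an immediate 503 rather than being queued: the 503 pushes the rebalancing decision onto the downstream node straight away.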