/* Bump the per-pool "non-quorum response" counter for this response manager,
 * selecting the read or write flavor based on the request type. */
static void
rspmgr_incr_non_quorum_responses_stats(struct response_mgr *rspmgr)
{
    struct context *ctx = conn_to_ctx(rspmgr->conn);

    if (rspmgr->is_read) {
        stats_pool_incr(ctx, rspmgr->conn->owner, client_non_quorum_r_responses);
    } else {
        stats_pool_incr(ctx, rspmgr->conn->owner, client_non_quorum_w_responses);
    }
}
/* Event-loop entry point for a single connection. Dispatches the epoll event
 * mask to the connection's recv/send handlers; error events win over read,
 * read wins over write. Returns DN_OK when the connection stays usable,
 * DN_ERROR after it has been errored/closed. */
rstatus_t
core_core(void *arg, uint32_t events)
{
    struct conn *conn = arg;
    struct context *ctx = conn_to_ctx(conn);

    log_debug(LOG_VVERB, "event %04"PRIX32" on %s %d", events,
              conn_get_type_string(conn), conn->sd);

    conn->events = events;

    /* error takes precedence over read | write */
    if (events & EVENT_ERR) {
        if (conn->dyn_mode && conn->err) {
            loga("conn err on dnode EVENT_ERR: %d", conn->err);
        }
        core_error(ctx, conn);
        return DN_ERROR;
    }

    /* read takes precedence over write */
    if (events & EVENT_READ) {
        rstatus_t status = core_recv(ctx, conn);
        if (status != DN_OK || conn->done || conn->err) {
            if (!conn->dyn_mode) {
                core_close(ctx, conn);
                return DN_ERROR;
            }
            /* dyn-mode connections survive a failed recv unless a real
             * socket error was recorded */
            if (!conn->err) {
                return DN_OK;
            }
            loga("conn err on dnode EVENT_READ: %d", conn->err);
            core_close(ctx, conn);
            return DN_ERROR;
        }
    }

    if (events & EVENT_WRITE) {
        rstatus_t status = core_send(ctx, conn);
        if (status != DN_OK || conn->done || conn->err) {
            if (!conn->dyn_mode) {
                core_close(ctx, conn);
                return DN_ERROR;
            }
            if (!conn->err) {
                return DN_OK;
            }
            loga("conn err on dnode EVENT_WRITE: %d", conn->err);
            core_close(ctx, conn);
            return DN_ERROR;
        }
    }

    return DN_OK;
}
/* Deliver a response received for a dnode client connection.
 *
 * Looks up the original request by id in the connection's outstanding-message
 * dictionary, links rsp to it, removes it from the dictionary, and schedules
 * the connection for writing if this request is next in line to be sent out.
 * Takes ownership of rsp: if no matching request exists (already cleaned up),
 * the response is released here.
 *
 * Returns DN_OK on success, or the event_add_out() error status. */
static rstatus_t
dnode_client_handle_response(struct conn *conn, msgid_t reqid, struct msg *rsp)
{
    // Forward the response to the caller which is client connection.
    rstatus_t status = DN_OK;
    struct context *ctx = conn_to_ctx(conn);
    ASSERT(conn->type == CONN_DNODE_PEER_CLIENT);

    // Fetch the original request
    struct msg *req = dictFetchValue(conn->outstanding_msgs_dict, &reqid);
    if (!req) {
        /* BUG FIX: ids are printed with %lu elsewhere in this file; %d with a
         * wider unsigned id is undefined behavior. Cast keeps it portable. */
        log_notice("looks like we already cleanedup the request for %lu",
                   (unsigned long)reqid);
        rsp_put(rsp);
        return DN_OK;
    }

    // dnode client has no extra logic of coalescing etc like the client/coordinator.
    // Hence all work for this request is done at this time
    ASSERT_LOG(!req->peer, "req %lu:%lu has peer set", req->id, req->parent_id);
    req->selected_rsp = rsp;
    rsp->peer = req;

    // Remove the message from the hash table.
    dictDelete(conn->outstanding_msgs_dict, &reqid);

    // If this request is first in the out queue, then the connection is ready,
    // add the connection to epoll for writing
    if (conn_is_req_first_in_outqueue(conn, req)) {
        status = event_add_out(ctx->evb, conn);
        if (status != DN_OK) {
            conn->err = errno;
        }
    }

    return status;
}
/* Handle a response to a given request. if this is a quorum setting, choose the
 * right response. Then make sure all the requests are satisfied in a fragmented
 * request scenario and then use the post coalesce logic to cook up a combined
 * response.
 *
 * Takes ownership of rsp (msg_handle_response consumes or drops it). Returns
 * DN_OK normally, DN_NOOPS when the response was dropped, or an event
 * registration error status. */
static rstatus_t
client_handle_response(struct conn *conn, msgid_t reqid, struct msg *rsp)
{
    ASSERT_LOG(!rsp->peer, "response %lu:%lu has peer set", rsp->id,
               rsp->parent_id);
    // now the handler owns the response.
    ASSERT(conn->type == CONN_CLIENT);

    // Fetch the original request
    struct msg *req = dictFetchValue(conn->outstanding_msgs_dict, &reqid);
    if (!req) {
        /* BUG FIX: ids are printed with %lu elsewhere in this file; %d with a
         * wider unsigned id is undefined behavior. */
        log_notice("looks like we already cleanedup the request for %lu",
                   (unsigned long)reqid);
        rsp_put(rsp);
        return DN_OK;
    }

    // we have to submit the response irrespective of the unref status.
    rstatus_t status = msg_handle_response(req, rsp);

    if (conn->waiting_to_unref) {
        // dont care about the status.
        if (req->awaiting_rsps)
            return DN_OK;
        // all responses received
        dictDelete(conn->outstanding_msgs_dict, &reqid);
        /* BUG FIX: %d for a wide unsigned id was undefined behavior */
        log_info("Putting req %lu", (unsigned long)req->id);
        req_put(req);
        client_unref_internal_try_put(conn);
        return DN_OK;
    }

    if (status == DN_NOOPS) {
        // by now the response is dropped
        if (!req->awaiting_rsps) {
            // if we have sent the response for this request or the connection
            // is closed and we are just waiting to drain off the messages.
            if (req->rsp_sent) {
                dictDelete(conn->outstanding_msgs_dict, &reqid);
                log_info("Putting req %lu", (unsigned long)req->id);
                req_put(req);
            }
        }
    } else if (status == DN_OK) {
        g_pre_coalesce(req->selected_rsp);
        if (req_done(conn, req)) {
            // request fully satisfied; arm the client connection for writing
            struct context *ctx = conn_to_ctx(conn);
            status = event_add_out(ctx->evb, conn);
            if (status != DN_OK) {
                conn->err = errno;
            }
        }
    }
    return status;
}
/* Per-connection event dispatcher: routes the epoll event mask to the
 * connection's error/recv/send paths. Error beats read, read beats write.
 * Returns DN_OK if the connection remains open, DN_ERROR otherwise. */
rstatus_t
core_core(void *arg, uint32_t events)
{
    struct conn *conn = arg;
    struct context *ctx = conn_to_ctx(conn);

    if (conn->dyn_mode) {
        log_debug(LOG_VVERB, "event %04"PRIX32" on d_%c %d", events,
                  conn->dnode_client ? 'c' : (conn->dnode_server ? 's' : 'p'),
                  conn->sd);
    } else {
        log_debug(LOG_VVERB, "event %04"PRIX32" on %c %d", events,
                  conn->client ? 'c' : (conn->proxy ? 'p' : 's'), conn->sd);
    }

    conn->events = events;

    /* error takes precedence over read | write */
    if (events & EVENT_ERR) {
        core_error(ctx, conn);
        return DN_ERROR;
    }

    /* read takes precedence over write */
    if (events & EVENT_READ) {
        if (core_recv(ctx, conn) != DN_OK || conn->done || conn->err) {
            core_close(ctx, conn);
            return DN_ERROR;
        }
    }

    if (events & EVENT_WRITE) {
        if (core_send(ctx, conn) != DN_OK || conn->done || conn->err) {
            core_close(ctx, conn);
            return DN_ERROR;
        }
    }

    return DN_OK;
}
/* Event-loop entry point for a connection: dispatches the epoll event mask.
 * Error takes precedence over read, read over write. Dyn-mode connections are
 * kept open on a failed recv/send unless a real socket error is recorded.
 * Returns DN_OK while the connection is usable, DN_ERROR after close/error.
 *
 * CLEANUP: removed a ~25-line block of commented-out logging code (it also
 * contained broken '%.*s' format usages with a single argument). */
rstatus_t
core_core(void *arg, uint32_t events)
{
    rstatus_t status;
    struct conn *conn = arg;
    struct context *ctx = conn_to_ctx(conn);

    if (conn->dyn_mode) {
        log_debug(LOG_VVERB, "event %04"PRIX32" on d_%c %d", events,
                  conn->dnode_client ? 'c' : (conn->dnode_server ? 's' : 'p'),
                  conn->sd);
    } else {
        log_debug(LOG_VVERB, "event %04"PRIX32" on %c %d", events,
                  conn->client ? 'c' : (conn->proxy ? 'p' : 's'), conn->sd);
    }

    conn->events = events;

    /* error takes precedence over read | write */
    if (events & EVENT_ERR) {
        if (conn->err && conn->dyn_mode) {
            loga("conn err on dnode EVENT_ERR: %d", conn->err);
        }
        core_error(ctx, conn);
        return DN_ERROR;
    }

    /* read takes precedence over write */
    if (events & EVENT_READ) {
        status = core_recv(ctx, conn);
        if (status != DN_OK || conn->done || conn->err) {
            if (conn->dyn_mode) {
                if (conn->err) {
                    loga("conn err on dnode EVENT_READ: %d", conn->err);
                    core_close(ctx, conn);
                    return DN_ERROR;
                }
                return DN_OK;
            }
            core_close(ctx, conn);
            return DN_ERROR;
        }
    }

    if (events & EVENT_WRITE) {
        status = core_send(ctx, conn);
        if (status != DN_OK || conn->done || conn->err) {
            if (conn->dyn_mode) {
                if (conn->err) {
                    loga("conn err on dnode EVENT_WRITE: %d", conn->err);
                    core_close(ctx, conn);
                    return DN_ERROR;
                }
                return DN_OK;
            }
            core_close(ctx, conn);
            return DN_ERROR;
        }
    }

    return DN_OK;
}
/* There are chances that the request to the remote peer or its response got dropped. * Hence we may not always receive a response to the request at the head of the FIFO. * Hence what we do is we mark that request as errored and move on the next one * in the outgoing queue. This works since we always have message ids in monotonically * increasing order. */ static void dnode_rsp_forward(struct context *ctx, struct conn *peer_conn, struct msg *rsp) { rstatus_t status; struct msg *req; struct conn *c_conn; ASSERT(!peer_conn->dnode_client && !peer_conn->dnode_server); /* response from a peer implies that peer is ok and heartbeating */ dnode_peer_ok(ctx, peer_conn); /* dequeue peer message (request) from peer conn */ while (true) { req = TAILQ_FIRST(&peer_conn->omsg_q); log_debug(LOG_VERB, "dnode_rsp_forward entering req %p rsp %p...", req, rsp); c_conn = req->owner; if (req->id == rsp->dmsg->id) { dnode_rsp_forward_match(ctx, peer_conn, rsp); return; } // Report a mismatch and try to rectify log_error("MISMATCH: dnode %c %d rsp_dmsg_id %u req %u:%u dnode rsp %u:%u", peer_conn->dnode_client ? 'c' : (peer_conn->dnode_server ? 's' : 'p'), peer_conn->sd, rsp->dmsg->id, req->id, req->parent_id, rsp->id, rsp->parent_id); if (c_conn && conn_to_ctx(c_conn)) stats_pool_incr(conn_to_ctx(c_conn), c_conn->owner, peer_mismatch_requests); // TODO : should you be worried about message id getting wrapped around to 0? if (rsp->dmsg->id < req->id) { // We received a response from the past. This indeed proves out of order // responses. A blunder to the architecture. Log it and drop the response. log_error("MISMATCH: received response from the past. 
Dropping it"); dnode_rsp_put(rsp); return; } if (req->consistency == DC_ONE) { if (req->swallow) { // swallow the request and move on the next one dnode_rsp_swallow(ctx, peer_conn, req, NULL); continue; } log_warn("req %d:%d with DC_ONE consistency is not being swallowed"); } if ((req->consistency == DC_QUORUM) && !peer_conn->same_dc) { if (req->swallow) { // swallow the request and move on the next one dnode_rsp_swallow(ctx, peer_conn, req, NULL); continue; } log_warn("req %d:%d with DC_QUORUM consistency is not being swallowed"); } log_error("MISMATCHED DNODE RSP RECEIVED %c %d dmsg->id %u req %u:%u rsp %u:%u, skipping....", peer_conn->dnode_client ? 'c' : (peer_conn->dnode_server ? 's' : 'p'), peer_conn->sd, rsp->dmsg->id, req->id, req->parent_id, rsp->id, rsp->parent_id); ASSERT(req != NULL && req->peer == NULL); ASSERT(req->request && !req->done); if (log_loggable(LOG_VVERB)) { loga("skipping req: "); msg_dump(req); } peer_conn->dequeue_outq(ctx, peer_conn, req); req->done = 1; // Create an appropriate response for the request so its propagated up; struct msg *err_rsp = msg_get(peer_conn, false, peer_conn->data_store); err_rsp->error = req->error = 1; err_rsp->err = req->err = BAD_FORMAT; err_rsp->dyn_error = req->dyn_error = BAD_FORMAT; err_rsp->dmsg = dmsg_get(); err_rsp->dmsg->id = req->id; log_debug(LOG_VERB, "%p <-> %p", req, err_rsp); /* establish err_rsp <-> req (response <-> request) link */ req->peer = err_rsp; err_rsp->peer = req; log_error("Peer connection s %d skipping request %u:%u, dummy err_rsp %u:%u", peer_conn->sd, req->id, req->parent_id, err_rsp->id, err_rsp->parent_id); rstatus_t status = conn_handle_response(c_conn, req->parent_id ? req->parent_id : req->id, err_rsp); IGNORE_RET_VAL(status); if (req->swallow) { log_debug(LOG_INFO, "swallow request %d:%d", req->id, req->parent_id); req_put(req); } } }