static bool dnode_rsp_filter(struct context *ctx, struct conn *conn, struct msg *msg) { struct msg *pmsg; ASSERT(conn->type == CONN_DNODE_PEER_SERVER); if (msg_empty(msg)) { ASSERT(conn->rmsg == NULL); log_debug(LOG_VERB, "dyn: filter empty rsp %"PRIu64" on s %d", msg->id, conn->sd); rsp_put(msg); return true; } pmsg = TAILQ_FIRST(&conn->omsg_q); if (pmsg == NULL) { log_debug(LOG_INFO, "dyn: filter stray rsp %"PRIu64" len %"PRIu32" on s %d noreply %d", msg->id, msg->mlen, conn->sd, msg->noreply); rsp_put(msg); return true; } ASSERT(pmsg->peer == NULL); ASSERT(pmsg->request && !pmsg->done); return false; }
static bool rsp_filter(struct context *ctx, struct conn *conn, struct msg *msg) { struct msg *pmsg; ASSERT(!conn->client && !conn->proxy); if(conn->is_Select_Msg){ conn->is_Select_Msg = 0; rsp_put(msg); log_debug(LOG_VERB," select success rsp %"PRIu64" len %"PRIu32" on s %d ", msg->id, msg->mlen, conn->sd); //ignore first response return true; } if (msg_empty(msg)) { ASSERT(conn->rmsg == NULL); log_debug(LOG_VERB, "filter empty rsp %"PRIu64" on s %d", msg->id, conn->sd); rsp_put(msg); return true; } pmsg = TAILQ_FIRST(&conn->omsg_q); if (pmsg == NULL) { log_error("filter stray rsp %"PRIu64" len %"PRIu32" on s %d", msg->id, msg->mlen, conn->sd); rsp_put(msg); errno = EINVAL; conn->err = errno; return true; } ASSERT(pmsg->peer == NULL); ASSERT(pmsg->request && !pmsg->done); if (pmsg->swallow) { conn->dequeue_outq(ctx, conn, pmsg); pmsg->done = 1; log_debug(LOG_INFO, "swallow rsp %"PRIu64" len %"PRIu32" of req " "%"PRIu64" on s %d", msg->id, msg->mlen, pmsg->id, conn->sd); rsp_put(msg); req_put(pmsg); return true; } return false; }
static bool rsp_filter(struct context *ctx, struct conn *conn, struct msg *msg) { struct msg *pmsg; ASSERT(!conn->client && !conn->proxy); if (msg_empty(msg)) { ASSERT(conn->rmsg == NULL); log_debug(LOG_VERB, "filter empty rsp %"PRIu64" on s %d", msg->id, conn->sd); rsp_put(msg); return true; } pmsg = TAILQ_FIRST(&conn->omsg_q); if (pmsg == NULL) { log_debug(LOG_VERB, "filter stray rsp %"PRIu64" len %"PRIu32" on s %d", msg->id, msg->mlen, conn->sd); rsp_put(msg); return true; } if (pmsg->noreply) { conn->dequeue_outq(ctx, conn, pmsg); rsp_put(pmsg); rsp_put(msg); return true; } ASSERT(pmsg->peer == NULL); ASSERT(pmsg->request && !pmsg->done); if (pmsg->swallow) { conn->dequeue_outq(ctx, conn, pmsg); pmsg->done = 1; if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "swallow rsp %"PRIu64" len %"PRIu32" of req " "%"PRIu64" on s %d", msg->id, msg->mlen, pmsg->id, conn->sd); } rsp_put(msg); req_put(pmsg); return true; } return false; }
/*
 * Release every response held by rspmgr except dont_free.
 *
 * Walks the first good_responses entries of rspmgr->responses and then the
 * stored error response, handing each one to rsp_put() unless it is the
 * message identified by dont_free. dont_free may be NULL, in which case
 * nothing is spared.
 */
void
rspmgr_free_other_responses(struct response_mgr *rspmgr, struct msg *dont_free)
{
    int idx = 0;

    while (idx < rspmgr->good_responses) {
        struct msg *candidate = rspmgr->responses[idx];

        idx++;
        if (dont_free != NULL && candidate == dont_free) {
            continue;
        }
        rsp_put(candidate);
    }

    if (rspmgr->err_rsp == NULL) {
        return;
    }
    if (dont_free != NULL && dont_free == rspmgr->err_rsp) {
        return;
    }
    rsp_put(rspmgr->err_rsp);
}
/*
 * Attach a response to its original request on a dnode peer client
 * connection so it can be forwarded to the caller.
 *
 * conn  - connection of type CONN_DNODE_PEER_CLIENT
 * reqid - key of the original request in conn->outstanding_msgs_dict
 * rsp   - the response; released here if the request is no longer known
 *
 * Returns DN_OK, or the status of event_add_out() when the request is first
 * in the out queue and arming the write event fails (conn->err is then set
 * from errno).
 */
static rstatus_t
dnode_client_handle_response(struct conn *conn, msgid_t reqid, struct msg *rsp)
{
    // Forward the response to the caller which is client connection.
    rstatus_t status = DN_OK;
    struct context *ctx = conn_to_ctx(conn);

    ASSERT(conn->type == CONN_DNODE_PEER_CLIENT);

    // Fetch the original request
    struct msg *req = dictFetchValue(conn->outstanding_msgs_dict, &reqid);
    if (!req) {
        // reqid is a message id; print it with a matching 64-bit conversion
        // instead of %d.
        log_notice("looks like we already cleanedup the request for %"PRIu64,
                   (uint64_t)reqid);
        rsp_put(rsp);
        return DN_OK;
    }

    // dnode client has no extra logic of coalescing etc like the client/coordinator.
    // Hence all work for this request is done at this time
    ASSERT_LOG(!req->peer, "req %lu:%lu has peer set", req->id, req->parent_id);
    req->selected_rsp = rsp;
    rsp->peer = req;

    // Remove the message from the hash table.
    dictDelete(conn->outstanding_msgs_dict, &reqid);

    // If this request is first in the out queue, then the connection is ready,
    // add the connection to epoll for writing
    if (conn_is_req_first_in_outqueue(conn, req)) {
        status = event_add_out(ctx->evb, conn);
        if (status != DN_OK) {
            conn->err = errno;
        }
    }
    return status;
}
static bool rsp_filter(struct context *ctx, struct conn *conn, struct msg *msg) { struct msg *pmsg; ASSERT(!conn->client && !conn->proxy); if (msg_empty(msg)) { ASSERT(conn->rmsg == NULL); log_debug(LOG_VERB, "filter empty rsp %"PRIu64" on s %d", msg->id, conn->sd); rsp_put(msg); return true; } pmsg = TAILQ_FIRST(&conn->omsg_q); if (pmsg == NULL) { log_debug(LOG_ERR, "filter stray rsp %"PRIu64" len %"PRIu32" on s %d", msg->id, msg->mlen, conn->sd); rsp_put(msg); return true; } ASSERT(pmsg->peer == NULL); ASSERT(pmsg->request && !pmsg->done); /* establish msg <-> pmsg (response <-> request) link */ msg->peer = pmsg; pmsg->peer = msg; if (pmsg->swallow) { if (pmsg->pre_swallow != NULL) { pmsg->pre_swallow(ctx, conn, msg); } conn->dequeue_outq(ctx, conn, pmsg); pmsg->done = 1; log_debug(LOG_INFO, "swallow rsp %"PRIu64" len %"PRIu32" of req " "%"PRIu64" on s %d", msg->id, msg->mlen, pmsg->id, conn->sd); req_put(pmsg); return true; } return false; }
struct msg * rsp_recv_next(struct context *ctx, struct conn *conn, bool alloc) { struct msg *msg; ASSERT(!conn->client && !conn->proxy); ASSERT(!conn->connecting); if (conn->eof) { msg = conn->rmsg; /* server sent eof before sending the entire request */ if (msg != NULL) { conn->rmsg = NULL; ASSERT(msg->peer == NULL); ASSERT(!msg->request); log_error("eof s %d discarding incomplete rsp %"PRIu64" len " "%"PRIu32"", conn->sd, msg->id, msg->mlen); rsp_put(msg); } /* * We treat TCP half-close from a server different from how we treat * those from a client. On a FIN from a server, we close the connection * immediately by sending the second FIN even if there were outstanding * or pending requests. This is actually a tricky part in the FA, as * we don't expect this to happen unless the server is misbehaving or * it crashes */ conn->done = 1; log_error("s %d active %d is done", conn->sd, conn->active(conn)); return NULL; } msg = conn->rmsg; if (msg != NULL) { ASSERT(!msg->request); return msg; } if (!alloc) { return NULL; } msg = rsp_get(conn); if (msg != NULL) { conn->rmsg = msg; } return msg; }
static rstatus_t swallow_extra_rsp(struct msg *req, struct msg *rsp) { log_info("req %d swallowing response %d", req->id, rsp->id); ASSERT_LOG(req->awaiting_rsps, "Req %d:%d already has no awaiting rsps, rsp %d", req->id, req->parent_id, rsp->id); // drop this response. rsp_put(rsp); msg_decr_awaiting_rsps(req); return DN_NOOPS; }
static struct msg * rsp_make_error(struct context *ctx, struct conn *conn, struct msg *msg) { struct msg *pmsg; /* peer message (response) */ struct msg *cmsg, *nmsg; /* current and next message (request) */ uint64_t id; err_t err; ASSERT(conn->client && !conn->proxy); ASSERT(msg->request && req_error(conn, msg)); ASSERT(msg->owner == conn); id = msg->frag_id; if (id != 0) { for (err = 0, cmsg = TAILQ_NEXT(msg, c_tqe); cmsg != NULL && cmsg->frag_id == id; cmsg = nmsg) { nmsg = TAILQ_NEXT(cmsg, c_tqe); /* dequeue request (error fragment) from client outq */ conn->dequeue_outq(ctx, conn, cmsg); if (err == 0 && cmsg->err != 0) { err = cmsg->err; } req_put(cmsg); } } else { err = msg->err; } pmsg = msg->peer; if (pmsg != NULL) { ASSERT(!pmsg->request && pmsg->peer == msg); msg->peer = NULL; pmsg->peer = NULL; rsp_put(pmsg); } #if 1 //shenzheng 2014-12-4 common //attention: the new error macro we defined must be a negative number. if(err >= 0) { #endif return msg_get_error(conn->redis, err); #if 1 //shenzheng 2014-12-4 common } else { return msg_get_error_other(conn->redis, err); } #endif }
/*
 * Handle a response to a given request. If this is a quorum setting, choose
 * the right response. Then make sure all the requests are satisfied in a
 * fragmented request scenario and then use the post coalesce logic to cook
 * up a combined response.
 *
 * conn  - connection of type CONN_CLIENT
 * reqid - key of the original request in conn->outstanding_msgs_dict
 * rsp   - the response; released here if the request is no longer known,
 *         otherwise passed to msg_handle_response()
 *
 * Returns DN_OK when the request is gone or the connection is waiting to be
 * unreferenced; otherwise the status of msg_handle_response(), or of
 * event_add_out() when the request is done and the write event is armed.
 */
static rstatus_t
client_handle_response(struct conn *conn, msgid_t reqid, struct msg *rsp)
{
    ASSERT_LOG(!rsp->peer, "response %lu:%lu has peer set",
               rsp->id, rsp->parent_id);

    // now the handler owns the response.
    ASSERT(conn->type == CONN_CLIENT);

    // Fetch the original request
    struct msg *req = dictFetchValue(conn->outstanding_msgs_dict, &reqid);
    if (!req) {
        // reqid is a message id; print it with a matching 64-bit conversion
        // instead of %d.
        log_notice("looks like we already cleanedup the request for %"PRIu64,
                   (uint64_t)reqid);
        rsp_put(rsp);
        return DN_OK;
    }

    // we have to submit the response irrespective of the unref status.
    rstatus_t status = msg_handle_response(req, rsp);
    if (conn->waiting_to_unref) {
        // dont care about the status.
        if (req->awaiting_rsps)
            return DN_OK;
        // all responses received
        dictDelete(conn->outstanding_msgs_dict, &reqid);
        log_info("Putting req %"PRIu64, (uint64_t)req->id);
        req_put(req);
        client_unref_internal_try_put(conn);
        return DN_OK;
    }

    if (status == DN_NOOPS) {
        // by now the response is dropped
        if (!req->awaiting_rsps) {
            // if we have sent the response for this request or the connection
            // is closed and we are just waiting to drain off the messages.
            if (req->rsp_sent) {
                dictDelete(conn->outstanding_msgs_dict, &reqid);
                log_info("Putting req %"PRIu64, (uint64_t)req->id);
                req_put(req);
            }
        }
    } else if (status == DN_OK) {
        g_pre_coalesce(req->selected_rsp);
        if (req_done(conn, req)) {
            struct context *ctx = conn_to_ctx(conn);
            status = event_add_out(ctx->evb, conn);
            if (status != DN_OK) {
                conn->err = errno;
            }
        }
    }
    return status;
}
static void dnode_rsp_swallow(struct context *ctx, struct conn *peer_conn, struct msg *req, struct msg *rsp) { conn_dequeue_outq(ctx, peer_conn, req); req->done = 1; log_debug(LOG_VERB, "conn %p swallow %p", peer_conn, req); if (rsp) { log_debug(LOG_INFO, "dyn: swallow rsp %"PRIu64" len %"PRIu32" of req " "%"PRIu64" on s %d", rsp->id, rsp->mlen, req->id, peer_conn->sd); rsp_put(rsp); } req_put(req); }
/*
 * Release a request message.
 *
 * If a response is linked to the request, the link is broken in both
 * directions and the response is released with rsp_put() before the request
 * itself goes to msg_put().
 */
void
req_put(struct msg *msg)
{
    struct msg *linked_rsp; /* response linked to this request, if any */

    ASSERT(msg->request);

    linked_rsp = msg->peer;
    if (linked_rsp != NULL) {
        ASSERT(!linked_rsp->request && linked_rsp->peer == msg);

        /* break the request <-> response link before releasing */
        linked_rsp->peer = NULL;
        msg->peer = NULL;

        rsp_put(linked_rsp);
    }

    msg_put(msg);
}
void sentinel_recv_done(struct context *ctx, struct conn *conn, struct msg *msg, struct msg *nmsg) { rstatus_t status; ASSERT(!conn->client && !conn->proxy && conn->sentinel); ASSERT(msg != NULL && conn->rmsg == msg); ASSERT(!msg->request); ASSERT(msg->owner == conn); ASSERT(nmsg == NULL || !nmsg->request); ASSERT(sentinel_status != SENTINEL_CONN_DISCONNECTED); /* enqueue next message (response), if any */ conn->rmsg = nmsg; switch (sentinel_status) { case SENTINEL_CONN_SEND_REQ: status = sentinel_proc_sentinel_info(ctx, msg); if (status == NC_OK) { sentinel_status = SENTINEL_CONN_ACK_INFO; } break; case SENTINEL_CONN_ACK_INFO: status = sentinel_proc_acksub(ctx, msg); if (status == NC_OK) { sentinel_status = SENTINEL_CONN_ACK_SUB; } break; case SENTINEL_CONN_ACK_SUB: status = sentinel_proc_pub(ctx, msg); break; default: status = NC_ERROR; } rsp_put(msg); if (status != NC_OK) { log_error("sentinel's response error, close sentinel conn."); conn->done = 1; } }
static struct msg * rsp_make_error(struct context *ctx, struct conn *conn, struct msg *msg) { struct msg *pmsg; /* peer message (response) */ struct msg *cmsg, *nmsg; /* current and next message (request) */ uint64_t id; err_t err; ASSERT((conn->type == CONN_CLIENT) || (conn->type == CONN_DNODE_PEER_CLIENT)); ASSERT(msg->request && req_error(conn, msg)); ASSERT(msg->owner == conn); id = msg->frag_id; if (id != 0) { for (err = 0, cmsg = TAILQ_NEXT(msg, c_tqe); cmsg != NULL && cmsg->frag_id == id; cmsg = nmsg) { nmsg = TAILQ_NEXT(cmsg, c_tqe); /* dequeue request (error fragment) from client outq */ conn_dequeue_outq(ctx, conn, cmsg); if (err == 0 && cmsg->err != 0) { err = cmsg->err; } req_put(cmsg); } } else { err = msg->err; } pmsg = msg->selected_rsp; if (pmsg != NULL) { ASSERT(!pmsg->request && pmsg->peer == msg); msg->selected_rsp = NULL; pmsg->peer = NULL; rsp_put(pmsg); } return msg_get_error(conn, msg->dyn_error, err); }
static struct msg * rsp_make_error(struct context *ctx, struct conn *conn, struct msg *msg) { struct msg *pmsg; /* peer message (response) */ struct msg *cmsg, *nmsg; /* current and next message (request) */ uint64_t id; err_t err; ASSERT(conn->client && !conn->proxy); ASSERT(msg->request && req_error(conn, msg)); ASSERT(msg->owner == conn); id = msg->frag_id; /* 将属于同一分片的msg的都干掉 */ if (id != 0) { for (err = 0, cmsg = TAILQ_NEXT(msg, c_tqe); cmsg != NULL && cmsg->frag_id == id; cmsg = nmsg) { nmsg = TAILQ_NEXT(cmsg, c_tqe); /* dequeue request (error fragment) from client outq */ conn->dequeue_outq(ctx, conn, cmsg); if (err == 0 && cmsg->err != 0) { err = cmsg->err; } req_put(cmsg); } } else { err = msg->err; } pmsg = msg->peer; if (pmsg != NULL) { ASSERT(!pmsg->request && pmsg->peer == msg); msg->peer = NULL; pmsg->peer = NULL; rsp_put(pmsg); } return msg_get_error(conn->redis, err); }
rstatus_t rspmgr_submit_response(struct response_mgr *rspmgr, struct msg*rsp) { log_info("req %d submitting response %d awaiting_rsps %d", rspmgr->msg->id, rsp->id, rspmgr->msg->awaiting_rsps); if (rsp->error) { log_debug(LOG_VERB, "Received error response %d:%d for req %d:%d", rsp->id, rsp->parent_id, rspmgr->msg->id, rspmgr->msg->parent_id); rspmgr->error_responses++; if (rspmgr->err_rsp == NULL) rspmgr->err_rsp = rsp; else rsp_put(rsp); } else { rspmgr->checksums[rspmgr->good_responses] = msg_payload_crc32(rsp); log_debug(LOG_VERB, "Good response %d:%d checksum %u", rsp->id, rsp->parent_id, rspmgr->checksums[rspmgr->good_responses]); rspmgr->responses[rspmgr->good_responses++] = rsp; } msg_decr_awaiting_rsps(rspmgr->msg); return DN_OK; }
void server_close(struct context *ctx, struct conn *conn) { rstatus_t status; struct msg *msg, *nmsg; /* current and next message */ struct conn *c_conn; /* peer client connection */ ASSERT(!conn->client && !conn->proxy); server_close_stats(ctx, conn->owner, conn->err, conn->eof, conn->connected); if (conn->sd < 0) { server_failure(ctx, conn->owner); conn->unref(conn); conn_put(conn); return; } for (msg = TAILQ_FIRST(&conn->imsg_q); msg != NULL; msg = nmsg) { nmsg = TAILQ_NEXT(msg, s_tqe); /* dequeue the message (request) from server inq */ conn->dequeue_inq(ctx, conn, msg); /* * Don't send any error response, if * 1. request is tagged as noreply or, * 2. client has already closed its connection */ if (msg->swallow || msg->noreply) { log_debug(LOG_INFO, "close s %d swallow req %"PRIu64" len %"PRIu32 " type %d", conn->sd, msg->id, msg->mlen, msg->type); req_put(msg); } else { c_conn = msg->owner; //ASSERT(c_conn->client && !c_conn->proxy); msg->done = 1; msg->error = 1; msg->err = conn->err; msg->dyn_error = STORAGE_CONNECTION_REFUSE; if (req_done(c_conn, TAILQ_FIRST(&c_conn->omsg_q))) { event_add_out(ctx->evb, msg->owner); } log_debug(LOG_INFO, "close s %d schedule error for req %"PRIu64" " "len %"PRIu32" type %d from c %d%c %s", conn->sd, msg->id, msg->mlen, msg->type, c_conn->sd, conn->err ? ':' : ' ', conn->err ? 
strerror(conn->err): " "); } } ASSERT(TAILQ_EMPTY(&conn->imsg_q)); for (msg = TAILQ_FIRST(&conn->omsg_q); msg != NULL; msg = nmsg) { nmsg = TAILQ_NEXT(msg, s_tqe); /* dequeue the message (request) from server outq */ conn->dequeue_outq(ctx, conn, msg); if (msg->swallow) { log_debug(LOG_INFO, "close s %d swallow req %"PRIu64" len %"PRIu32 " type %d", conn->sd, msg->id, msg->mlen, msg->type); req_put(msg); } else { c_conn = msg->owner; //ASSERT(c_conn->client && !c_conn->proxy); msg->done = 1; msg->error = 1; msg->err = conn->err; if (req_done(c_conn, TAILQ_FIRST(&c_conn->omsg_q))) { event_add_out(ctx->evb, msg->owner); } log_debug(LOG_INFO, "close s %d schedule error for req %"PRIu64" " "len %"PRIu32" type %d from c %d%c %s", conn->sd, msg->id, msg->mlen, msg->type, c_conn->sd, conn->err ? ':' : ' ', conn->err ? strerror(conn->err): " "); } } ASSERT(TAILQ_EMPTY(&conn->omsg_q)); msg = conn->rmsg; if (msg != NULL) { conn->rmsg = NULL; ASSERT(!msg->request); ASSERT(msg->peer == NULL); rsp_put(msg); log_debug(LOG_INFO, "close s %d discarding rsp %"PRIu64" len %"PRIu32" " "in error", conn->sd, msg->id, msg->mlen); } ASSERT(conn->smsg == NULL); server_failure(ctx, conn->owner); conn->unref(conn); status = close(conn->sd); if (status < 0) { log_error("close s %d failed, ignored: %s", conn->sd, strerror(errno)); } conn->sd = -1; conn_put(conn); }
static bool rsp_filter(struct context *ctx, struct conn *conn, struct msg *msg) { struct msg *pmsg; ASSERT(!conn->client && !conn->proxy); if (msg_empty(msg)) { ASSERT(conn->rmsg == NULL); log_debug(LOG_VERB, "filter empty rsp %"PRIu64" on s %d", msg->id, conn->sd); rsp_put(msg); return true; } pmsg = TAILQ_FIRST(&conn->omsg_q); if (pmsg == NULL) { log_debug(LOG_ERR, "filter stray rsp %"PRIu64" len %"PRIu32" on s %d", msg->id, msg->mlen, conn->sd); rsp_put(msg); /* * Memcached server can respond with an error response before it has * received the entire request. This is most commonly seen for set * requests that exceed item_size_max. IMO, this behavior of memcached * is incorrect. The right behavior for update requests that are over * item_size_max would be to either: * - close the connection Or, * - read the entire item_size_max data and then send CLIENT_ERROR * * We handle this stray packet scenario in nutcracker by closing the * server connection which would end up sending SERVER_ERROR to all * clients that have requests pending on this server connection. The * fix is aggressive, but not doing so would lead to clients getting * out of sync with the server and as a result clients end up getting * responses that don't correspond to the right request. * * See: https://github.com/twitter/twemproxy/issues/149 */ conn->err = EINVAL; conn->done = 1; return true; } ASSERT(pmsg->peer == NULL); ASSERT(pmsg->request && !pmsg->done); if (pmsg->swallow) { conn->swallow_msg(conn, pmsg, msg); conn->dequeue_outq(ctx, conn, pmsg); pmsg->done = 1; log_debug(LOG_INFO, "swallow rsp %"PRIu64" len %"PRIu32" of req " "%"PRIu64" on s %d", msg->id, msg->mlen, pmsg->id, conn->sd); rsp_put(msg); req_put(pmsg); return true; } return false; }
/* * Sending a mbuf of gossip data over the wire to a peer */ void dnode_peer_gossip_forward(struct context *ctx, struct conn *conn, int data_store, struct mbuf *data_buf) { rstatus_t status; struct msg *msg = msg_get(conn, 1, data_store, __FUNCTION__); if (msg == NULL) { log_debug(LOG_DEBUG, "Unable to obtain a msg"); return; } struct mbuf *header_buf = mbuf_get(); if (header_buf == NULL) { log_debug(LOG_DEBUG, "Unable to obtain a data_buf"); rsp_put(msg); return; } uint64_t msg_id = peer_msg_id++; if (conn->dnode_secured) { if (log_loggable(LOG_VERB)) { log_debug(LOG_VERB, "Assemble a secured msg to send"); log_debug(LOG_VERB, "AES encryption key: %s\n", base64_encode(conn->aes_key, AES_KEYLEN)); } if (ENCRYPTION) { struct mbuf *encrypted_buf = mbuf_get(); if (encrypted_buf == NULL) { loga("Unable to obtain an data_buf for encryption!"); return; //TODOs: need to clean up } status = dyn_aes_encrypt(data_buf->pos, mbuf_length(data_buf), encrypted_buf, conn->aes_key); if (log_loggable(LOG_VERB)) { log_debug(LOG_VERB, "#encrypted bytes : %d", status); } //write dnode header dmsg_write(header_buf, msg_id, GOSSIP_SYN, conn, mbuf_length(encrypted_buf)); if (log_loggable(LOG_VVERB)) { log_hexdump(LOG_VVERB, data_buf->pos, mbuf_length(data_buf), "dyn message original payload: "); log_hexdump(LOG_VVERB, encrypted_buf->pos, mbuf_length(encrypted_buf), "dyn message encrypted payload: "); } mbuf_remove(&msg->mhdr, data_buf); mbuf_insert(&msg->mhdr, encrypted_buf); //free data_buf as no one will need it again mbuf_put(data_buf); //TODOS: need to remove this from the msg->mhdr as in the other method } else { if (log_loggable(LOG_VVERB)) { log_debug(LOG_VVERB, "No encryption"); } dmsg_write_mbuf(header_buf, msg_id, GOSSIP_SYN, conn, mbuf_length(data_buf)); mbuf_insert(&msg->mhdr, data_buf); } } else { if (log_loggable(LOG_VVERB)) { log_debug(LOG_VVERB, "Assemble a non-secured msg to send"); } dmsg_write_mbuf(header_buf, msg_id, GOSSIP_SYN, conn, mbuf_length(data_buf)); 
mbuf_insert(&msg->mhdr, data_buf); } mbuf_insert_head(&msg->mhdr, header_buf); if (log_loggable(LOG_VVERB)) { log_hexdump(LOG_VVERB, header_buf->pos, mbuf_length(header_buf), "dyn gossip message header: "); msg_dump(msg); } /* enqueue the message (request) into peer inq */ if (TAILQ_EMPTY(&conn->imsg_q)) { status = event_add_out(ctx->evb, conn); if (status != DN_OK) { dnode_req_forward_error(ctx, conn, msg); conn->err = errno; return; } } //need to handle a reply //conn->enqueue_outq(ctx, conn, msg); msg->noreply = 1; conn_enqueue_inq(ctx, conn, msg); }
struct msg * rsp_recv_next(struct context *ctx, struct conn *conn, bool alloc) { struct msg *msg; ASSERT((conn->type == CONN_DNODE_PEER_SERVER) || (conn->type == CONN_SERVER)); if (conn->eof) { msg = conn->rmsg; if (conn->dyn_mode) { if (conn->non_bytes_recv > MAX_CONN_ALLOWABLE_NON_RECV) { conn->err = EPIPE; return NULL; } conn->eof = 0; return msg; } /* server sent eof before sending the entire request */ if (msg != NULL) { conn->rmsg = NULL; ASSERT(msg->peer == NULL); ASSERT(!msg->request); log_error("eof s %d discarding incomplete rsp %"PRIu64" len " "%"PRIu32"", conn->sd, msg->id, msg->mlen); rsp_put(msg); } /* * We treat TCP half-close from a server different from how we treat * those from a client. On a FIN from a server, we close the connection * immediately by sending the second FIN even if there were outstanding * or pending requests. This is actually a tricky part in the FA, as * we don't expect this to happen unless the server is misbehaving or * it crashes */ conn->done = 1; log_debug(LOG_DEBUG, "s %d active %d is done", conn->sd, conn_active(conn)); return NULL; } msg = conn->rmsg; if (msg != NULL) { ASSERT(!msg->request); return msg; } if (!alloc) { return NULL; } msg = rsp_get(conn); if (msg != NULL) { conn->rmsg = msg; } return msg; }
void server_close(struct context *ctx, struct conn *conn) { rstatus_t status; struct msg *msg, *nmsg; /* current and next message */ ASSERT(conn->type == CONN_SERVER); server_close_stats(ctx, conn->owner, conn->err, conn->eof, conn->connected); if (conn->sd < 0) { server_failure(ctx, conn->owner); conn_unref(conn); conn_put(conn); return; } for (msg = TAILQ_FIRST(&conn->omsg_q); msg != NULL; msg = nmsg) { nmsg = TAILQ_NEXT(msg, s_tqe); /* dequeue the message (request) from server outq */ conn_dequeue_outq(ctx, conn, msg); server_ack_err(ctx, conn, msg); } ASSERT(TAILQ_EMPTY(&conn->omsg_q)); for (msg = TAILQ_FIRST(&conn->imsg_q); msg != NULL; msg = nmsg) { nmsg = TAILQ_NEXT(msg, s_tqe); /* dequeue the message (request) from server inq */ conn_dequeue_inq(ctx, conn, msg); // We should also remove the msg from the timeout rbtree. msg_tmo_delete(msg); server_ack_err(ctx, conn, msg); stats_server_incr(ctx, conn->owner, server_dropped_requests); } ASSERT(TAILQ_EMPTY(&conn->imsg_q)); msg = conn->rmsg; if (msg != NULL) { conn->rmsg = NULL; ASSERT(!msg->request); ASSERT(msg->peer == NULL); rsp_put(msg); log_debug(LOG_INFO, "close s %d discarding rsp %"PRIu64" len %"PRIu32" " "in error", conn->sd, msg->id, msg->mlen); } ASSERT(conn->smsg == NULL); server_failure(ctx, conn->owner); conn_unref(conn); status = close(conn->sd); if (status < 0) { log_error("close s %d failed, ignored: %s", conn->sd, strerror(errno)); } conn->sd = -1; conn_put(conn); }
struct msg * rsp_send_next(struct context *ctx, struct conn *conn) { rstatus_t status; struct msg *msg, *pmsg; /* response and it's peer request */ ASSERT(conn->client && !conn->proxy); pmsg = TAILQ_FIRST(&conn->omsg_q); if (pmsg == NULL || !req_done(conn, pmsg)) { /* nothing is outstanding, initiate close? */ if (pmsg == NULL && conn->eof) { conn->done = 1; log_debug(LOG_INFO, "c %d is done", conn->sd); } status = event_del_out(ctx->evb, conn); if (status != NC_OK) { conn->err = errno; } return NULL; } msg = conn->smsg; if (msg != NULL) { log_error("msg was not null %d", msg->id); ASSERT(!msg->request && msg->peer != NULL); ASSERT(req_done(conn, msg->peer)); pmsg = TAILQ_NEXT(msg->peer, c_tqe); } if (pmsg == NULL || !req_done(conn, pmsg)) { conn->smsg = NULL; return NULL; } if (pmsg->duplicate) { conn->dequeue_outq(ctx, conn, pmsg); rsp_put(msg); conn->smsg = NULL; return NULL; } ASSERT(pmsg->request && !pmsg->swallow); if (req_error(conn, pmsg)) { msg = rsp_make_error(ctx, conn, pmsg); if (msg == NULL) { conn->err = errno; return NULL; } msg->peer = pmsg; pmsg->peer = msg; stats_pool_incr(ctx, conn->owner, forward_error); } else { msg = pmsg->peer; } log_error("here4"); ASSERT(!msg->request); log_error("here5"); conn->smsg = msg; log_error("send next rsp %"PRIu64" on c %d", msg->id, conn->sd); return msg; }
/* There are chances that the request to the remote peer or its response got dropped. * Hence we may not always receive a response to the request at the head of the FIFO. * Hence what we do is we mark that request as errored and move on the next one * in the outgoing queue. This works since we always have message ids in monotonically * increasing order. */ static void dnode_rsp_forward(struct context *ctx, struct conn *peer_conn, struct msg *rsp) { rstatus_t status; struct msg *req; struct conn *c_conn; ASSERT(peer_conn->type == CONN_DNODE_PEER_SERVER); /* response from a peer implies that peer is ok and heartbeating */ dnode_peer_ok(ctx, peer_conn); /* dequeue peer message (request) from peer conn */ while (true) { req = TAILQ_FIRST(&peer_conn->omsg_q); log_debug(LOG_VERB, "dnode_rsp_forward entering req %p rsp %p...", req, rsp); c_conn = req->owner; if (!peer_conn->same_dc && req->remote_region_send_time) { struct stats *st = ctx->stats; uint64_t delay = dn_usec_now() - req->remote_region_send_time; histo_add(&st->cross_region_histo, delay); } if (req->id == rsp->dmsg->id) { dnode_rsp_forward_match(ctx, peer_conn, rsp); return; } // Report a mismatch and try to rectify log_error("MISMATCH: dnode %s %d rsp_dmsg_id %u req %u:%u dnode rsp %u:%u", conn_get_type_string(peer_conn), peer_conn->sd, rsp->dmsg->id, req->id, req->parent_id, rsp->id, rsp->parent_id); if (c_conn && conn_to_ctx(c_conn)) stats_pool_incr(conn_to_ctx(c_conn), c_conn->owner, peer_mismatch_requests); // TODO : should you be worried about message id getting wrapped around to 0? if (rsp->dmsg->id < req->id) { // We received a response from the past. This indeed proves out of order // responses. A blunder to the architecture. Log it and drop the response. log_error("MISMATCH: received response from the past. 
Dropping it"); rsp_put(rsp); return; } if (req->consistency == DC_ONE) { if (req->swallow) { // swallow the request and move on the next one dnode_rsp_swallow(ctx, peer_conn, req, NULL); continue; } log_warn("req %d:%d with DC_ONE consistency is not being swallowed"); } if ((req->consistency == DC_QUORUM) && !peer_conn->same_dc) { if (req->swallow) { // swallow the request and move on the next one dnode_rsp_swallow(ctx, peer_conn, req, NULL); continue; } log_warn("req %d:%d with DC_QUORUM consistency is not being swallowed"); } log_error("MISMATCHED DNODE RSP RECEIVED %s %d dmsg->id %u req %u:%u rsp %u:%u, skipping....", conn_get_type_string(peer_conn), peer_conn->sd, rsp->dmsg->id, req->id, req->parent_id, rsp->id, rsp->parent_id); ASSERT(req != NULL && req->peer == NULL); ASSERT(req->request && !req->done); if (log_loggable(LOG_VVERB)) { loga("skipping req: "); msg_dump(req); } conn_dequeue_outq(ctx, peer_conn, req); req->done = 1; // Create an appropriate response for the request so its propagated up; struct msg *err_rsp = msg_get(peer_conn, false, peer_conn->data_store, __FUNCTION__); err_rsp->error = req->error = 1; err_rsp->err = req->err = BAD_FORMAT; err_rsp->dyn_error = req->dyn_error = BAD_FORMAT; err_rsp->dmsg = dmsg_get(); err_rsp->dmsg->id = req->id; log_debug(LOG_VERB, "%p <-> %p", req, err_rsp); /* establish err_rsp <-> req (response <-> request) link */ req->peer = err_rsp; err_rsp->peer = req; log_error("Peer connection s %d skipping request %u:%u, dummy err_rsp %u:%u", peer_conn->sd, req->id, req->parent_id, err_rsp->id, err_rsp->parent_id); rstatus_t status = conn_handle_response(c_conn, req->parent_id ? req->parent_id : req->id, err_rsp); IGNORE_RET_VAL(status); if (req->swallow) { log_debug(LOG_INFO, "swallow request %d:%d", req->id, req->parent_id); req_put(req); } } }