/*
 * Append a client request to the connection's outgoing message queue.
 * The enqueue timestamp (microseconds) is recorded on the message so the
 * matching dequeue can compute how long the request sat in the queue.
 */
void
req_client_enqueue_omsgq(struct context *ctx, struct conn *conn, struct msg *msg)
{
    ASSERT(msg->request);
    ASSERT(conn->client && !conn->proxy);

    /* stamp enqueue time for latency accounting at dequeue */
    msg->stime_in_microsec = dn_usec_now();

    TAILQ_INSERT_TAIL(&conn->omsg_q, msg, c_tqe);
}
/*
 * Remove a client request from the connection's outgoing message queue and
 * record its queue latency (time since req_client_enqueue_omsgq stamped it).
 *
 * NOTE(review): histo_add() is called here with a single argument, but the
 * other dequeue path in this file calls histo_add(&histogram, value) with
 * two arguments — confirm which signature is current; this looks like a
 * stale variant of the function.
 */
void req_client_dequeue_omsgq(struct context *ctx, struct conn *conn, struct msg *msg)
{
    ASSERT(msg->request);
    ASSERT(conn->client && !conn->proxy);

    /* elapsed microseconds since the enqueue timestamp */
    uint64_t latency = dn_usec_now() - msg->stime_in_microsec;
    histo_add(latency);

    TAILQ_REMOVE(&conn->omsg_q, msg, c_tqe);
}
static void dnode_peer_failure(struct context *ctx, struct server *server) { struct server_pool *pool = server->owner; int64_t now, next; rstatus_t status; //fix me if (!pool->auto_eject_hosts) { return; } server->failure_count++; log_debug(LOG_VERB, "dyn: peer '%.*s' failure count %"PRIu32" limit %"PRIu32, server->pname.len, server->pname.data, server->failure_count, pool->server_failure_limit); if (server->failure_count < pool->server_failure_limit) { return; } now = dn_usec_now(); if (now < 0) { return; } //fix me //stats_server_set_ts(ctx, server, server_ejected_at, now); //fix me next = now + pool->server_retry_timeout; log_debug(LOG_INFO, "dyn: update peer pool %"PRIu32" '%.*s' to delete peer '%.*s' " "for next %"PRIu32" secs", pool->idx, pool->name.len, pool->name.data, server->pname.len, server->pname.data, pool->server_retry_timeout / 1000 / 1000); stats_pool_incr(ctx, pool, peer_ejects); server->failure_count = 0; server->next_retry = next; status = dnode_peer_pool_run(pool); if (status != DN_OK) { log_error("dyn: updating peer pool %"PRIu32" '%.*s' failed: %s", pool->idx, pool->name.len, pool->name.data, strerror(errno)); } }
/*
 * Rebuild the peer pool's server set once the scheduled rebuild deadline has
 * passed.  Returns DN_OK when no rebuild is needed or the rebuild succeeds;
 * DN_ERROR (with errno set to ECONNREFUSED when no peer is live) otherwise.
 */
rstatus_t dnode_peer_pool_update(struct server_pool *pool)
{
    rstatus_t status;
    int64_t now;
    uint32_t pnlive_server; /* prev # live server */

    /* Ejection disabled: membership never changes, nothing to rebuild. */
    //fix me
    if (!pool->auto_eject_hosts) {
        return DN_OK;
    }

    /* No rebuild has been scheduled. */
    //fix me
    if (pool->next_rebuild == 0LL) {
        return DN_OK;
    }

    now = dn_usec_now();
    if (now < 0) {
        return DN_ERROR;
    }

    /*
     * Deadline not reached yet: succeed unless no peer is currently live,
     * in which case callers must see a connection-refused style error.
     */
    //fix me
    if (now <= pool->next_rebuild) {
        if (pool->nlive_server == 0) {
            errno = ECONNREFUSED;
            return DN_ERROR;
        }
        return DN_OK;
    }

    /* FIXME: use another variable — snapshot live count before the rebuild */
    pnlive_server = pool->nlive_server;

    status = dnode_peer_pool_run(pool);
    if (status != DN_OK) {
        log_error("dyn: updating peer pool %"PRIu32" with dist %d failed: %s",
                  pool->idx, pool->dist_type, strerror(errno));
        return status;
    }

    log_debug(LOG_INFO, "dyn: update peer pool %"PRIu32" '%.*s' to add %"PRIu32" servers",
              pool->idx, pool->name.len, pool->name.data,
              pool->nlive_server - pnlive_server);

    return DN_OK;
}
void dnode_req_send_done(struct context *ctx, struct conn *conn, struct msg *msg) { if (log_loggable(LOG_DEBUG)) { log_debug(LOG_VERB, "dnode_req_send_done entering!!!"); } ASSERT(conn->type == CONN_DNODE_PEER_SERVER); // crashes because dmsg is NULL :( /*log_debug(LOG_DEBUG, "DNODE REQ SEND %s %d dmsg->id %u", conn_get_type_string(conn), conn->sd, msg->dmsg->id);*/ if (!conn->same_dc) msg->remote_region_send_time = dn_usec_now(); req_send_done(ctx, conn, msg); }
static void req_client_dequeue_omsgq(struct context *ctx, struct conn *conn, struct msg *msg) { ASSERT(msg->request); ASSERT(conn->type == CONN_CLIENT); if (msg->stime_in_microsec) { usec_t latency = dn_usec_now() - msg->stime_in_microsec; stats_histo_add_latency(ctx, latency); } conn->omsg_count--; histo_add(&ctx->stats->client_out_queue, conn->omsg_count); TAILQ_REMOVE(&conn->omsg_q, msg, c_tqe); log_debug(LOG_VERB, "conn %p dequeue outq %p", conn, msg); }
/*
 * Hook invoked when a complete client request has been parsed off the wire.
 * Records payload-size stats for non-read requests, swaps in the next
 * partially-received message, and forwards the request unless req_filter()
 * fully handles it.
 */
void
req_recv_done(struct context *ctx, struct conn *conn, struct msg *msg,
              struct msg *nmsg)
{
    ASSERT(conn->type == CONN_CLIENT);
    ASSERT(msg->request);
    ASSERT(msg->owner == conn);
    ASSERT(conn->rmsg == msg);
    ASSERT(nmsg == NULL || nmsg->request);

    /* only non-read requests have their payload size tracked */
    if (!msg->is_read) {
        stats_histo_add_payloadsize(ctx, msg->mlen);
    }

    /* enqueue next message (request), if any */
    conn->rmsg = nmsg;

    /* a filtered request is fully consumed by req_filter() */
    if (req_filter(ctx, conn, msg)) {
        return;
    }

    /* stamp arrival time for latency accounting, then route the request */
    msg->stime_in_microsec = dn_usec_now();
    req_forward(ctx, conn, msg);
}
/* There are chances that the request to the remote peer or its response got dropped. * Hence we may not always receive a response to the request at the head of the FIFO. * Hence what we do is we mark that request as errored and move on the next one * in the outgoing queue. This works since we always have message ids in monotonically * increasing order. */ static void dnode_rsp_forward(struct context *ctx, struct conn *peer_conn, struct msg *rsp) { rstatus_t status; struct msg *req; struct conn *c_conn; ASSERT(peer_conn->type == CONN_DNODE_PEER_SERVER); /* response from a peer implies that peer is ok and heartbeating */ dnode_peer_ok(ctx, peer_conn); /* dequeue peer message (request) from peer conn */ while (true) { req = TAILQ_FIRST(&peer_conn->omsg_q); log_debug(LOG_VERB, "dnode_rsp_forward entering req %p rsp %p...", req, rsp); c_conn = req->owner; if (!peer_conn->same_dc && req->remote_region_send_time) { struct stats *st = ctx->stats; uint64_t delay = dn_usec_now() - req->remote_region_send_time; histo_add(&st->cross_region_histo, delay); } if (req->id == rsp->dmsg->id) { dnode_rsp_forward_match(ctx, peer_conn, rsp); return; } // Report a mismatch and try to rectify log_error("MISMATCH: dnode %s %d rsp_dmsg_id %u req %u:%u dnode rsp %u:%u", conn_get_type_string(peer_conn), peer_conn->sd, rsp->dmsg->id, req->id, req->parent_id, rsp->id, rsp->parent_id); if (c_conn && conn_to_ctx(c_conn)) stats_pool_incr(conn_to_ctx(c_conn), c_conn->owner, peer_mismatch_requests); // TODO : should you be worried about message id getting wrapped around to 0? if (rsp->dmsg->id < req->id) { // We received a response from the past. This indeed proves out of order // responses. A blunder to the architecture. Log it and drop the response. log_error("MISMATCH: received response from the past. 
Dropping it"); rsp_put(rsp); return; } if (req->consistency == DC_ONE) { if (req->swallow) { // swallow the request and move on the next one dnode_rsp_swallow(ctx, peer_conn, req, NULL); continue; } log_warn("req %d:%d with DC_ONE consistency is not being swallowed"); } if ((req->consistency == DC_QUORUM) && !peer_conn->same_dc) { if (req->swallow) { // swallow the request and move on the next one dnode_rsp_swallow(ctx, peer_conn, req, NULL); continue; } log_warn("req %d:%d with DC_QUORUM consistency is not being swallowed"); } log_error("MISMATCHED DNODE RSP RECEIVED %s %d dmsg->id %u req %u:%u rsp %u:%u, skipping....", conn_get_type_string(peer_conn), peer_conn->sd, rsp->dmsg->id, req->id, req->parent_id, rsp->id, rsp->parent_id); ASSERT(req != NULL && req->peer == NULL); ASSERT(req->request && !req->done); if (log_loggable(LOG_VVERB)) { loga("skipping req: "); msg_dump(req); } conn_dequeue_outq(ctx, peer_conn, req); req->done = 1; // Create an appropriate response for the request so its propagated up; struct msg *err_rsp = msg_get(peer_conn, false, peer_conn->data_store, __FUNCTION__); err_rsp->error = req->error = 1; err_rsp->err = req->err = BAD_FORMAT; err_rsp->dyn_error = req->dyn_error = BAD_FORMAT; err_rsp->dmsg = dmsg_get(); err_rsp->dmsg->id = req->id; log_debug(LOG_VERB, "%p <-> %p", req, err_rsp); /* establish err_rsp <-> req (response <-> request) link */ req->peer = err_rsp; err_rsp->peer = req; log_error("Peer connection s %d skipping request %u:%u, dummy err_rsp %u:%u", peer_conn->sd, req->id, req->parent_id, err_rsp->id, err_rsp->parent_id); rstatus_t status = conn_handle_response(c_conn, req->parent_id ? req->parent_id : req->id, err_rsp); IGNORE_RET_VAL(status); if (req->swallow) { log_debug(LOG_INFO, "swallow request %d:%d", req->id, req->parent_id); req_put(req); } } }
void remote_req_forward(struct context *ctx, struct conn *c_conn, struct msg *msg, struct rack *rack, uint8_t *key, uint32_t keylen) { ASSERT((c_conn->type == CONN_CLIENT) || (c_conn->type == CONN_DNODE_PEER_CLIENT)); struct node * peer = dnode_peer_pool_server(ctx, c_conn->owner, rack, key, keylen, msg->msg_routing); if (peer->is_local) { log_debug(LOG_VERB, "c_conn: %p forwarding %d:%d is local", c_conn, msg->id, msg->parent_id); local_req_forward(ctx, c_conn, msg, key, keylen); return; } /* enqueue message (request) into client outq, if response is expected */ if (msg->expect_datastore_reply && !msg->swallow) { conn_enqueue_outq(ctx, c_conn, msg); } // now get a peer connection struct conn *p_conn = dnode_peer_pool_server_conn(ctx, peer); if ((p_conn == NULL) || (p_conn->connecting)) { if (p_conn) { usec_t now = dn_usec_now(); static usec_t next_log = 0; // Log every 1 sec if (now > next_log) { log_warn("still connecting to peer '%.*s'......", peer->endpoint.pname.len, peer->endpoint.pname.data); next_log = now + 1000 * 1000; } } // No response for DC_ONE & swallow if ((msg->consistency == DC_ONE) && (msg->swallow)) { msg_put(msg); return; } // No response for remote dc struct server_pool *pool = c_conn->owner; bool same_dc = is_same_dc(pool, peer)? 1 : 0; if (!same_dc) { msg_put(msg); return; } // All other cases return a response struct msg *rsp = msg_get(c_conn, false, __FUNCTION__); msg->done = 1; rsp->error = msg->error = 1; rsp->err = msg->err = (p_conn ? PEER_HOST_NOT_CONNECTED : PEER_HOST_DOWN); rsp->dyn_error = msg->dyn_error = (p_conn ? PEER_HOST_NOT_CONNECTED: PEER_HOST_DOWN); rsp->dmsg = dmsg_get(); rsp->peer = msg; rsp->dmsg->id = msg->id; log_info("%lu:%lu <-> %lu:%lu Short circuit....", msg->id, msg->parent_id, rsp->id, rsp->parent_id); conn_handle_response(c_conn, msg->parent_id ? 
msg->parent_id : msg->id, rsp); if (msg->swallow) msg_put(msg); return; } log_debug(LOG_VERB, "c_conn: %p forwarding %d:%d to p_conn %p", c_conn, msg->id, msg->parent_id, p_conn); dnode_peer_req_forward(ctx, c_conn, p_conn, msg, rack, key, keylen); }