static void
dnode_client_close_stats(struct context *ctx, struct server_pool *pool, err_t err,
                         unsigned eof)
{
    stats_pool_decr(ctx, dnode_client_connections);

    if (eof) {
        //fix this also
        stats_pool_incr(ctx, dnode_client_eof);
        return;
    }

    switch (err) {
    case EPIPE:
    case ETIMEDOUT:
    case ECONNRESET:
    case ECONNABORTED:
    case ENOTCONN:
    case ENETDOWN:
    case ENETUNREACH:
    case EHOSTDOWN:
    case EHOSTUNREACH:
    default:
        //fix this also
        stats_pool_incr(ctx, dnode_client_err);
        break;
    }
}
static void
dnode_peer_close_stats(struct context *ctx, struct server *server, err_t err,
                       unsigned eof, unsigned connected)
{
    if (connected) {
        //stats_server_decr(ctx, server, server_connections);
        stats_pool_decr(ctx, server->owner, peer_connections);
    }

    if (eof) {
        //stats_server_incr(ctx, server, server_eof);
        stats_pool_incr(ctx, server->owner, peer_eof);
        return;
    }

    switch (err) {
    case ETIMEDOUT:
        //stats_server_incr(ctx, server, server_timedout);
        stats_pool_incr(ctx, server->owner, peer_timedout);
        break;
    case EPIPE:
    case ECONNRESET:
    case ECONNABORTED:
    case ECONNREFUSED:
    case ENOTCONN:
    case ENETDOWN:
    case ENETUNREACH:
    case EHOSTDOWN:
    case EHOSTUNREACH:
    default:
        //stats_server_incr(ctx, server, server_err);
        stats_pool_incr(ctx, server->owner, peer_err);
        break;
    }
}
static void
core_timeout(struct context *ctx)
{
    for (;;) {
        struct msg *msg;
        struct conn *conn;
        int64_t now, then;

        msg = msg_tmo_min();
        if (msg == NULL) {
            ctx->timeout = ctx->max_timeout;
            return;
        }

        /* skip over req that are in-error or done */
        if (msg->error || msg->done) {
            msg_tmo_delete(msg);
            continue;
        }

        /*
         * timeout expired req and all the outstanding req on the timing
         * out server
         */
        conn = msg->tmo_rbe.data;
        then = msg->tmo_rbe.key;

        now = dn_msec_now();
        if (now < then) {
            int delta = (int)(then - now);
            ctx->timeout = MIN(delta, ctx->max_timeout);
            return;
        }

        log_warn("req %"PRIu64" on %s %d timed out, timeout was %d", msg->id,
                 conn_get_type_string(conn), conn->sd, msg->tmo_rbe.timeout);

        msg_tmo_delete(msg);

        if (conn->dyn_mode) {
            if (conn->type == CONN_DNODE_PEER_SERVER) { //outgoing peer requests
                struct server *server = conn->owner;
                if (conn->same_dc)
                    stats_pool_incr(ctx, server->owner, peer_timedout_requests);
                else
                    stats_pool_incr(ctx, server->owner, remote_peer_timedout_requests);
            }
        } else {
            if (conn->type == CONN_SERVER) { //storage server requests
                stats_server_incr(ctx, conn->owner, server_dropped_requests);
            }
        }

        conn->err = ETIMEDOUT;

        core_close(ctx, conn);
    }
}
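/*
 * A minimal, standalone sketch of the poll-timeout computation that
 * core_timeout() performs above: given the earliest deadline among pending
 * requests, the event loop sleeps until that deadline or until max_timeout,
 * whichever comes first. The function name and parameters here are
 * illustrative assumptions, not part of the codebase.
 */
#include <stdint.h>

static int
next_poll_timeout(int64_t now_ms, int64_t earliest_deadline_ms, int max_timeout_ms)
{
    if (earliest_deadline_ms < 0) {
        /* no pending timed request: sleep as long as the cap allows */
        return max_timeout_ms;
    }
    if (now_ms >= earliest_deadline_ms) {
        /* deadline already passed: the caller should expire it immediately */
        return 0;
    }
    int delta = (int)(earliest_deadline_ms - now_ms);
    return delta < max_timeout_ms ? delta : max_timeout_ms;
}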
static void
rspmgr_incr_non_quorum_responses_stats(struct response_mgr *rspmgr)
{
    if (rspmgr->is_read)
        stats_pool_incr(conn_to_ctx(rspmgr->conn), rspmgr->conn->owner,
                        client_non_quorum_r_responses);
    else
        stats_pool_incr(conn_to_ctx(rspmgr->conn), rspmgr->conn->owner,
                        client_non_quorum_w_responses);
}
static void
dnode_rsp_forward_stats(struct context *ctx, struct server *server, struct msg *msg)
{
    ASSERT(!msg->request);

    stats_pool_incr(ctx, server->owner, peer_responses);
    stats_pool_incr_by(ctx, server->owner, peer_response_bytes, msg->mlen);
}
void
req_server_enqueue_imsgq(struct context *ctx, struct conn *conn, struct msg *msg)
{
    ASSERT(msg->request);
    ASSERT((!conn->client && !conn->proxy) ||
           (!conn->dnode_client && !conn->dnode_server));

    /*
     * timeout clock starts ticking the instant the message is enqueued into
     * the server in_q; the clock continues to tick until it either expires
     * or the message is dequeued from the server out_q
     *
     * noreply requests are free from timeouts because the client is not
     * interested in the response anyway!
     */
    if (!msg->noreply) {
        msg_tmo_insert(msg, conn);
    }

    TAILQ_INSERT_TAIL(&conn->imsg_q, msg, s_tqe);

    if (!conn->dyn_mode) {
        stats_server_incr(ctx, conn->owner, in_queue);
        stats_server_incr_by(ctx, conn->owner, in_queue_bytes, msg->mlen);
    } else {
        struct server_pool *pool = (struct server_pool *) array_get(&ctx->pool, 0);
        stats_pool_incr(ctx, pool, peer_in_queue);
        stats_pool_incr_by(ctx, pool, peer_in_queue_bytes, msg->mlen);
    }
}
/*
 * Send the next message (response) on this client connection.
 */
struct msg *
rsp_send_next(struct context *ctx, struct conn *conn)
{
    rstatus_t status;
    struct msg *msg, *pmsg; /* response and its peer request */

    ASSERT(conn->client && !conn->proxy);

    pmsg = TAILQ_FIRST(&conn->omsg_q);
    if (pmsg == NULL || !req_done(conn, pmsg)) { /* no request, or head request not yet done */
        /* nothing is outstanding, initiate close? */
        if (pmsg == NULL && conn->eof) { /* no pending message and the connection saw EOF */
            conn->done = 1; /* mark the connection done */
            log_debug(LOG_INFO, "c %d is done", conn->sd);
        }
        /* stop polling this connection for writability */
        status = event_del_out(ctx->evb, conn);
        if (status != NC_OK) { /* event deletion failed */
            conn->err = errno; /* record the errno on the connection */
        }
        return NULL;
    }

    /* message currently being sent on this connection */
    msg = conn->smsg;
    if (msg != NULL) {
        ASSERT(!msg->request && msg->peer != NULL);
        ASSERT(req_done(conn, msg->peer));
        pmsg = TAILQ_NEXT(msg->peer, c_tqe);
    }

    if (pmsg == NULL || !req_done(conn, pmsg)) {
        conn->smsg = NULL;
        return NULL;
    }
    ASSERT(pmsg->request && !pmsg->swallow);

    if (req_error(conn, pmsg)) { /* the request errored out */
        msg = rsp_make_error(ctx, conn, pmsg);
        if (msg == NULL) {
            conn->err = errno;
            return NULL;
        }
        msg->peer = pmsg;
        pmsg->peer = msg;
        stats_pool_incr(ctx, conn->owner, forward_error);
    } else {
        msg = pmsg->peer;
    }
    ASSERT(!msg->request);

    conn->smsg = msg;

    log_debug(LOG_VVERB, "send next rsp %"PRIu64" on c %d", msg->id, conn->sd);

    return msg;
}
void
dnode_req_peer_enqueue_omsgq(struct context *ctx, struct conn *conn, struct msg *msg)
{
    ASSERT(msg->request);
    ASSERT(conn->type == CONN_DNODE_PEER_SERVER);

    TAILQ_INSERT_TAIL(&conn->omsg_q, msg, s_tqe);
    log_debug(LOG_VERB, "conn %p enqueue outq %d:%d", conn, msg->id, msg->parent_id);

    //use only the 1st pool
    struct server_pool *pool = (struct server_pool *) array_get(&ctx->pool, 0);
    if (conn->same_dc)
        stats_pool_incr(ctx, pool, peer_out_queue);
    else
        stats_pool_incr(ctx, pool, remote_peer_out_queue);
    stats_pool_incr_by(ctx, pool, peer_out_queue_bytes, msg->mlen);
}
static void
dnode_peer_req_forward_stats(struct context *ctx, struct server *server, struct msg *msg)
{
    ASSERT(msg->request);

    //use only the 1st pool
    //struct server_pool *pool = (struct server_pool *) array_get(&ctx->pool, 0);
    struct server_pool *pool = server->owner;
    stats_pool_incr(ctx, pool, peer_requests);
    stats_pool_incr_by(ctx, pool, peer_request_bytes, msg->mlen);
}
void
dnode_req_client_enqueue_omsgq(struct context *ctx, struct conn *conn, struct msg *msg)
{
    ASSERT(msg->request);
    ASSERT(conn->type == CONN_DNODE_PEER_CLIENT);

    log_debug(LOG_VERB, "conn %p enqueue outq %p", conn, msg);
    TAILQ_INSERT_TAIL(&conn->omsg_q, msg, c_tqe);

    //use only the 1st pool
    struct server_pool *pool = (struct server_pool *) array_get(&ctx->pool, 0);
    stats_pool_incr(ctx, pool, dnode_client_out_queue);
    stats_pool_incr_by(ctx, pool, dnode_client_out_queue_bytes, msg->mlen);
}
static void
dnode_peer_failure(struct context *ctx, struct server *server)
{
    struct server_pool *pool = server->owner;
    int64_t now, next;
    rstatus_t status;

    //fix me
    if (!pool->auto_eject_hosts) {
        return;
    }

    server->failure_count++;

    log_debug(LOG_VERB, "dyn: peer '%.*s' failure count %"PRIu32" limit %"PRIu32,
              server->pname.len, server->pname.data, server->failure_count,
              pool->server_failure_limit);

    if (server->failure_count < pool->server_failure_limit) {
        return;
    }

    now = dn_usec_now();
    if (now < 0) {
        return;
    }

    //fix me
    //stats_server_set_ts(ctx, server, server_ejected_at, now);

    //fix me
    next = now + pool->server_retry_timeout;

    log_debug(LOG_INFO, "dyn: update peer pool %"PRIu32" '%.*s' to delete peer "
              "'%.*s' for next %"PRIu32" secs", pool->idx, pool->name.len,
              pool->name.data, server->pname.len, server->pname.data,
              pool->server_retry_timeout / 1000 / 1000);

    stats_pool_incr(ctx, pool, peer_ejects);

    server->failure_count = 0;
    server->next_retry = next;

    status = dnode_peer_pool_run(pool);
    if (status != DN_OK) {
        log_error("dyn: updating peer pool %"PRIu32" '%.*s' failed: %s", pool->idx,
                  pool->name.len, pool->name.data, strerror(errno));
    }
}
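/*
 * A minimal sketch of the ejection bookkeeping above: a peer is ejected once
 * its failure count reaches the configured limit, and becomes eligible for a
 * retry after retry_timeout microseconds. The struct and names below are
 * illustrative assumptions, not the actual dynomite types.
 */
#include <stdint.h>

struct peer_health {
    uint32_t failure_count;
    int64_t  next_retry_us;   /* earliest time the peer may be retried */
};

/* Record one failure; returns 1 if the peer should be ejected now. */
static int
peer_record_failure(struct peer_health *p, uint32_t failure_limit,
                    int64_t now_us, int64_t retry_timeout_us)
{
    if (++p->failure_count < failure_limit) {
        return 0;
    }
    /* limit reached: reset the counter and open a retry window */
    p->failure_count = 0;
    p->next_retry_us = now_us + retry_timeout_us;
    return 1;
}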
static void
dnode_req_client_enqueue_omsgq(struct context *ctx, struct conn *conn, struct msg *msg)
{
    ASSERT(msg->request);
    ASSERT(conn->type == CONN_DNODE_PEER_CLIENT);

    log_debug(LOG_VERB, "conn %p enqueue outq %p", conn, msg);
    TAILQ_INSERT_TAIL(&conn->omsg_q, msg, c_tqe);

    conn->omsg_count++;
    histo_add(&ctx->stats->dnode_client_out_queue, conn->omsg_count);
    stats_pool_incr(ctx, dnode_client_out_queue);
    stats_pool_incr_by(ctx, dnode_client_out_queue_bytes, msg->mlen);
}
void
dnode_req_peer_enqueue_omsgq(struct context *ctx, struct conn *conn, struct msg *msg)
{
    ASSERT(msg->request);
    ASSERT(!conn->dnode_client && !conn->dnode_server);

    TAILQ_INSERT_TAIL(&conn->omsg_q, msg, s_tqe);

    /* stats_server_incr(ctx, conn->owner, out_queue); */
    /* stats_server_incr_by(ctx, conn->owner, out_queue_bytes, msg->mlen); */

    //use only the 1st pool
    struct server_pool *pool = (struct server_pool *) array_get(&ctx->pool, 0);
    stats_pool_incr(ctx, pool, peer_out_queue);
    stats_pool_incr_by(ctx, pool, peer_out_queue_bytes, msg->mlen);
}
void
dnode_peer_connected(struct context *ctx, struct conn *conn)
{
    struct server *server = conn->owner;

    ASSERT(!conn->dnode_server && !conn->dnode_client);
    ASSERT(conn->connecting && !conn->connected);

    //fix me
    //stats_server_incr(ctx, server, server_connections);
    stats_pool_incr(ctx, server->owner, peer_connections);

    conn->connecting = 0;
    conn->connected = 1;

    log_debug(LOG_INFO, "dyn: peer connected on sd %d to server '%.*s'", conn->sd,
              server->pname.len, server->pname.data);
}
static void
client_close(struct context *ctx, struct conn *conn)
{
    rstatus_t status;
    struct msg *msg, *nmsg; /* current and next message */

    ASSERT(conn->type == CONN_CLIENT);

    client_close_stats(ctx, conn->owner, conn->err, conn->eof);

    if (conn->sd < 0) {
        client_unref(conn);
        return;
    }

    msg = conn->rmsg;
    if (msg != NULL) {
        conn->rmsg = NULL;

        ASSERT(msg->peer == NULL);
        ASSERT(msg->request && !msg->done);

        log_debug(LOG_INFO, "close c %d discarding pending req %"PRIu64" len "
                  "%"PRIu32" type %d", conn->sd, msg->id, msg->mlen, msg->type);

        req_put(msg);
    }

    ASSERT(conn->smsg == NULL);
    ASSERT(TAILQ_EMPTY(&conn->imsg_q));

    for (msg = TAILQ_FIRST(&conn->omsg_q); msg != NULL; msg = nmsg) {
        nmsg = TAILQ_NEXT(msg, c_tqe);

        /* dequeue the message (request) from client outq */
        conn_dequeue_outq(ctx, conn, msg);

        if (msg->done) {
            log_debug(LOG_INFO, "close c %d discarding %s req %"PRIu64" len "
                      "%"PRIu32" type %d", conn->sd,
                      msg->error ? "error" : "completed", msg->id, msg->mlen,
                      msg->type);
            req_put(msg);
        } else {
            msg->swallow = 1;

            ASSERT(msg->request);
            ASSERT(msg->peer == NULL);

            log_debug(LOG_INFO, "close c %d schedule swallow of req %"PRIu64" "
                      "len %"PRIu32" type %d", conn->sd, msg->id, msg->mlen,
                      msg->type);
        }

        stats_pool_incr(ctx, client_dropped_requests);
    }
    ASSERT(TAILQ_EMPTY(&conn->omsg_q));

    status = close(conn->sd);
    if (status < 0) {
        log_error("close c %d failed, ignored: %s", conn->sd, strerror(errno));
    }
    conn->sd = -1;

    client_unref(conn);
}
void
dnode_peer_close(struct context *ctx, struct conn *conn)
{
    rstatus_t status;
    struct msg *msg, *nmsg; /* current and next message */
    struct conn *c_conn;    /* peer client connection */
    struct server *server = conn->owner;

    log_debug(LOG_WARN, "dyn: dnode_peer_close on peer '%.*s'", server->pname.len,
              server->pname.data);

    ASSERT(!conn->dnode_server && !conn->dnode_client);

    dnode_peer_close_stats(ctx, conn->owner, conn->err, conn->eof, conn->connected);

    if (conn->sd < 0) {
        dnode_peer_failure(ctx, conn->owner);
        conn->unref(conn);
        conn_put(conn);
        return;
    }

    for (msg = TAILQ_FIRST(&conn->imsg_q); msg != NULL; msg = nmsg) {
        nmsg = TAILQ_NEXT(msg, s_tqe);

        /* dequeue the message (request) from server inq */
        conn->dequeue_inq(ctx, conn, msg);

        /*
         * Don't send any error response, if
         * 1. request is tagged as noreply or,
         * 2. client has already closed its connection
         */
        if (msg->swallow || msg->noreply) {
            log_debug(LOG_INFO, "dyn: close s %d swallow req %"PRIu64" len %"PRIu32
                      " type %d", conn->sd, msg->id, msg->mlen, msg->type);
            req_put(msg);
        } else {
            c_conn = msg->owner;
            ASSERT(c_conn->client && !c_conn->proxy);

            msg->done = 1;
            msg->error = 1;
            msg->err = conn->err;
            msg->dyn_error = PEER_CONNECTION_REFUSE;

            if (TAILQ_FIRST(&c_conn->omsg_q) != NULL &&
                req_done(c_conn, TAILQ_FIRST(&c_conn->omsg_q))) {
                event_add_out(ctx->evb, msg->owner);
            }

            log_debug(LOG_INFO, "dyn: close s %d schedule error for req %"PRIu64" "
                      "len %"PRIu32" type %d from c %d%c %s", conn->sd, msg->id,
                      msg->mlen, msg->type, c_conn->sd, conn->err ? ':' : ' ',
                      conn->err ? strerror(conn->err) : " ");
        }

        if (!conn->dnode_client) {
            stats_pool_incr(ctx, server->owner, peer_dropped_requests);
        }
    }
    ASSERT(TAILQ_EMPTY(&conn->imsg_q));

    for (msg = TAILQ_FIRST(&conn->omsg_q); msg != NULL; msg = nmsg) {
        nmsg = TAILQ_NEXT(msg, s_tqe);

        /* dequeue the message (request) from server outq */
        conn->dequeue_outq(ctx, conn, msg);

        if (msg->swallow) {
            log_debug(LOG_INFO, "dyn: close s %d swallow req %"PRIu64" len %"PRIu32
                      " type %d", conn->sd, msg->id, msg->mlen, msg->type);
            req_put(msg);
        } else {
            c_conn = msg->owner;
            ASSERT(!c_conn->dnode_client && !c_conn->dnode_server);

            msg->done = 1;
            msg->error = 1;
            msg->err = conn->err;

            if (TAILQ_FIRST(&c_conn->omsg_q) != NULL &&
                req_done(c_conn, TAILQ_FIRST(&c_conn->omsg_q))) {
                event_add_out(ctx->evb, msg->owner);
            }

            log_debug(LOG_INFO, "dyn: close s %d schedule error for req %"PRIu64" "
                      "len %"PRIu32" type %d from c %d%c %s", conn->sd, msg->id,
                      msg->mlen, msg->type, c_conn->sd, conn->err ? ':' : ' ',
                      conn->err ? strerror(conn->err) : " ");
        }
    }
    ASSERT(TAILQ_EMPTY(&conn->omsg_q));

    msg = conn->rmsg;
    if (msg != NULL) {
        conn->rmsg = NULL;

        ASSERT(!msg->request);
        ASSERT(msg->peer == NULL);

        rsp_put(msg);

        log_debug(LOG_INFO, "dyn: close s %d discarding rsp %"PRIu64" len %"PRIu32" "
                  "in error", conn->sd, msg->id, msg->mlen);
    }

    ASSERT(conn->smsg == NULL);

    dnode_peer_failure(ctx, conn->owner);

    conn->unref(conn);

    status = close(conn->sd);
    if (status < 0) {
        log_error("dyn: close s %d failed, ignored: %s", conn->sd, strerror(errno));
    }
    conn->sd = -1;

    conn_put(conn);
}
struct msg *
rsp_send_next(struct context *ctx, struct conn *conn)
{
    rstatus_t status;
    struct msg *rsp, *req; /* response and its peer request */

    ASSERT_LOG((conn->type == CONN_DNODE_PEER_CLIENT) ||
               (conn->type == CONN_CLIENT), "conn %s",
               conn_get_type_string(conn));

    req = TAILQ_FIRST(&conn->omsg_q);
    if (req == NULL || (!req->selected_rsp && !req_done(conn, req))) {
        /* nothing is outstanding, initiate close? */
        if (req == NULL && conn->eof) {
            conn->done = 1;
            log_debug(LOG_INFO, "c %d is done", conn->sd);
        }

        status = event_del_out(ctx->evb, conn);
        if (status != DN_OK) {
            conn->err = errno;
        }

        return NULL;
    }

    rsp = conn->smsg;
    if (rsp != NULL) {
        ASSERT(!rsp->request);
        ASSERT(rsp->peer != NULL);
        req = TAILQ_NEXT(rsp->peer, c_tqe);
    }

    if (req == NULL || !req_done(conn, req)) {
        conn->smsg = NULL;
        return NULL;
    }
    ASSERT(req->request && !req->swallow);

    if (req_error(conn, req)) {
        rsp = rsp_make_error(ctx, conn, req);
        if (rsp == NULL) {
            conn->err = errno;
            return NULL;
        }
        rsp->peer = req;
        req->selected_rsp = rsp;
        log_debug(LOG_VERB, "creating new error rsp %p", rsp);
        if (conn->dyn_mode) {
            stats_pool_incr(ctx, peer_forward_error);
        } else {
            stats_pool_incr(ctx, forward_error);
        }
    } else {
        rsp = req->selected_rsp;
    }
    ASSERT(!rsp->request);

    conn->smsg = rsp;

    if (log_loggable(LOG_VVERB)) {
        log_debug(LOG_VVERB, "send next rsp %"PRIu64" on c %d", rsp->id, conn->sd);
    }

    return rsp;
}
/*
 * There are chances that the request to the remote peer or its response got
 * dropped, so we may not always receive a response to the request at the
 * head of the FIFO. When that happens we mark that request as errored and
 * move on to the next one in the outgoing queue. This works since message
 * ids are always assigned in monotonically increasing order.
 */
static void
dnode_rsp_forward(struct context *ctx, struct conn *peer_conn, struct msg *rsp)
{
    struct msg *req;
    struct conn *c_conn;

    ASSERT(!peer_conn->dnode_client && !peer_conn->dnode_server);

    /* response from a peer implies that peer is ok and heartbeating */
    dnode_peer_ok(ctx, peer_conn);

    /* dequeue peer message (request) from peer conn */
    while (true) {
        req = TAILQ_FIRST(&peer_conn->omsg_q);
        log_debug(LOG_VERB, "dnode_rsp_forward entering req %p rsp %p...", req, rsp);
        c_conn = req->owner;

        if (req->id == rsp->dmsg->id) {
            dnode_rsp_forward_match(ctx, peer_conn, rsp);
            return;
        }

        // Report a mismatch and try to rectify
        log_error("MISMATCH: dnode %c %d rsp_dmsg_id %u req %u:%u dnode rsp %u:%u",
                  peer_conn->dnode_client ? 'c' : (peer_conn->dnode_server ? 's' : 'p'),
                  peer_conn->sd, rsp->dmsg->id, req->id, req->parent_id, rsp->id,
                  rsp->parent_id);
        if (c_conn && conn_to_ctx(c_conn))
            stats_pool_incr(conn_to_ctx(c_conn), c_conn->owner,
                            peer_mismatch_requests);

        // TODO : should you be worried about message id getting wrapped around to 0?
        if (rsp->dmsg->id < req->id) {
            // We received a response from the past. This indeed proves out of order
            // responses. A blunder to the architecture. Log it and drop the response.
            log_error("MISMATCH: received response from the past. Dropping it");
            dnode_rsp_put(rsp);
            return;
        }

        if (req->consistency == DC_ONE) {
            if (req->swallow) {
                // swallow the request and move on to the next one
                dnode_rsp_swallow(ctx, peer_conn, req, NULL);
                continue;
            }
            log_warn("req %d:%d with DC_ONE consistency is not being swallowed",
                     req->id, req->parent_id);
        }

        if ((req->consistency == DC_QUORUM) && !peer_conn->same_dc) {
            if (req->swallow) {
                // swallow the request and move on to the next one
                dnode_rsp_swallow(ctx, peer_conn, req, NULL);
                continue;
            }
            log_warn("req %d:%d with DC_QUORUM consistency is not being swallowed",
                     req->id, req->parent_id);
        }

        log_error("MISMATCHED DNODE RSP RECEIVED %c %d dmsg->id %u req %u:%u "
                  "rsp %u:%u, skipping....",
                  peer_conn->dnode_client ? 'c' : (peer_conn->dnode_server ? 's' : 'p'),
                  peer_conn->sd, rsp->dmsg->id, req->id, req->parent_id, rsp->id,
                  rsp->parent_id);
        ASSERT(req != NULL && req->peer == NULL);
        ASSERT(req->request && !req->done);

        if (log_loggable(LOG_VVERB)) {
            loga("skipping req: ");
            msg_dump(req);
        }

        peer_conn->dequeue_outq(ctx, peer_conn, req);
        req->done = 1;

        // Create an appropriate response for the request so it's propagated up
        struct msg *err_rsp = msg_get(peer_conn, false, peer_conn->data_store);
        err_rsp->error = req->error = 1;
        err_rsp->err = req->err = BAD_FORMAT;
        err_rsp->dyn_error = req->dyn_error = BAD_FORMAT;
        err_rsp->dmsg = dmsg_get();
        err_rsp->dmsg->id = req->id;

        log_debug(LOG_VERB, "%p <-> %p", req, err_rsp);
        /* establish err_rsp <-> req (response <-> request) link */
        req->peer = err_rsp;
        err_rsp->peer = req;

        log_error("Peer connection s %d skipping request %u:%u, dummy err_rsp %u:%u",
                  peer_conn->sd, req->id, req->parent_id, err_rsp->id,
                  err_rsp->parent_id);

        rstatus_t status = conn_handle_response(c_conn,
                                                req->parent_id ? req->parent_id : req->id,
                                                err_rsp);
        IGNORE_RET_VAL(status);
        if (req->swallow) {
            log_debug(LOG_INFO, "swallow request %d:%d", req->id, req->parent_id);
            req_put(req);
        }
    }
}
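/*
 * A minimal, standalone sketch of the recovery strategy described above,
 * assuming only that request ids are handed out in monotonically increasing
 * order. The enum and function names are illustrative, not the actual
 * dynomite types.
 */
#include <stdint.h>

enum rsp_match { RSP_MATCH, RSP_STALE, RSP_SKIP_HEAD };

/* Compare the response id against the id of the request at the FIFO head. */
static enum rsp_match
classify_response(uint64_t head_req_id, uint64_t rsp_id)
{
    if (rsp_id == head_req_id) {
        return RSP_MATCH;     /* forward the response to its request */
    }
    if (rsp_id < head_req_id) {
        return RSP_STALE;     /* response from the past: drop it */
    }
    /*
     * rsp_id > head_req_id: the head request's response was lost; error out
     * the head and re-examine this response against the next request.
     */
    return RSP_SKIP_HEAD;
}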
static void
req_forward(struct context *ctx, struct conn *c_conn, struct msg *msg)
{
    struct server_pool *pool = c_conn->owner;
    uint8_t *key;
    uint32_t keylen;

    ASSERT(c_conn->client && !c_conn->proxy);

    if (msg->is_read)
        stats_pool_incr(ctx, pool, client_read_requests);
    else
        stats_pool_incr(ctx, pool, client_write_requests);

    key = NULL;
    keylen = 0;

    if (!string_empty(&pool->hash_tag)) {
        struct string *tag = &pool->hash_tag;
        uint8_t *tag_start, *tag_end;

        tag_start = dn_strchr(msg->key_start, msg->key_end, tag->data[0]);
        if (tag_start != NULL) {
            tag_end = dn_strchr(tag_start + 1, msg->key_end, tag->data[1]);
            if (tag_end != NULL) {
                key = tag_start + 1;
                keylen = (uint32_t)(tag_end - key);
            }
        }
    }

    if (keylen == 0) {
        key = msg->key_start;
        keylen = (uint32_t)(msg->key_end - msg->key_start);
    }

    // need to capture the initial mbuf location as once we add in the dynomite
    // headers (as mbufs to the src msg), that will bork the request sent to
    // secondary racks
    struct mbuf *orig_mbuf = STAILQ_FIRST(&msg->mhdr);

    if (request_send_to_all_racks(msg)) {
        uint32_t dc_cnt = array_n(&pool->datacenters);
        uint32_t dc_index;

        for (dc_index = 0; dc_index < dc_cnt; dc_index++) {
            struct datacenter *dc = array_get(&pool->datacenters, dc_index);
            if (dc == NULL) {
                log_error("Wow, this is very bad, dc is NULL");
                return;
            }

            if (string_compare(dc->name, &pool->dc) == 0) { //send to all local racks
                //log_debug(LOG_DEBUG, "dc name '%.*s'", dc->name->len, dc->name->data);
                uint32_t rack_cnt = array_n(&dc->racks);
                uint32_t rack_index;
                for (rack_index = 0; rack_index < rack_cnt; rack_index++) {
                    struct rack *rack = array_get(&dc->racks, rack_index);
                    //log_debug(LOG_DEBUG, "rack name '%.*s'", rack->name->len, rack->name->data);
                    struct msg *rack_msg;
                    if (string_compare(rack->name, &pool->rack) == 0) {
                        rack_msg = msg;
                    } else {
                        rack_msg = msg_get(c_conn, msg->request, msg->redis);
                        if (rack_msg == NULL) {
                            log_debug(LOG_VERB, "whelp, looks like yer screwed "
                                      "now, buddy. no inter-rack messages for you!");
                            continue;
                        }

                        msg_clone(msg, orig_mbuf, rack_msg);
                        rack_msg->noreply = true;
                    }

                    log_debug(LOG_DEBUG, "forwarding request to conn '%s' on rack '%.*s'",
                              dn_unresolve_peer_desc(c_conn->sd), rack->name->len,
                              rack->name->data);

                    remote_req_forward(ctx, c_conn, rack_msg, rack, key, keylen);
                }
            } else {
                uint32_t rack_cnt = array_n(&dc->racks);
                if (rack_cnt == 0)
                    continue;

                uint32_t ran_index = rand() % rack_cnt;
                struct rack *rack = array_get(&dc->racks, ran_index);

                struct msg *rack_msg = msg_get(c_conn, msg->request, msg->redis);
                if (rack_msg == NULL) {
                    log_debug(LOG_VERB, "whelp, looks like yer screwed now, "
                              "buddy. no inter-rack messages for you!");
                    continue;
                }
                msg_clone(msg, orig_mbuf, rack_msg);
                rack_msg->noreply = true;

                log_debug(LOG_DEBUG, "forwarding request to conn '%s' on rack '%.*s'",
                          dn_unresolve_peer_desc(c_conn->sd), rack->name->len,
                          rack->name->data);

                remote_req_forward(ctx, c_conn, rack_msg, rack, key, keylen);
            }
        }
    } else { //for read only requests
        struct rack *rack = server_get_rack_by_dc_rack(pool, &pool->rack, &pool->dc);
        remote_req_forward(ctx, c_conn, msg, rack, key, keylen);
    }
}
static void
req_forward(struct context *ctx, struct conn *c_conn, struct msg *msg)
{
    struct server_pool *pool = c_conn->owner;
    uint8_t *key;
    uint32_t keylen;

    ASSERT(c_conn->type == CONN_CLIENT);

    if (msg->is_read) {
        if (msg->type != MSG_REQ_REDIS_PING)
            stats_pool_incr(ctx, client_read_requests);
    } else
        stats_pool_incr(ctx, client_write_requests);

    key = NULL;
    keylen = 0;

    // add the message to the dict
    log_debug(LOG_DEBUG, "conn %p adding message %d:%d", c_conn, msg->id,
              msg->parent_id);
    dictAdd(c_conn->outstanding_msgs_dict, &msg->id, msg);

    if (!string_empty(&pool->hash_tag)) {
        struct string *tag = &pool->hash_tag;
        uint8_t *tag_start, *tag_end;

        tag_start = dn_strchr(msg->key_start, msg->key_end, tag->data[0]);
        if (tag_start != NULL) {
            tag_end = dn_strchr(tag_start + 1, msg->key_end, tag->data[1]);
            if (tag_end != NULL) {
                key = tag_start + 1;
                keylen = (uint32_t)(tag_end - key);
            }
        }
    }

    if (keylen == 0) {
        key = msg->key_start;
        keylen = (uint32_t)(msg->key_end - msg->key_start);
    }

    // need to capture the initial mbuf location as once we add in the dynomite
    // headers (as mbufs to the src msg), that will bork the request sent to
    // secondary racks
    struct mbuf *orig_mbuf = STAILQ_FIRST(&msg->mhdr);

    if (ctx->admin_opt == 1) {
        if (msg->type == MSG_REQ_REDIS_DEL || msg->type == MSG_REQ_MC_DELETE) {
            struct rack *rack = server_get_rack_by_dc_rack(pool, &pool->rack,
                                                           &pool->dc);
            admin_local_req_forward(ctx, c_conn, msg, rack, key, keylen);
            return;
        }
    }

    if (msg->msg_routing == ROUTING_LOCAL_NODE_ONLY) {
        // Strictly local host only
        msg->consistency = DC_ONE;
        msg->rsp_handler = msg_local_one_rsp_handler;
        local_req_forward(ctx, c_conn, msg, key, keylen);
        return;
    }

    if (msg->is_read) {
        msg->consistency = conn_get_read_consistency(c_conn);
    } else {
        msg->consistency = conn_get_write_consistency(c_conn);
    }

    /* forward the request */
    uint32_t dc_cnt = array_n(&pool->datacenters);
    uint32_t dc_index;

    for (dc_index = 0; dc_index < dc_cnt; dc_index++) {
        struct datacenter *dc = array_get(&pool->datacenters, dc_index);
        if (dc == NULL) {
            log_error("Wow, this is very bad, dc is NULL");
            return;
        }

        if (string_compare(dc->name, &pool->dc) == 0)
            req_forward_local_dc(ctx, c_conn, msg, orig_mbuf, key, keylen, dc);
        else if (request_send_to_all_dcs(msg)) {
            req_forward_remote_dc(ctx, c_conn, msg, orig_mbuf, key, keylen, dc);
        }
    }
}
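/*
 * A minimal, standalone sketch of the hash-tag extraction that both
 * req_forward() variants above perform: with a two-character tag such as
 * "{}", only the bytes between the first '{' and the following '}' are
 * hashed, so "user:{42}:profile" and "user:{42}:settings" land on the same
 * node. The helper below is an illustrative assumption, not the actual
 * dn_strchr-based code.
 */
#include <stdint.h>
#include <string.h>

/* Returns the portion of key[0..keylen) to hash and stores its length in
 * *outlen; falls back to the whole key when no non-empty tag is present. */
static const char *
hash_tag_extract(const char *key, uint32_t keylen, char open, char close,
                 uint32_t *outlen)
{
    const char *start = memchr(key, open, keylen);
    if (start != NULL) {
        const char *end = memchr(start + 1, close,
                                 (size_t)(key + keylen - (start + 1)));
        if (end != NULL && end > start + 1) {
            *outlen = (uint32_t)(end - (start + 1));
            return start + 1;
        }
    }
    *outlen = keylen;
    return key;
}

/*
 * e.g. hash_tag_extract("user:{42}:profile", 17, '{', '}', &n) returns a
 * pointer to "42" with n == 2; a key with no tag hashes in its entirety.
 */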
static rstatus_t
proxy_accept(struct context *ctx, struct conn *p)
{
    rstatus_t status;
    struct conn *c;
    int sd;
    struct sockaddr_storage addr;
    socklen_t addr_len;

    ASSERT(p->proxy && !p->client);
    ASSERT(p->sd > 0);
    ASSERT(p->recv_active && p->recv_ready);

    for (;;) {
        sd = accept(p->sd, NULL, NULL);
        if (sd < 0) {
            if (errno == EINTR) {
                log_debug(LOG_VERB, "accept on p %d not ready - eintr", p->sd);
                continue;
            }

            if (errno == EAGAIN || errno == EWOULDBLOCK || errno == ECONNABORTED) {
                log_debug(LOG_VERB, "accept on p %d not ready - eagain", p->sd);
                p->recv_ready = 0;
                return NC_OK;
            }

            /*
             * FIXME: On EMFILE or ENFILE mask out IN event on the proxy; mask
             * it back in when some existing connection gets closed
             */

            /*
             * Workaround of https://github.com/twitter/twemproxy/issues/97
             * Just ignore EMFILE/ENFILE, return NC_OK will enable the server
             * continue to run instead of close the server socket
             */
            if (errno == EMFILE || errno == ENFILE) {
                log_crit("accept on p %d failed: %s", p->sd, strerror(errno));

                p->recv_ready = 0;

                log_crit("connections status: rlimit nofile %d, "
                         "used connections: %d, max client connections %d, "
                         "curr client connections %d", ctx->rlimit_nofile,
                         conn_ncurr(), ctx->max_ncconn, conn_ncurr_cconn());
                /* Since we maintain a safe max_ncconn and check
                 * it after every accept, we should not reach here.
                 * So we will panic after this log */
                log_panic("HIT MAX OPEN FILES, IT SHOULD NOT HAPPEN. ABORT.");
                return NC_OK;
            }

            log_error("accept on p %d failed: %s", p->sd, strerror(errno));
            return NC_ERROR;
        }

        addr_len = sizeof(addr);
        if (getsockname(sd, (struct sockaddr *)&addr, &addr_len)) {
            log_error("getsockname on p %d failed: %s", p->sd, strerror(errno));
            close(sd);
            continue;
        }

        break;
    }

    if (conn_ncurr_cconn() >= ctx->max_ncconn) {
        stats_pool_incr(ctx, p->owner, rejected_connections);

        log_crit("client connections %d exceed limit %d", conn_ncurr_cconn(),
                 ctx->max_ncconn);

        status = close(sd);
        if (status < 0) {
            log_error("close c %d failed, ignored: %s", sd, strerror(errno));
        }
        return NC_OK;
    }

    c = conn_get(p->owner, true, p->redis);
    if (c == NULL) {
        log_error("get conn for c %d from p %d failed: %s", sd, p->sd,
                  strerror(errno));
        status = close(sd);
        if (status < 0) {
            log_error("close c %d failed, ignored: %s", sd, strerror(errno));
        }
        return NC_ENOMEM;
    }
    c->sd = sd;
    c->family = addr.ss_family;
    c->addrlen = addr_len;
    c->ss = addr;
    c->addr = (struct sockaddr *)&c->ss;

    stats_pool_incr(ctx, c->owner, client_connections);

    status = nc_set_nonblocking(c->sd);
    if (status < 0) {
        log_error("set nonblock on c %d from p %d failed: %s", c->sd, p->sd,
                  strerror(errno));
        c->close(ctx, c);
        return status;
    }

    if (p->family == AF_INET || p->family == AF_INET6) {
        status = nc_set_tcpnodelay(c->sd);
        if (status < 0) {
            log_warn("set tcpnodelay on c %d from p %d failed, ignored: %s",
                     c->sd, p->sd, strerror(errno));
        }
    }

    status = event_add_conn(ctx->evb, c);
    if (status < 0) {
        log_error("event add conn from p %d failed: %s", p->sd, strerror(errno));
        c->close(ctx, c);
        return status;
    }

    log_notice("accepted c %d on p %d from '%s'", c->sd, p->sd,
               nc_unresolve_peer_desc(c->sd));

    return NC_OK;
}
static rstatus_t
proxy_accept(struct context *ctx, struct conn *p)
{
    rstatus_t status;
    struct conn *c;
    int sd;

    ASSERT(p->proxy && !p->client);
    ASSERT(p->sd > 0);
    ASSERT(p->recv_active && p->recv_ready);

    for (;;) {
        sd = accept(p->sd, NULL, NULL);
        if (sd < 0) {
            if (errno == EINTR) {
                log_debug(LOG_VERB, "accept on p %d not ready - eintr", p->sd);
                continue;
            }

            if (errno == EAGAIN || errno == EWOULDBLOCK) {
                log_debug(LOG_VERB, "accept on p %d not ready - eagain", p->sd);
                p->recv_ready = 0;
                return DN_OK;
            }

            /*
             * FIXME: On EMFILE or ENFILE mask out IN event on the proxy; mask
             * it back in when some existing connection gets closed
             */
            log_error("accept on p %d failed: %s", p->sd, strerror(errno));
            return DN_ERROR;
        }

        break;
    }

    c = conn_get(p->owner, true, p->data_store);
    if (c == NULL) {
        log_error("get conn for c %d from p %d failed: %s", sd, p->sd,
                  strerror(errno));
        status = close(sd);
        if (status < 0) {
            log_error("close c %d failed, ignored: %s", sd, strerror(errno));
        }
        return DN_ENOMEM;
    }
    c->sd = sd;

    stats_pool_incr(ctx, c->owner, client_connections);

    status = dn_set_nonblocking(c->sd);
    if (status < 0) {
        log_error("set nonblock on c %d from p %d failed: %s", c->sd, p->sd,
                  strerror(errno));
        c->close(ctx, c);
        return status;
    }

    if (p->family == AF_INET || p->family == AF_INET6) {
        status = dn_set_tcpnodelay(c->sd);
        if (status < 0) {
            log_warn("set tcpnodelay on c %d from p %d failed, ignored: %s",
                     c->sd, p->sd, strerror(errno));
        }
    }

    status = event_add_conn(ctx->evb, c);
    if (status < 0) {
        log_error("event add conn from p %d failed: %s", p->sd, strerror(errno));
        c->close(ctx, c);
        return status;
    }

    log_debug(LOG_NOTICE, "accepted c %d on p %d from '%s'", c->sd, p->sd,
              dn_unresolve_peer_desc(c->sd));

    return DN_OK;
}
/*
 * After accepting a client connection we get back a new fd; create a new
 * conn for that fd to read the client's data.
 */
static rstatus_t
proxy_accept(struct context *ctx, struct conn *p) /* p is the proxy conn, i.e. the listener for client connections */
{
    rstatus_t status;
    struct conn *c;
    int sd;
    struct server_pool *pool = p->owner;

    ASSERT(p->proxy && !p->client);
    ASSERT(p->sd > 0);
    ASSERT(p->recv_active && p->recv_ready);

    for (;;) {
        sd = accept(p->sd, NULL, NULL); /* accept a new client connection, yielding a new fd */
        if (sd < 0) {
            if (errno == EINTR) {
                log_debug(LOG_VERB, "accept on p %d not ready - eintr", p->sd);
                continue;
            }

            if (errno == EAGAIN || errno == EWOULDBLOCK || errno == ECONNABORTED) {
                log_debug(LOG_VERB, "accept on p %d not ready - eagain", p->sd);
                p->recv_ready = 0;
                return NC_OK;
            }

            /*
             * Workaround of https://github.com/twitter/twemproxy/issues/97
             *
             * We should never reach here because the check for conn_ncurr_cconn()
             * against ctx->max_ncconn should catch this earlier in the cycle.
             * If we reach here ignore EMFILE/ENFILE, return NC_OK will enable
             * the server continue to run instead of close the server socket
             *
             * The right solution however, is on EMFILE/ENFILE to mask out IN
             * event on the proxy and mask it back in when some existing
             * connections gets closed
             */
            if (errno == EMFILE || errno == ENFILE) {
                log_debug(LOG_CRIT, "accept on p %d with max fds %"PRIu32" "
                          "used connections %"PRIu32" max client connections %"PRIu32" "
                          "curr client connections %"PRIu32" failed: %s",
                          p->sd, ctx->max_nfd, conn_ncurr_conn(),
                          ctx->max_ncconn, conn_ncurr_cconn(), strerror(errno));

                p->recv_ready = 0;

                return NC_OK;
            }

            log_error("accept on p %d failed: %s", p->sd, strerror(errno));

            return NC_ERROR;
        }

        break;
    }

    if (conn_ncurr_cconn() >= ctx->max_ncconn) {
        log_debug(LOG_CRIT, "client connections %"PRIu32" exceed limit %"PRIu32,
                  conn_ncurr_cconn(), ctx->max_ncconn);
        status = close(sd);
        if (status < 0) {
            log_error("close c %d failed, ignored: %s", sd, strerror(errno));
        }
        return NC_OK;
    }

    c = conn_get(p->owner, true, p->redis);
    if (c == NULL) {
        log_error("get conn for c %d from p %d failed: %s", sd, p->sd,
                  strerror(errno));
        status = close(sd);
        if (status < 0) {
            log_error("close c %d failed, ignored: %s", sd, strerror(errno));
        }
        return NC_ENOMEM;
    }
    c->sd = sd;

    stats_pool_incr(ctx, c->owner, client_connections);

    status = nc_set_nonblocking(c->sd);
    if (status < 0) {
        log_error("set nonblock on c %d from p %d failed: %s", c->sd, p->sd,
                  strerror(errno));
        c->close(ctx, c);
        return status;
    }

    if (pool->tcpkeepalive) {
        status = nc_set_tcpkeepalive(c->sd);
        if (status < 0) {
            log_warn("set tcpkeepalive on c %d from p %d failed, ignored: %s",
                     c->sd, p->sd, strerror(errno));
        }
    }

    if (p->family == AF_INET || p->family == AF_INET6) {
        status = nc_set_tcpnodelay(c->sd);
        if (status < 0) {
            log_warn("set tcpnodelay on c %d from p %d failed, ignored: %s",
                     c->sd, p->sd, strerror(errno));
        }
    }

    status = event_add_conn(ctx->evb, c);
    if (status < 0) {
        log_error("event add conn from p %d failed: %s", p->sd, strerror(errno));
        c->close(ctx, c);
        return status;
    }

    log_debug(LOG_INFO, "accepted c %d on p %d from '%s'", c->sd, p->sd,
              nc_unresolve_peer_desc(c->sd));

    return NC_OK;
}
struct msg *
rsp_send_next(struct context *ctx, struct conn *conn)
{
    rstatus_t status;
    struct msg *msg, *pmsg; /* response and its peer request */

    ASSERT((conn->client && !conn->proxy) ||
           (conn->dnode_client && !conn->dnode_server));

    pmsg = TAILQ_FIRST(&conn->omsg_q);
    if (pmsg == NULL || !req_done(conn, pmsg)) {
        /* nothing is outstanding, initiate close? */
        if (pmsg == NULL && conn->eof) {
            conn->done = 1;
            log_debug(LOG_INFO, "c %d is done", conn->sd);
        }

        status = event_del_out(ctx->evb, conn);
        if (status != DN_OK) {
            conn->err = errno;
        }

        return NULL;
    }

    msg = conn->smsg;
    if (msg != NULL) {
        ASSERT(!msg->request && msg->peer != NULL);
        ASSERT(req_done(conn, msg->peer));
        pmsg = TAILQ_NEXT(msg->peer, c_tqe);
    }

    if (pmsg == NULL || !req_done(conn, pmsg)) {
        conn->smsg = NULL;
        return NULL;
    }
    ASSERT(pmsg->request && !pmsg->swallow);

    if (req_error(conn, pmsg)) {
        msg = rsp_make_error(ctx, conn, pmsg);
        if (msg == NULL) {
            conn->err = errno;
            return NULL;
        }
        msg->peer = pmsg;
        pmsg->peer = msg;
        if (!conn->dyn_mode) {
            stats_pool_incr(ctx, conn->owner, forward_error);
        } else { //dyn_mode
            stats_pool_incr(ctx, conn->owner, peer_forward_error);
        }
    } else {
        msg = pmsg->peer;
    }
    ASSERT(!msg->request);

    conn->smsg = msg;

    log_debug(LOG_VVERB, "send next rsp %"PRIu64" on c %d", msg->id, conn->sd);

    return msg;
}