rstatus_t req_enqueue(struct context *ctx, struct conn *s_conn, struct conn *c_conn, struct msg *msg) { rstatus_t status; /* enqueue the message (request) into server inq */ if (TAILQ_EMPTY(&s_conn->imsg_q)) { status = event_add_out(ctx->evb, s_conn); if (status != NC_OK) { req_forward_error(ctx, c_conn, msg); s_conn->err = errno; return status; } } if (s_conn->need_auth) { status = msg->add_auth(ctx, c_conn, s_conn); if (status != NC_OK) { req_forward_error(ctx, c_conn, msg); s_conn->err = errno; return status; } } s_conn->enqueue_inq(ctx, s_conn, msg); return NC_OK; }
static void req_forward(struct context *ctx, struct conn *c_conn, struct msg *msg) { rstatus_t status; struct conn *s_conn; struct server_pool *pool; uint8_t *key; uint32_t keylen; struct keypos *kpos; ASSERT(c_conn->client && !c_conn->proxy); /* enqueue message (request) into client outq, if response is expected */ if (!msg->noreply) { c_conn->enqueue_outq(ctx, c_conn, msg); } pool = c_conn->owner; ASSERT(array_n(msg->keys) > 0); kpos = array_get(msg->keys, 0); key = kpos->start; keylen = (uint32_t)(kpos->end - kpos->start); s_conn = server_pool_conn(ctx, c_conn->owner, key, keylen); if (s_conn == NULL) { req_forward_error(ctx, c_conn, msg); return; } ASSERT(!s_conn->client && !s_conn->proxy); /* enqueue the message (request) into server inq */ if (TAILQ_EMPTY(&s_conn->imsg_q)) { status = event_add_out(ctx->evb, s_conn); if (status != NC_OK) { req_forward_error(ctx, c_conn, msg); s_conn->err = errno; return; } } if (s_conn->need_auth) { status = msg->add_auth(ctx, c_conn, s_conn); if (status != NC_OK) { req_forward_error(ctx, c_conn, msg); s_conn->err = errno; return; } } s_conn->enqueue_inq(ctx, s_conn, msg); req_forward_stats(ctx, s_conn->owner, msg); log_debug(LOG_VERB, "forward from c %d to s %d req %"PRIu64" len %"PRIu32 " type %d with key '%.*s'", c_conn->sd, s_conn->sd, msg->id, msg->mlen, msg->type, keylen, key); }
void req_recv_done(struct context *ctx, struct conn *conn, struct msg *msg, struct msg *nmsg) { rstatus_t status; struct server_pool *pool; struct msg_tqh frag_msgq; struct msg *sub_msg; struct msg *tmsg; /* tmp next message */ ASSERT(conn->client && !conn->proxy); ASSERT(msg->request); ASSERT(msg->owner == conn); ASSERT(conn->rmsg == msg); ASSERT(nmsg == NULL || nmsg->request); /* enqueue next message (request), if any */ conn->rmsg = nmsg; if (req_filter(ctx, conn, msg)) { return; } /* do fragment */ pool = conn->owner; TAILQ_INIT(&frag_msgq); status = msg->fragment(msg, pool->ncontinuum, &frag_msgq); if (status != NC_OK) { if (!msg->noreply) { conn->enqueue_outq(ctx, conn, msg); } req_forward_error(ctx, conn, msg); } /* if no fragment happened */ if (TAILQ_EMPTY(&frag_msgq)) { req_forward(ctx, conn, msg); return; } status = req_make_reply(ctx, conn, msg); if (status != NC_OK) { if (!msg->noreply) { conn->enqueue_outq(ctx, conn, msg); } req_forward_error(ctx, conn, msg); } for (sub_msg = TAILQ_FIRST(&frag_msgq); sub_msg != NULL; sub_msg = tmsg) { tmsg = TAILQ_NEXT(sub_msg, m_tqe); TAILQ_REMOVE(&frag_msgq, sub_msg, m_tqe); req_forward(ctx, conn, sub_msg); } ASSERT(TAILQ_EMPTY(&frag_msgq)); return; }
static void req_forward(struct context *ctx, struct conn *c_conn, struct msg *msg) { rstatus_t status; struct conn *s_conn; struct server_pool *pool; uint8_t *key; uint32_t keylen; struct keypos *kpos; ASSERT(c_conn->client && !c_conn->proxy); /* enqueue message (request) into client outq, if response is expected */ if (!msg->noreply) { c_conn->enqueue_outq(ctx, c_conn, msg); } pool = c_conn->owner; ASSERT(array_n(msg->keys) > 0); kpos = array_get(msg->keys, 0); key = kpos->start; keylen = (uint32_t)(kpos->end - kpos->start); s_conn = msg->routing(ctx, pool, msg, key, keylen); if (s_conn == NULL) { req_forward_error(ctx, c_conn, msg); return; } ASSERT(!s_conn->client && !s_conn->proxy); status = req_enqueue(ctx, s_conn, c_conn, msg); if (status != NC_OK) { req_put(msg); return; } req_forward_stats(ctx, s_conn->owner, msg); log_debug(LOG_VERB, "forward from c %d to s %d req %"PRIu64" len %"PRIu32 " type %d with key '%.*s'", c_conn->sd, s_conn->sd, msg->id, msg->mlen, msg->type, keylen, key); return; }
static void admin_local_req_forward(struct context *ctx, struct conn *c_conn, struct msg *msg, struct rack *rack, uint8_t *key, uint32_t keylen) { ASSERT((c_conn->type == CONN_CLIENT) || (c_conn->type == CONN_DNODE_PEER_CLIENT)); struct node *peer = dnode_peer_pool_server(ctx, c_conn->owner, rack, key, keylen, msg->msg_routing); if (!peer->is_local) { send_rsp_integer(ctx, c_conn, msg); return; } struct conn *p_conn = dnode_peer_pool_server_conn(ctx, peer); if (p_conn == NULL) { c_conn->err = EHOSTDOWN; req_forward_error(ctx, c_conn, msg, c_conn->err); return; } log_debug(LOG_NOTICE, "Need to delete [%.*s] ", keylen, key); local_req_forward(ctx, c_conn, msg, key, keylen); }
void remote_req_forward(struct context *ctx, struct conn *c_conn, struct msg *msg, struct rack *rack, uint8_t *key, uint32_t keylen) { struct conn *p_conn; ASSERT(c_conn->client || c_conn->dnode_client); p_conn = dnode_peer_pool_conn(ctx, c_conn->owner, rack, key, keylen, msg->msg_type); if (p_conn == NULL) { req_forward_error(ctx, c_conn, msg); return; } //jeb - check if s_conn is _this_ node, and if so, get conn from server_pool_conn instead struct server *peer = p_conn->owner; if (peer->is_local) { local_req_forward(ctx, c_conn, msg, key, keylen); return; } else { dnode_peer_req_forward(ctx, c_conn, p_conn, msg, rack, key, keylen); } }
static void req_forward(struct context *ctx, struct conn *c_conn, struct msg *msg) { rstatus_t status; struct conn *s_conn; struct server_pool *pool; uint8_t *key; uint32_t keylen; ASSERT(c_conn->client && !c_conn->proxy); /* enqueue message (request) into client outq, if response is expected */ if (!msg->noreply) { c_conn->enqueue_outq(ctx, c_conn, msg); } pool = c_conn->owner; key = NULL; keylen = 0; /* * If hash_tag: is configured for this server pool, we use the part of * the key within the hash tag as an input to the distributor. Otherwise * we use the full key */ if (!string_empty(&pool->hash_tag)) { struct string *tag = &pool->hash_tag; uint8_t *tag_start, *tag_end; tag_start = nc_strchr(msg->key_start, msg->key_end, tag->data[0]); if (tag_start != NULL) { tag_end = nc_strchr(tag_start + 1, msg->key_end, tag->data[1]); if (tag_end != NULL) { key = tag_start + 1; keylen = (uint32_t)(tag_end - key); } } } if (keylen == 0) { key = msg->key_start; keylen = (uint32_t)(msg->key_end - msg->key_start); } s_conn = server_pool_conn(ctx, c_conn->owner, key, keylen); if (s_conn == NULL) { req_forward_error(ctx, c_conn, msg); return; } ASSERT(!s_conn->client && !s_conn->proxy); /* enqueue the message (request) into server inq */ status = event_add_out_with_conn(ctx, s_conn, msg); if (status != NC_OK) { req_forward_error(ctx, c_conn, msg); s_conn->err = errno; return; } req_forward_stats(ctx, s_conn->owner, msg); log_debug(LOG_VERB, "forward from c %d to s %d req %"PRIu64" len %"PRIu32 " type %d with key '%.*s'", c_conn->sd, s_conn->sd, msg->id, msg->mlen, msg->type, keylen, key); }
void local_req_forward(struct context *ctx, struct conn *c_conn, struct msg *msg, uint8_t *key, uint32_t keylen) { rstatus_t status; struct conn *s_conn; if (log_loggable(LOG_VVERB)) { loga("local_req_forward entering ............"); } ASSERT((c_conn->type == CONN_CLIENT) || (c_conn->type == CONN_DNODE_PEER_CLIENT)); /* enqueue message (request) into client outq, if response is expected */ if (msg->expect_datastore_reply) { conn_enqueue_outq(ctx, c_conn, msg); } s_conn = get_datastore_conn(ctx, c_conn->owner); log_debug(LOG_VERB, "c_conn %p got server conn %p", c_conn, s_conn); if (s_conn == NULL) { req_forward_error(ctx, c_conn, msg, errno); return; } ASSERT(s_conn->type == CONN_SERVER); if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "forwarding request from client conn '%s' to storage conn '%s'", dn_unresolve_peer_desc(c_conn->sd), dn_unresolve_peer_desc(s_conn->sd)); } if (ctx->dyn_state == NORMAL) { /* enqueue the message (request) into server inq */ if (TAILQ_EMPTY(&s_conn->imsg_q)) { status = event_add_out(ctx->evb, s_conn); if (status != DN_OK) { req_forward_error(ctx, c_conn, msg, errno); s_conn->err = errno; return; } } } else if (ctx->dyn_state == STANDBY) { //no reads/writes from peers/clients log_debug(LOG_INFO, "Node is in STANDBY state. Drop write/read requests"); req_forward_error(ctx, c_conn, msg, errno); return; } else if (ctx->dyn_state == WRITES_ONLY && msg->is_read) { //no reads from peers/clients but allow writes from peers/clients log_debug(LOG_INFO, "Node is in WRITES_ONLY state. Drop read requests"); req_forward_error(ctx, c_conn, msg, errno); return; } else if (ctx->dyn_state == RESUMING) { log_debug(LOG_INFO, "Node is in RESUMING state. Still drop read requests and flush out all the queued writes"); if (msg->is_read) { req_forward_error(ctx, c_conn, msg, errno); return; } status = event_add_out(ctx->evb, s_conn); if (status != DN_OK) { req_forward_error(ctx, c_conn, msg, errno); s_conn->err = errno; return; } } conn_enqueue_inq(ctx, s_conn, msg); req_forward_stats(ctx, msg); if(g_data_store == DATA_REDIS){ req_redis_stats(ctx, msg); } if (log_loggable(LOG_VERB)) { log_debug(LOG_VERB, "local forward from c %d to s %d req %"PRIu64" len %"PRIu32 " type %d with key '%.*s'", c_conn->sd, s_conn->sd, msg->id, msg->mlen, msg->type, keylen, key); } }
void local_req_forward(struct context *ctx, struct conn *c_conn, struct msg *msg, uint8_t *key, uint32_t keylen) { rstatus_t status; struct conn *s_conn; if (get_tracking_level() >= LOG_VVERB) { loga("local_req_forward entering ............"); } ASSERT((c_conn->client || c_conn->dnode_client) && !c_conn->proxy && !c_conn->dnode_server); if (c_conn->dyn_mode && !c_conn->same_dc && !msg->is_read) { msg->noreply = 1; } /* enqueue message (request) into client outq, if response is expected */ if (!msg->noreply) { c_conn->enqueue_outq(ctx, c_conn, msg); } s_conn = server_pool_conn(ctx, c_conn->owner, key, keylen); if (s_conn == NULL) { req_forward_error(ctx, c_conn, msg); return; } ASSERT(!s_conn->client && !s_conn->proxy); if (ctx->dyn_state == NORMAL) { /* enqueue the message (request) into server inq */ if (TAILQ_EMPTY(&s_conn->imsg_q)) { status = event_add_out(ctx->evb, s_conn); if (status != DN_OK) { req_forward_error(ctx, c_conn, msg); s_conn->err = errno; return; } } } else if (ctx->dyn_state == STANDBY) { //no reads/writes from peers/clients log_debug(LOG_VERB, "Node is in STANDBY state. Drop write/read requests"); req_forward_error(ctx, c_conn, msg); return; } else if (ctx->dyn_state == WRITES_ONLY && msg->is_read) { //no reads from peers/clients but allow writes from peers/clients log_debug(LOG_VERB, "Node is in WRITES_ONLY state. Drop read requests"); req_forward_error(ctx, c_conn, msg); return; } else if (ctx->dyn_state == RESUMING) { log_debug(LOG_VERB, "Node is in RESUMING state. Still drop read requests and flush out all the queued writes"); if (msg->is_read) { req_forward_error(ctx, c_conn, msg); return; } status = event_add_out(ctx->evb, s_conn); if (status != DN_OK) { req_forward_error(ctx, c_conn, msg); s_conn->err = errno; return; } } s_conn->enqueue_inq(ctx, s_conn, msg); req_forward_stats(ctx, s_conn->owner, msg); log_debug(LOG_VERB, "local forward from c %d to s %d req %"PRIu64" len %"PRIu32 " type %d with key '%.*s'", c_conn->sd, s_conn->sd, msg->id, msg->mlen, msg->type, keylen, key); }
void req_recv_done(struct context *ctx, struct conn *conn, struct msg *msg, struct msg *nmsg) { rstatus_t status; struct server_pool *pool; struct msg_tqh frag_msgq; struct msg *sub_msg; struct msg *tmsg; /* tmp next message */ ASSERT(conn->client && !conn->proxy); ASSERT(msg->request); ASSERT(msg->owner == conn); ASSERT(conn->rmsg == msg); ASSERT(nmsg == NULL || nmsg->request); //如果读取出来的KV都是完整的,则conn->rmsg = NULL,如果读取内核协议栈缓冲区的数据最好一个KV没有读取完整,则conn->rmsg = nmsg(也就是新的一个msg) /* enqueue next message (request), if any */ conn->rmsg = nmsg; if (req_filter(ctx, conn, msg)) { return; //客户端发送了quit命令过来,则不用再处理KV对了 } if (msg->noforward) { //不需要转到后端服务器,因为没有认证成功 status = req_make_reply(ctx, conn, msg); if (status != NC_OK) { conn->err = errno; return; } status = msg->reply(msg); if (status != NC_OK) { conn->err = errno; return; } //通过core_core中的写事件触发写操作 status = event_add_out(ctx->evb, conn); if (status != NC_OK) { conn->err = errno; } return; } /* do fragment */ pool = conn->owner; TAILQ_INIT(&frag_msgq); //分片 mget mset等批处理命令中的不同KV可能分布在后端不同服务器上因此需要拆分 status = msg->fragment(msg, pool->ncontinuum, &frag_msgq);//如果需要分发到多个后端服务器,则frag_msgq不为空 if (status != NC_OK) { if (!msg->noreply) { conn->enqueue_outq(ctx, conn, msg); } req_forward_error(ctx, conn, msg); } /* if no fragment happened */ if (TAILQ_EMPTY(&frag_msgq)) {//如果需要分发到多个后端服务器,则frag_msgq不为空 req_forward(ctx, conn, msg); //转到后端服务器 return; } status = req_make_reply(ctx, conn, msg); if (status != NC_OK) { if (!msg->noreply) { conn->enqueue_outq(ctx, conn, msg); } req_forward_error(ctx, conn, msg); } for (sub_msg = TAILQ_FIRST(&frag_msgq); sub_msg != NULL; sub_msg = tmsg) { tmsg = TAILQ_NEXT(sub_msg, m_tqe); TAILQ_REMOVE(&frag_msgq, sub_msg, m_tqe); req_forward(ctx, conn, sub_msg); // } ASSERT(TAILQ_EMPTY(&frag_msgq)); return; }
//转发到后端服务器 static void req_forward(struct context *ctx, struct conn *c_conn, struct msg *msg) { rstatus_t status; struct conn *s_conn; struct server_pool *pool; uint8_t *key; uint32_t keylen; struct keypos *kpos; ASSERT(c_conn->client && !c_conn->proxy); /* enqueue message (request) into client outq, if response is expected */ if (!msg->noreply) { c_conn->enqueue_outq(ctx, c_conn, msg); //req_forward把msg入队到客户端连接c_conn->enqueue_outq req_send_done把msg入队到服务端连接s_conn->enqueue_outq } pool = c_conn->owner; ASSERT(array_n(msg->keys) > 0); kpos = array_get(msg->keys, 0); key = kpos->start; keylen = (uint32_t)(kpos->end - kpos->start); //选举后端服务器并建立连接 s_conn = server_pool_conn(ctx, c_conn->owner, key, keylen); if (s_conn == NULL) { req_forward_error(ctx, c_conn, msg); return; } ASSERT(!s_conn->client && !s_conn->proxy); /* enqueue the message (request) into server inq */ if (TAILQ_EMPTY(&s_conn->imsg_q)) { //现在队列上面没有msg,但是下面会往该队列加msg,往后端的队列上面有msg,则添加些事件,通过epoll触发发送出去 status = event_add_out(ctx->evb, s_conn); //该写事件触发在core_core中的写事件把imsg_q中的msg发送出去 if (status != NC_OK) { req_forward_error(ctx, c_conn, msg); s_conn->err = errno; return; } } if (!conn_authenticated(s_conn)) { //现在还没有认证成功,则先进行认证 status = msg->add_auth(ctx, c_conn, s_conn); if (status != NC_OK) { req_forward_error(ctx, c_conn, msg); s_conn->err = errno; return; } } //req_server_enqueue_imsgq s_conn->enqueue_inq(ctx, s_conn, msg);//在core_core中的写事件把imsg_q中的msg发送出去 req_forward_stats(ctx, s_conn->owner, msg); log_debug(LOG_VERB, "forward from c %d to s %d req %"PRIu64" len %"PRIu32 " type %d with key '%.*s'", c_conn->sd, s_conn->sd, msg->id, msg->mlen, msg->type, keylen, key); }