static void req_forward_remote_dc(struct context *ctx, struct conn *c_conn, struct msg *msg, struct mbuf *orig_mbuf, uint8_t *key, uint32_t keylen, struct datacenter *dc) { uint32_t rack_cnt = array_n(&dc->racks); if (rack_cnt == 0) return; struct rack *rack = dc->preselected_rack_for_replication; if (rack == NULL) rack = array_get(&dc->racks, 0); struct msg *rack_msg = msg_get(c_conn, msg->request, __FUNCTION__); if (rack_msg == NULL) { log_debug(LOG_VERB, "whelp, looks like yer screwed now, buddy. no inter-rack messages for you!"); msg_put(rack_msg); return; } msg_clone(msg, orig_mbuf, rack_msg); log_info("msg (%d:%d) clone to remote rack msg (%d:%d)", msg->id, msg->parent_id, rack_msg->id, rack_msg->parent_id); rack_msg->swallow = true; if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "forwarding request to conn '%s' on rack '%.*s'", dn_unresolve_peer_desc(c_conn->sd), rack->name->len, rack->name->data); } remote_req_forward(ctx, c_conn, rack_msg, rack, key, keylen); }
static void core_close_log(struct conn *conn) { char *addrstr; if ((conn->type == CONN_CLIENT) || (conn->type == CONN_DNODE_PEER_CLIENT)) { addrstr = dn_unresolve_peer_desc(conn->sd); } else { addrstr = dn_unresolve_addr(conn->addr, conn->addrlen); } log_debug(LOG_NOTICE, "close %s %d '%s' on event %04"PRIX32" eof %d done " "%d rb %zu sb %zu%c %s", conn_get_type_string(conn), conn->sd, addrstr, conn->events, conn->eof, conn->done, conn->recv_bytes, conn->send_bytes, conn->err ? ':' : ' ', conn->err ? strerror(conn->err) : ""); }
static void req_forward_all_local_racks(struct context *ctx, struct conn *c_conn, struct msg *msg, struct mbuf *orig_mbuf, uint8_t *key, uint32_t keylen, struct datacenter *dc) { //log_debug(LOG_DEBUG, "dc name '%.*s'", // dc->name->len, dc->name->data); uint8_t rack_cnt = (uint8_t)array_n(&dc->racks); uint8_t rack_index; msg->rsp_handler = msg_get_rsp_handler(msg); init_response_mgr(&msg->rspmgr, msg, msg->is_read, rack_cnt, c_conn); log_info("msg %d:%d same DC racks:%d expect replies %d", msg->id, msg->parent_id, rack_cnt, msg->rspmgr.max_responses); for(rack_index = 0; rack_index < rack_cnt; rack_index++) { struct rack *rack = array_get(&dc->racks, rack_index); //log_debug(LOG_DEBUG, "rack name '%.*s'", // rack->name->len, rack->name->data); struct msg *rack_msg; // clone message even for local node struct server_pool *pool = c_conn->owner; if (string_compare(rack->name, &pool->rack) == 0 ) { rack_msg = msg; } else { rack_msg = msg_get(c_conn, msg->request, __FUNCTION__); if (rack_msg == NULL) { log_debug(LOG_VERB, "whelp, looks like yer screwed " "now, buddy. no inter-rack messages for " "you!"); continue; } msg_clone(msg, orig_mbuf, rack_msg); log_info("msg (%d:%d) clone to rack msg (%d:%d)", msg->id, msg->parent_id, rack_msg->id, rack_msg->parent_id); rack_msg->swallow = true; } if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "forwarding request to conn '%s' on rack '%.*s'", dn_unresolve_peer_desc(c_conn->sd), rack->name->len, rack->name->data); } log_debug(LOG_VERB, "c_conn: %p forwarding (%d:%d)", c_conn, rack_msg->id, rack_msg->parent_id); remote_req_forward(ctx, c_conn, rack_msg, rack, key, keylen); } }
/*
 * Emit a NOTICE-level log line summarising a connection that is being closed.
 *
 * The connection is tagged 'c' when conn->client is set, 'p' when it is a
 * proxy and 's' otherwise. Client connections are described by the socket's
 * peer description, the others by their stored conn->addr. The line also
 * carries the descriptor, event mask, eof/done flags, byte counters and,
 * when conn->err is non-zero, the matching strerror() text.
 */
static void
core_close_log(struct conn *conn)
{
    char kind;
    char *endpoint;

    if (!conn->client) {
        if (conn->proxy) {
            kind = 'p';
        } else {
            kind = 's';
        }
        endpoint = dn_unresolve_addr(conn->addr, conn->addrlen);
    } else {
        kind = 'c';
        endpoint = dn_unresolve_peer_desc(conn->sd);
    }

    log_debug(LOG_NOTICE, "close %c %d '%s' on event %04"PRIX32" eof %d done "
              "%d rb %zu sb %zu%c %s",
              kind,
              conn->sd,
              endpoint,
              conn->events,
              conn->eof,
              conn->done,
              conn->recv_bytes,
              conn->send_bytes,
              conn->err ? ':' : ' ',
              conn->err ? strerror(conn->err) : "");
}
static rstatus_t proxy_accept(struct context *ctx, struct conn *p) { rstatus_t status; struct conn *c; int sd; ASSERT(p->proxy && !p->client); ASSERT(p->sd > 0); ASSERT(p->recv_active && p->recv_ready); for (;;) { sd = accept(p->sd, NULL, NULL); if (sd < 0) { if (errno == EINTR) { log_debug(LOG_VERB, "accept on p %d not ready - eintr", p->sd); continue; } if (errno == EAGAIN || errno == EWOULDBLOCK) { log_debug(LOG_VERB, "accept on p %d not ready - eagain", p->sd); p->recv_ready = 0; return DN_OK; } /* * FIXME: On EMFILE or ENFILE mask out IN event on the proxy; mask * it back in when some existing connection gets closed */ log_error("accept on p %d failed: %s", p->sd, strerror(errno)); return DN_ERROR; } break; } c = conn_get(p->owner, true, p->data_store); if (c == NULL) { log_error("get conn for c %d from p %d failed: %s", sd, p->sd, strerror(errno)); status = close(sd); if (status < 0) { log_error("close c %d failed, ignored: %s", sd, strerror(errno)); } return DN_ENOMEM; } c->sd = sd; stats_pool_incr(ctx, c->owner, client_connections); status = dn_set_nonblocking(c->sd); if (status < 0) { log_error("set nonblock on c %d from p %d failed: %s", c->sd, p->sd, strerror(errno)); c->close(ctx, c); return status; } if (p->family == AF_INET || p->family == AF_INET6) { status = dn_set_tcpnodelay(c->sd); if (status < 0) { log_warn("set tcpnodelay on c %d from p %d failed, ignored: %s", c->sd, p->sd, strerror(errno)); } } status = event_add_conn(ctx->evb, c); if (status < 0) { log_error("event add conn from p %d failed: %s", p->sd, strerror(errno)); c->close(ctx, c); return status; } log_debug(LOG_NOTICE, "accepted c %d on p %d from '%s'", c->sd, p->sd, dn_unresolve_peer_desc(c->sd)); return DN_OK; }
static void dnode_req_forward(struct context *ctx, struct conn *conn, struct msg *msg) { struct server_pool *pool; uint8_t *key; uint32_t keylen; if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "dnode_req_forward entering "); } log_debug(LOG_DEBUG, "DNODE REQ RECEIVED %s %d dmsg->id %u", conn_get_type_string(conn), conn->sd, msg->dmsg->id); ASSERT(conn->type == CONN_DNODE_PEER_CLIENT); pool = conn->owner; key = NULL; keylen = 0; log_debug(LOG_DEBUG, "conn %p adding message %d:%d", conn, msg->id, msg->parent_id); dictAdd(conn->outstanding_msgs_dict, &msg->id, msg); if (!string_empty(&pool->hash_tag)) { struct string *tag = &pool->hash_tag; uint8_t *tag_start, *tag_end; tag_start = dn_strchr(msg->key_start, msg->key_end, tag->data[0]); if (tag_start != NULL) { tag_end = dn_strchr(tag_start + 1, msg->key_end, tag->data[1]); if (tag_end != NULL) { key = tag_start + 1; keylen = (uint32_t)(tag_end - key); } } } if (keylen == 0) { key = msg->key_start; keylen = (uint32_t)(msg->key_end - msg->key_start); } ASSERT(msg->dmsg != NULL); if (msg->dmsg->type == DMSG_REQ) { local_req_forward(ctx, conn, msg, key, keylen); } else if (msg->dmsg->type == DMSG_REQ_FORWARD) { struct mbuf *orig_mbuf = STAILQ_FIRST(&msg->mhdr); struct datacenter *dc = server_get_dc(pool, &pool->dc); uint32_t rack_cnt = array_n(&dc->racks); uint32_t rack_index; for(rack_index = 0; rack_index < rack_cnt; rack_index++) { struct rack *rack = array_get(&dc->racks, rack_index); //log_debug(LOG_DEBUG, "forwarding to rack '%.*s'", // rack->name->len, rack->name->data); struct msg *rack_msg; if (string_compare(rack->name, &pool->rack) == 0 ) { rack_msg = msg; } else { rack_msg = msg_get(conn, msg->request, __FUNCTION__); if (rack_msg == NULL) { log_debug(LOG_VERB, "whelp, looks like yer screwed now, buddy. 
no inter-rack messages for you!"); continue; } if (msg_clone(msg, orig_mbuf, rack_msg) != DN_OK) { msg_put(rack_msg); continue; } rack_msg->swallow = true; } if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "forwarding request from conn '%s' to rack '%.*s' dc '%.*s' ", dn_unresolve_peer_desc(conn->sd), rack->name->len, rack->name->data, rack->dc->len, rack->dc->data); } remote_req_forward(ctx, conn, rack_msg, rack, key, keylen); } } }
/* Forward a client request over to a peer */ void dnode_peer_req_forward(struct context *ctx, struct conn *c_conn, struct conn *p_conn, struct msg *msg, struct rack *rack, uint8_t *key, uint32_t keylen) { struct server *server = p_conn->owner; log_debug(LOG_DEBUG, "forwarding request from client conn '%s' to peer conn '%s' on rack '%.*s' dc '%.*s' ", dn_unresolve_peer_desc(c_conn->sd), dn_unresolve_peer_desc(p_conn->sd), rack->name->len, rack->name->data, server->dc.len, server->dc.data); struct string *dc = rack->dc; rstatus_t status; /* enqueue message (request) into client outq, if response is expected */ if (!msg->noreply && !msg->swallow) { conn_enqueue_outq(ctx, c_conn, msg); } ASSERT(p_conn->type == CONN_DNODE_PEER_SERVER); ASSERT((c_conn->type == CONN_CLIENT) || (c_conn->type == CONN_DNODE_PEER_CLIENT)); /* enqueue the message (request) into peer inq */ status = event_add_out(ctx->evb, p_conn); if (status != DN_OK) { dnode_req_forward_error(ctx, p_conn, msg); p_conn->err = errno; return; } struct mbuf *header_buf = mbuf_get(); if (header_buf == NULL) { loga("Unable to obtain an mbuf for dnode msg's header!"); req_put(msg); return; } struct server_pool *pool = c_conn->owner; dmsg_type_t msg_type = (string_compare(&pool->dc, dc) != 0)? 
DMSG_REQ_FORWARD : DMSG_REQ; if (p_conn->dnode_secured) { //Encrypting and adding header for a request if (log_loggable(LOG_VVERB)) { log_debug(LOG_VERB, "AES encryption key: %s\n", base64_encode(p_conn->aes_key, AES_KEYLEN)); } //write dnode header if (ENCRYPTION) { status = dyn_aes_encrypt_msg(msg, p_conn->aes_key); if (status == DN_ERROR) { loga("OOM to obtain an mbuf for encryption!"); mbuf_put(header_buf); req_put(msg); return; } if (log_loggable(LOG_VVERB)) { log_debug(LOG_VERB, "#encrypted bytes : %d", status); } dmsg_write(header_buf, msg->id, msg_type, p_conn, msg_length(msg)); } else { if (log_loggable(LOG_VVERB)) { log_debug(LOG_VERB, "no encryption on the msg payload"); } dmsg_write(header_buf, msg->id, msg_type, p_conn, msg_length(msg)); } } else { //write dnode header dmsg_write(header_buf, msg->id, msg_type, p_conn, msg_length(msg)); } mbuf_insert_head(&msg->mhdr, header_buf); if (log_loggable(LOG_VVERB)) { log_hexdump(LOG_VVERB, header_buf->pos, mbuf_length(header_buf), "dyn message header: "); msg_dump(msg); } conn_enqueue_inq(ctx, p_conn, msg); dnode_peer_req_forward_stats(ctx, p_conn->owner, msg); if (log_loggable(LOG_VVERB)) { log_debug(LOG_VVERB, "remote forward from c %d to s %d req %"PRIu64" len %"PRIu32 " type %d with key '%.*s'", c_conn->sd, p_conn->sd, msg->id, msg->mlen, msg->type, keylen, key); } }
void local_req_forward(struct context *ctx, struct conn *c_conn, struct msg *msg, uint8_t *key, uint32_t keylen) { rstatus_t status; struct conn *s_conn; if (log_loggable(LOG_VVERB)) { loga("local_req_forward entering ............"); } ASSERT((c_conn->type == CONN_CLIENT) || (c_conn->type == CONN_DNODE_PEER_CLIENT)); /* enqueue message (request) into client outq, if response is expected */ if (msg->expect_datastore_reply) { conn_enqueue_outq(ctx, c_conn, msg); } s_conn = get_datastore_conn(ctx, c_conn->owner); log_debug(LOG_VERB, "c_conn %p got server conn %p", c_conn, s_conn); if (s_conn == NULL) { req_forward_error(ctx, c_conn, msg, errno); return; } ASSERT(s_conn->type == CONN_SERVER); if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "forwarding request from client conn '%s' to storage conn '%s'", dn_unresolve_peer_desc(c_conn->sd), dn_unresolve_peer_desc(s_conn->sd)); } if (ctx->dyn_state == NORMAL) { /* enqueue the message (request) into server inq */ if (TAILQ_EMPTY(&s_conn->imsg_q)) { status = event_add_out(ctx->evb, s_conn); if (status != DN_OK) { req_forward_error(ctx, c_conn, msg, errno); s_conn->err = errno; return; } } } else if (ctx->dyn_state == STANDBY) { //no reads/writes from peers/clients log_debug(LOG_INFO, "Node is in STANDBY state. Drop write/read requests"); req_forward_error(ctx, c_conn, msg, errno); return; } else if (ctx->dyn_state == WRITES_ONLY && msg->is_read) { //no reads from peers/clients but allow writes from peers/clients log_debug(LOG_INFO, "Node is in WRITES_ONLY state. Drop read requests"); req_forward_error(ctx, c_conn, msg, errno); return; } else if (ctx->dyn_state == RESUMING) { log_debug(LOG_INFO, "Node is in RESUMING state. 
Still drop read requests and flush out all the queued writes"); if (msg->is_read) { req_forward_error(ctx, c_conn, msg, errno); return; } status = event_add_out(ctx->evb, s_conn); if (status != DN_OK) { req_forward_error(ctx, c_conn, msg, errno); s_conn->err = errno; return; } } conn_enqueue_inq(ctx, s_conn, msg); req_forward_stats(ctx, msg); if(g_data_store == DATA_REDIS){ req_redis_stats(ctx, msg); } if (log_loggable(LOG_VERB)) { log_debug(LOG_VERB, "local forward from c %d to s %d req %"PRIu64" len %"PRIu32 " type %d with key '%.*s'", c_conn->sd, s_conn->sd, msg->id, msg->mlen, msg->type, keylen, key); } }
static void req_forward(struct context *ctx, struct conn *c_conn, struct msg *msg) { struct server_pool *pool = c_conn->owner; uint8_t *key; uint32_t keylen; ASSERT(c_conn->client && !c_conn->proxy); if (msg->is_read) stats_pool_incr(ctx, pool, client_read_requests); else stats_pool_incr(ctx, pool, client_write_requests); key = NULL; keylen = 0; if (!string_empty(&pool->hash_tag)) { struct string *tag = &pool->hash_tag; uint8_t *tag_start, *tag_end; tag_start = dn_strchr(msg->key_start, msg->key_end, tag->data[0]); if (tag_start != NULL) { tag_end = dn_strchr(tag_start + 1, msg->key_end, tag->data[1]); if (tag_end != NULL) { key = tag_start + 1; keylen = (uint32_t)(tag_end - key); } } } if (keylen == 0) { key = msg->key_start; keylen = (uint32_t)(msg->key_end - msg->key_start); } // need to capture the initial mbuf location as once we add in the dynomite headers (as mbufs to the src msg), // that will bork the request sent to secondary racks struct mbuf *orig_mbuf = STAILQ_FIRST(&msg->mhdr); if (request_send_to_all_racks(msg)) { uint32_t dc_cnt = array_n(&pool->datacenters); uint32_t dc_index; for(dc_index = 0; dc_index < dc_cnt; dc_index++) { struct datacenter *dc = array_get(&pool->datacenters, dc_index); if (dc == NULL) { log_error("Wow, this is very bad, dc is NULL"); return; } if (string_compare(dc->name, &pool->dc) == 0) { //send to all local racks //log_debug(LOG_DEBUG, "dc name '%.*s'", dc->name->len, dc->name->data); uint32_t rack_cnt = array_n(&dc->racks); uint32_t rack_index; for(rack_index = 0; rack_index < rack_cnt; rack_index++) { struct rack *rack = array_get(&dc->racks, rack_index); //log_debug(LOG_DEBUG, "rack name '%.*s'", rack->name->len, rack->name->data); struct msg *rack_msg; if (string_compare(rack->name, &pool->rack) == 0 ) { rack_msg = msg; } else { rack_msg = msg_get(c_conn, msg->request, msg->redis); if (rack_msg == NULL) { log_debug(LOG_VERB, "whelp, looks like yer screwed now, buddy. 
no inter-rack messages for you!"); continue; } msg_clone(msg, orig_mbuf, rack_msg); rack_msg->noreply = true; } log_debug(LOG_DEBUG, "forwarding request to conn '%s' on rack '%.*s'", dn_unresolve_peer_desc(c_conn->sd), rack->name->len, rack->name->data); remote_req_forward(ctx, c_conn, rack_msg, rack, key, keylen); } } else { uint32_t rack_cnt = array_n(&dc->racks); if (rack_cnt == 0) continue; uint32_t ran_index = rand() % rack_cnt; struct rack *rack = array_get(&dc->racks, ran_index); struct msg *rack_msg = msg_get(c_conn, msg->request, msg->redis); if (rack_msg == NULL) { log_debug(LOG_VERB, "whelp, looks like yer screwed now, buddy. no inter-rack messages for you!"); continue; } msg_clone(msg, orig_mbuf, rack_msg); rack_msg->noreply = true; log_debug(LOG_DEBUG, "forwarding request to conn '%s' on rack '%.*s'", dn_unresolve_peer_desc(c_conn->sd), rack->name->len, rack->name->data); remote_req_forward(ctx, c_conn, rack_msg, rack, key, keylen); } } } else { //for read only requests struct rack * rack = server_get_rack_by_dc_rack(pool, &pool->rack, &pool->dc); remote_req_forward(ctx, c_conn, msg, rack, key, keylen); } }