示例#1
0
static void
req_forward_remote_dc(struct context *ctx, struct conn *c_conn, struct msg *msg,
                      struct mbuf *orig_mbuf, uint8_t *key, uint32_t keylen,
                      struct datacenter *dc)
{
    uint32_t rack_cnt = array_n(&dc->racks);
    if (rack_cnt == 0)
        return;

    struct rack *rack = dc->preselected_rack_for_replication;
    if (rack == NULL)
        rack = array_get(&dc->racks, 0);

    struct msg *rack_msg = msg_get(c_conn, msg->request, __FUNCTION__);
    if (rack_msg == NULL) {
        log_debug(LOG_VERB, "whelp, looks like yer screwed now, buddy. no inter-rack messages for you!");
        msg_put(rack_msg);
        return;
    }

    msg_clone(msg, orig_mbuf, rack_msg);
    log_info("msg (%d:%d) clone to remote rack msg (%d:%d)",
            msg->id, msg->parent_id, rack_msg->id, rack_msg->parent_id);
    rack_msg->swallow = true;

    if (log_loggable(LOG_DEBUG)) {
        log_debug(LOG_DEBUG, "forwarding request to conn '%s' on rack '%.*s'",
                dn_unresolve_peer_desc(c_conn->sd), rack->name->len, rack->name->data);
    }
    remote_req_forward(ctx, c_conn, rack_msg, rack, key, keylen);
}
示例#2
0
static void
core_close_log(struct conn *conn)
{
	char *addrstr;

	if ((conn->type == CONN_CLIENT) || (conn->type == CONN_DNODE_PEER_CLIENT)) {
		addrstr = dn_unresolve_peer_desc(conn->sd);
	} else {
		addrstr = dn_unresolve_addr(conn->addr, conn->addrlen);
	}
	log_debug(LOG_NOTICE, "close %s %d '%s' on event %04"PRIX32" eof %d done "
			  "%d rb %zu sb %zu%c %s", conn_get_type_string(conn), conn->sd,
              addrstr, conn->events, conn->eof, conn->done, conn->recv_bytes,
              conn->send_bytes,
              conn->err ? ':' : ' ', conn->err ? strerror(conn->err) : "");

}
示例#3
0
static void
req_forward_all_local_racks(struct context *ctx, struct conn *c_conn,
                            struct msg *msg, struct mbuf *orig_mbuf,
                            uint8_t *key, uint32_t keylen, struct datacenter *dc)
{
    //log_debug(LOG_DEBUG, "dc name  '%.*s'",
    //            dc->name->len, dc->name->data);
    uint8_t rack_cnt = (uint8_t)array_n(&dc->racks);
    uint8_t rack_index;
    msg->rsp_handler = msg_get_rsp_handler(msg);
    init_response_mgr(&msg->rspmgr, msg, msg->is_read, rack_cnt, c_conn);
    log_info("msg %d:%d same DC racks:%d expect replies %d",
             msg->id, msg->parent_id, rack_cnt, msg->rspmgr.max_responses);
    for(rack_index = 0; rack_index < rack_cnt; rack_index++) {
        struct rack *rack = array_get(&dc->racks, rack_index);
        //log_debug(LOG_DEBUG, "rack name '%.*s'",
        //            rack->name->len, rack->name->data);
        struct msg *rack_msg;
        // clone message even for local node
        struct server_pool *pool = c_conn->owner;
        if (string_compare(rack->name, &pool->rack) == 0 ) {
            rack_msg = msg;
        } else {
            rack_msg = msg_get(c_conn, msg->request, __FUNCTION__);
            if (rack_msg == NULL) {
                log_debug(LOG_VERB, "whelp, looks like yer screwed "
                        "now, buddy. no inter-rack messages for "
                        "you!");
                continue;
            }

            msg_clone(msg, orig_mbuf, rack_msg);
            log_info("msg (%d:%d) clone to rack msg (%d:%d)",
                     msg->id, msg->parent_id, rack_msg->id, rack_msg->parent_id);
            rack_msg->swallow = true;
        }

        if (log_loggable(LOG_DEBUG)) {
            log_debug(LOG_DEBUG, "forwarding request to conn '%s' on rack '%.*s'",
                    dn_unresolve_peer_desc(c_conn->sd), rack->name->len, rack->name->data);
        }
        log_debug(LOG_VERB, "c_conn: %p forwarding (%d:%d)",
                c_conn, rack_msg->id, rack_msg->parent_id);
        remote_req_forward(ctx, c_conn, rack_msg, rack, key, keylen);
    }
}
示例#4
0
static void
core_close_log(struct conn *conn)
{
	char type, *addrstr;

	if (conn->client) {
		type = 'c';
		addrstr = dn_unresolve_peer_desc(conn->sd);
	} else {
		type = conn->proxy ? 'p' : 's';
		addrstr = dn_unresolve_addr(conn->addr, conn->addrlen);
	}
	log_debug(LOG_NOTICE, "close %c %d '%s' on event %04"PRIX32" eof %d done "
			"%d rb %zu sb %zu%c %s", type, conn->sd, addrstr, conn->events,
			conn->eof, conn->done, conn->recv_bytes, conn->send_bytes,
			conn->err ? ':' : ' ', conn->err ? strerror(conn->err) : "");

}
示例#5
0
static rstatus_t
proxy_accept(struct context *ctx, struct conn *p)
{
    rstatus_t status;
    struct conn *c;
    int sd;

    ASSERT(p->proxy && !p->client);
    ASSERT(p->sd > 0);
    ASSERT(p->recv_active && p->recv_ready);

    for (;;) {
        sd = accept(p->sd, NULL, NULL);
        if (sd < 0) {
            if (errno == EINTR) {
                log_debug(LOG_VERB, "accept on p %d not ready - eintr", p->sd);
                continue;
            }

            if (errno == EAGAIN || errno == EWOULDBLOCK) {
                log_debug(LOG_VERB, "accept on p %d not ready - eagain", p->sd);
                p->recv_ready = 0;
                return DN_OK;
            }

            /*
             * FIXME: On EMFILE or ENFILE mask out IN event on the proxy; mask
             * it back in when some existing connection gets closed
             */

            log_error("accept on p %d failed: %s", p->sd, strerror(errno));
            return DN_ERROR;
        }

        break;
    }

    c = conn_get(p->owner, true, p->data_store);
    if (c == NULL) {
        log_error("get conn for c %d from p %d failed: %s", sd, p->sd,
                  strerror(errno));
        status = close(sd);
        if (status < 0) {
            log_error("close c %d failed, ignored: %s", sd, strerror(errno));
        }
        return DN_ENOMEM;
    }
    c->sd = sd;

    stats_pool_incr(ctx, c->owner, client_connections);

    status = dn_set_nonblocking(c->sd);
    if (status < 0) {
        log_error("set nonblock on c %d from p %d failed: %s", c->sd, p->sd,
                  strerror(errno));
        c->close(ctx, c);
        return status;
    }

    if (p->family == AF_INET || p->family == AF_INET6) {
        status = dn_set_tcpnodelay(c->sd);
        if (status < 0) {
            log_warn("set tcpnodelay on c %d from p %d failed, ignored: %s",
                     c->sd, p->sd, strerror(errno));
        }
    }

    status = event_add_conn(ctx->evb, c);
    if (status < 0) {
        log_error("event add conn from p %d failed: %s", p->sd,
                  strerror(errno));
        c->close(ctx, c);
        return status;
    }

    log_debug(LOG_NOTICE, "accepted c %d on p %d from '%s'", c->sd, p->sd,
              dn_unresolve_peer_desc(c->sd));

    return DN_OK;
}
示例#6
0
static void
dnode_req_forward(struct context *ctx, struct conn *conn, struct msg *msg)
{
    struct server_pool *pool;
    uint8_t *key;
    uint32_t keylen;

    if (log_loggable(LOG_DEBUG)) {
       log_debug(LOG_DEBUG, "dnode_req_forward entering ");
    }
    log_debug(LOG_DEBUG, "DNODE REQ RECEIVED %s %d dmsg->id %u",
              conn_get_type_string(conn), conn->sd, msg->dmsg->id);

    ASSERT(conn->type == CONN_DNODE_PEER_CLIENT);

    pool = conn->owner;
    key = NULL;
    keylen = 0;

    log_debug(LOG_DEBUG, "conn %p adding message %d:%d", conn, msg->id, msg->parent_id);
    dictAdd(conn->outstanding_msgs_dict, &msg->id, msg);

    if (!string_empty(&pool->hash_tag)) {
        struct string *tag = &pool->hash_tag;
        uint8_t *tag_start, *tag_end;

        tag_start = dn_strchr(msg->key_start, msg->key_end, tag->data[0]);
        if (tag_start != NULL) {
            tag_end = dn_strchr(tag_start + 1, msg->key_end, tag->data[1]);
            if (tag_end != NULL) {
                key = tag_start + 1;
                keylen = (uint32_t)(tag_end - key);
            }
        }
    }

    if (keylen == 0) {
        key = msg->key_start;
        keylen = (uint32_t)(msg->key_end - msg->key_start);
    }

    ASSERT(msg->dmsg != NULL);
    if (msg->dmsg->type == DMSG_REQ) {
       local_req_forward(ctx, conn, msg, key, keylen);
    } else if (msg->dmsg->type == DMSG_REQ_FORWARD) {
        struct mbuf *orig_mbuf = STAILQ_FIRST(&msg->mhdr);
        struct datacenter *dc = server_get_dc(pool, &pool->dc);
        uint32_t rack_cnt = array_n(&dc->racks);
        uint32_t rack_index;
        for(rack_index = 0; rack_index < rack_cnt; rack_index++) {
            struct rack *rack = array_get(&dc->racks, rack_index);
            //log_debug(LOG_DEBUG, "forwarding to rack  '%.*s'",
            //            rack->name->len, rack->name->data);
            struct msg *rack_msg;
            if (string_compare(rack->name, &pool->rack) == 0 ) {
                rack_msg = msg;
            } else {
                rack_msg = msg_get(conn, msg->request, __FUNCTION__);
                if (rack_msg == NULL) {
                    log_debug(LOG_VERB, "whelp, looks like yer screwed now, buddy. no inter-rack messages for you!");
                    continue;
                }

                if (msg_clone(msg, orig_mbuf, rack_msg) != DN_OK) {
                    msg_put(rack_msg);
                    continue;
                }
                rack_msg->swallow = true;
            }

            if (log_loggable(LOG_DEBUG)) {
               log_debug(LOG_DEBUG, "forwarding request from conn '%s' to rack '%.*s' dc '%.*s' ",
                           dn_unresolve_peer_desc(conn->sd), rack->name->len, rack->name->data, rack->dc->len, rack->dc->data);
            }

            remote_req_forward(ctx, conn, rack_msg, rack, key, keylen);
        }
    }
}
/* Forward a client request over to a peer */
void dnode_peer_req_forward(struct context *ctx, struct conn *c_conn,
                            struct conn *p_conn, struct msg *msg,
                            struct rack *rack, uint8_t *key, uint32_t keylen)
{

    struct server *server = p_conn->owner;
    log_debug(LOG_DEBUG, "forwarding request from client conn '%s' to peer conn '%s' on rack '%.*s' dc '%.*s' ",
              dn_unresolve_peer_desc(c_conn->sd), dn_unresolve_peer_desc(p_conn->sd),
              rack->name->len, rack->name->data,
              server->dc.len, server->dc.data);

    struct string *dc = rack->dc;
    rstatus_t status;
    /* enqueue message (request) into client outq, if response is expected */
    if (!msg->noreply && !msg->swallow) {
        conn_enqueue_outq(ctx, c_conn, msg);
    }

    ASSERT(p_conn->type == CONN_DNODE_PEER_SERVER);
    ASSERT((c_conn->type == CONN_CLIENT) ||
           (c_conn->type == CONN_DNODE_PEER_CLIENT));

    /* enqueue the message (request) into peer inq */
    status = event_add_out(ctx->evb, p_conn);
    if (status != DN_OK) {
        dnode_req_forward_error(ctx, p_conn, msg);
        p_conn->err = errno;
        return;
    }

    struct mbuf *header_buf = mbuf_get();
    if (header_buf == NULL) {
        loga("Unable to obtain an mbuf for dnode msg's header!");
        req_put(msg);
        return;
    }

    struct server_pool *pool = c_conn->owner;
    dmsg_type_t msg_type = (string_compare(&pool->dc, dc) != 0)? DMSG_REQ_FORWARD : DMSG_REQ;

    if (p_conn->dnode_secured) {
        //Encrypting and adding header for a request
        if (log_loggable(LOG_VVERB)) {
           log_debug(LOG_VERB, "AES encryption key: %s\n", base64_encode(p_conn->aes_key, AES_KEYLEN));
        }

        //write dnode header
        if (ENCRYPTION) {
            status = dyn_aes_encrypt_msg(msg, p_conn->aes_key);
            if (status == DN_ERROR) {
                loga("OOM to obtain an mbuf for encryption!");
                mbuf_put(header_buf);
                req_put(msg);
                return;
            }

            if (log_loggable(LOG_VVERB)) {
               log_debug(LOG_VERB, "#encrypted bytes : %d", status);
            }

            dmsg_write(header_buf, msg->id, msg_type, p_conn, msg_length(msg));
        } else {
            if (log_loggable(LOG_VVERB)) {
               log_debug(LOG_VERB, "no encryption on the msg payload");
            }
            dmsg_write(header_buf, msg->id, msg_type, p_conn, msg_length(msg));
        }

    } else {
        //write dnode header
        dmsg_write(header_buf, msg->id, msg_type, p_conn, msg_length(msg));
    }

    mbuf_insert_head(&msg->mhdr, header_buf);

    if (log_loggable(LOG_VVERB)) {
        log_hexdump(LOG_VVERB, header_buf->pos, mbuf_length(header_buf), "dyn message header: ");
        msg_dump(msg);
    }

    conn_enqueue_inq(ctx, p_conn, msg);

    dnode_peer_req_forward_stats(ctx, p_conn->owner, msg);

    if (log_loggable(LOG_VVERB)) {
       log_debug(LOG_VVERB, "remote forward from c %d to s %d req %"PRIu64" len %"PRIu32
                   " type %d with key '%.*s'", c_conn->sd, p_conn->sd, msg->id,
                   msg->mlen, msg->type, keylen, key);
    }

}
示例#8
0
void
local_req_forward(struct context *ctx, struct conn *c_conn, struct msg *msg,
                  uint8_t *key, uint32_t keylen)
{
    rstatus_t status;
    struct conn *s_conn;

    if (log_loggable(LOG_VVERB)) {
       loga("local_req_forward entering ............");
    }

    ASSERT((c_conn->type == CONN_CLIENT) ||
           (c_conn->type == CONN_DNODE_PEER_CLIENT));

    /* enqueue message (request) into client outq, if response is expected */
    if (msg->expect_datastore_reply) {
        conn_enqueue_outq(ctx, c_conn, msg);
    }

    s_conn = get_datastore_conn(ctx, c_conn->owner);
    log_debug(LOG_VERB, "c_conn %p got server conn %p", c_conn, s_conn);
    if (s_conn == NULL) {
        req_forward_error(ctx, c_conn, msg, errno);
        return;
    }
    ASSERT(s_conn->type == CONN_SERVER);

    if (log_loggable(LOG_DEBUG)) {
       log_debug(LOG_DEBUG, "forwarding request from client conn '%s' to storage conn '%s'",
                    dn_unresolve_peer_desc(c_conn->sd), dn_unresolve_peer_desc(s_conn->sd));
    }

    if (ctx->dyn_state == NORMAL) {
        /* enqueue the message (request) into server inq */
        if (TAILQ_EMPTY(&s_conn->imsg_q)) {
            status = event_add_out(ctx->evb, s_conn);

            if (status != DN_OK) {
                req_forward_error(ctx, c_conn, msg, errno);
                s_conn->err = errno;
                return;
            }
        }
    } else if (ctx->dyn_state == STANDBY) {  //no reads/writes from peers/clients
        log_debug(LOG_INFO, "Node is in STANDBY state. Drop write/read requests");
        req_forward_error(ctx, c_conn, msg, errno);
        return;
    } else if (ctx->dyn_state == WRITES_ONLY && msg->is_read) {
        //no reads from peers/clients but allow writes from peers/clients
        log_debug(LOG_INFO, "Node is in WRITES_ONLY state. Drop read requests");
        req_forward_error(ctx, c_conn, msg, errno);
        return;
    } else if (ctx->dyn_state == RESUMING) {
        log_debug(LOG_INFO, "Node is in RESUMING state. Still drop read requests and flush out all the queued writes");
        if (msg->is_read) {
            req_forward_error(ctx, c_conn, msg, errno);
            return;
        }

        status = event_add_out(ctx->evb, s_conn);

        if (status != DN_OK) {
            req_forward_error(ctx, c_conn, msg, errno);
            s_conn->err = errno;
            return;
        }
    }

    conn_enqueue_inq(ctx, s_conn, msg);
    req_forward_stats(ctx, msg);
    if(g_data_store == DATA_REDIS){
        req_redis_stats(ctx, msg);
    }


    if (log_loggable(LOG_VERB)) {
       log_debug(LOG_VERB, "local forward from c %d to s %d req %"PRIu64" len %"PRIu32
                " type %d with key '%.*s'", c_conn->sd, s_conn->sd, msg->id,
                msg->mlen, msg->type, keylen, key);
    }
}
示例#9
0
static void
req_forward(struct context *ctx, struct conn *c_conn, struct msg *msg)
{
	struct server_pool *pool = c_conn->owner;
	uint8_t *key;
	uint32_t keylen;

	ASSERT(c_conn->client && !c_conn->proxy);

	if (msg->is_read)
		stats_pool_incr(ctx, pool, client_read_requests);
	else
		stats_pool_incr(ctx, pool, client_write_requests);

	key = NULL;
	keylen = 0;

	if (!string_empty(&pool->hash_tag)) {
		struct string *tag = &pool->hash_tag;
		uint8_t *tag_start, *tag_end;

		tag_start = dn_strchr(msg->key_start, msg->key_end, tag->data[0]);
		if (tag_start != NULL) {
			tag_end = dn_strchr(tag_start + 1, msg->key_end, tag->data[1]);
			if (tag_end != NULL) {
				key = tag_start + 1;
				keylen = (uint32_t)(tag_end - key);
			}
		}
	}

	if (keylen == 0) {
		key = msg->key_start;
		keylen = (uint32_t)(msg->key_end - msg->key_start);
	}

	// need to capture the initial mbuf location as once we add in the dynomite headers (as mbufs to the src msg),
	// that will bork the request sent to secondary racks
	struct mbuf *orig_mbuf = STAILQ_FIRST(&msg->mhdr);

	if (request_send_to_all_racks(msg)) {
		uint32_t dc_cnt = array_n(&pool->datacenters);
		uint32_t dc_index;
		for(dc_index = 0; dc_index < dc_cnt; dc_index++) {
			struct datacenter *dc = array_get(&pool->datacenters, dc_index);
			if (dc == NULL) {
				log_error("Wow, this is very bad, dc is NULL");
				return;
			}

			if (string_compare(dc->name, &pool->dc) == 0) { //send to all local racks
				//log_debug(LOG_DEBUG, "dc name  '%.*s'", dc->name->len, dc->name->data);
				uint32_t rack_cnt = array_n(&dc->racks);
				uint32_t rack_index;
				for(rack_index = 0; rack_index < rack_cnt; rack_index++) {
					struct rack *rack = array_get(&dc->racks, rack_index);
					//log_debug(LOG_DEBUG, "rack name '%.*s'", rack->name->len, rack->name->data);
					struct msg *rack_msg;
					if (string_compare(rack->name, &pool->rack) == 0 ) {
						rack_msg = msg;
					} else {
						rack_msg = msg_get(c_conn, msg->request, msg->redis);
						if (rack_msg == NULL) {
							log_debug(LOG_VERB, "whelp, looks like yer screwed now, buddy. no inter-rack messages for you!");
							continue;
						}

						msg_clone(msg, orig_mbuf, rack_msg);
						rack_msg->noreply = true;
					}

					log_debug(LOG_DEBUG, "forwarding request to conn '%s' on rack '%.*s'",
							dn_unresolve_peer_desc(c_conn->sd), rack->name->len, rack->name->data);

					remote_req_forward(ctx, c_conn, rack_msg, rack, key, keylen);
				}
			} else {
            uint32_t rack_cnt = array_n(&dc->racks);
				if (rack_cnt == 0)
					continue;

				uint32_t ran_index = rand() % rack_cnt;
				struct rack *rack = array_get(&dc->racks, ran_index);

				struct msg *rack_msg = msg_get(c_conn, msg->request, msg->redis);
				if (rack_msg == NULL) {
					log_debug(LOG_VERB, "whelp, looks like yer screwed now, buddy. no inter-rack messages for you!");
					continue;
				}

				msg_clone(msg, orig_mbuf, rack_msg);
				rack_msg->noreply = true;

				log_debug(LOG_DEBUG, "forwarding request to conn '%s' on rack '%.*s'",
						dn_unresolve_peer_desc(c_conn->sd), rack->name->len, rack->name->data);

				remote_req_forward(ctx, c_conn, rack_msg, rack, key, keylen);
			}
		}
	} else { //for read only requests
		struct rack * rack = server_get_rack_by_dc_rack(pool, &pool->rack, &pool->dc);
		remote_req_forward(ctx, c_conn, msg, rack, key, keylen);
	}
}