Example 1
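/*
 * Account for a closing dnode client connection: decrement the connection
 * gauge, then count either an EOF or an error depending on how it closed.
 */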
static void
dnode_client_close_stats(struct context *ctx, struct server_pool *pool, err_t err,
                   unsigned eof)
{
    stats_pool_decr(ctx, dnode_client_connections);

    if (eof) {
        //fix this also
        stats_pool_incr(ctx, dnode_client_eof);
        return;
    }

    switch (err) {
    case EPIPE:
    case ETIMEDOUT:
    case ECONNRESET:
    case ECONNABORTED:
    case ENOTCONN:
    case ENETDOWN:
    case ENETUNREACH:
    case EHOSTDOWN:
    case EHOSTUNREACH:
    default:
        //fix this also
        stats_pool_incr(ctx, dnode_client_err);
        break;
    }
}
Example 2
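/*
 * Account for a closing peer connection: drop the connection gauge if the
 * peer was connected, then count an EOF, a timeout, or a generic error.
 */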
static void
dnode_peer_close_stats(struct context *ctx, struct server *server, err_t err,
		unsigned eof, unsigned connected)
{
	if (connected) {
		//stats_server_decr(ctx, server, server_connections);
		stats_pool_decr(ctx, server->owner, peer_connections);
	}

	if (eof) {
		//stats_server_incr(ctx, server, server_eof);
		stats_pool_incr(ctx, server->owner, peer_eof);
		return;
	}

	switch (err) {
	case ETIMEDOUT:
		//stats_server_incr(ctx, server, server_timedout);
		stats_pool_incr(ctx, server->owner, peer_timedout);
		break;
	case EPIPE:
	case ECONNRESET:
	case ECONNABORTED:
	case ECONNREFUSED:
	case ENOTCONN:
	case ENETDOWN:
	case ENETUNREACH:
	case EHOSTDOWN:
	case EHOSTUNREACH:
	default:
		//stats_server_incr(ctx, server, server_err);
		stats_pool_incr(ctx, server->owner, peer_err);
		break;
	}
}
Example 3
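/*
 * Drain the timeout queue: delete expired requests, count them against the
 * owning pool or server, and close the timed-out connection with ETIMEDOUT.
 * The next event-loop timeout is derived from the earliest pending deadline.
 */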
static void
core_timeout(struct context *ctx)
{
	for (;;) {
		struct msg *msg;
		struct conn *conn;
		int64_t now, then;

		msg = msg_tmo_min();
		if (msg == NULL) {
			ctx->timeout = ctx->max_timeout;
			return;
		}

		/* skip over req that are in-error or done */

		if (msg->error || msg->done) {
			msg_tmo_delete(msg);
			continue;
		}

		/*
		 * timeout the expired request and all outstanding requests on the
		 * timing-out server
		 */

		conn = msg->tmo_rbe.data;
		then = msg->tmo_rbe.key;

		now = dn_msec_now();
		if (now < then) {
			int delta = (int)(then - now);
			ctx->timeout = MIN(delta, ctx->max_timeout);
			return;
		}

		log_warn("req %"PRIu64" on %s %d timedout, timeout was %d", msg->id,
				conn_get_type_string(conn), conn->sd, msg->tmo_rbe.timeout);

		msg_tmo_delete(msg);

		if (conn->dyn_mode) {
			if (conn->type == CONN_DNODE_PEER_SERVER) { //outgoing peer requests
				struct server *server = conn->owner;
				if (conn->same_dc)
					stats_pool_incr(ctx, server->owner, peer_timedout_requests);
				else
					stats_pool_incr(ctx, server->owner, remote_peer_timedout_requests);
			}
		} else {
			if (conn->type == CONN_SERVER) { //storage server requests
			   stats_server_incr(ctx, conn->owner, server_dropped_requests);
			}
		}

		conn->err = ETIMEDOUT;

		core_close(ctx, conn);
	}
}
Example 4
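/* Count a response that did not reach quorum, split by read vs. write. */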
static void
rspmgr_incr_non_quorum_responses_stats(struct response_mgr *rspmgr)
{
    if (rspmgr->is_read)
        stats_pool_incr(conn_to_ctx(rspmgr->conn), rspmgr->conn->owner,
                        client_non_quorum_r_responses);
    else
        stats_pool_incr(conn_to_ctx(rspmgr->conn), rspmgr->conn->owner,
                        client_non_quorum_w_responses);
}
Example 5
static void
dnode_rsp_forward_stats(struct context *ctx, struct server *server, struct msg *msg)
{
	ASSERT(!msg->request);
	stats_pool_incr(ctx, server->owner, peer_responses);
	stats_pool_incr_by(ctx, server->owner, peer_response_bytes, msg->mlen);
}
Example 6
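/*
 * Enqueue a request on the server connection's incoming queue, arming its
 * timeout, and update the in-queue stats for the server or the peer pool.
 */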
void
req_server_enqueue_imsgq(struct context *ctx, struct conn *conn, struct msg *msg)
{
    ASSERT(msg->request);
    ASSERT((!conn->client && !conn->proxy) || (!conn->dnode_client && !conn->dnode_server));

    /*
     * timeout clock starts ticking the instant the message is enqueued into
     * the server in_q; the clock continues to tick until it either expires
     * or the message is dequeued from the server out_q
     *
     * noreply requests are free from timeouts because the client is not
     * interested in the response anyway!
     */
    if (!msg->noreply) {
        msg_tmo_insert(msg, conn);
    }

    TAILQ_INSERT_TAIL(&conn->imsg_q, msg, s_tqe);

    if (!conn->dyn_mode) {
       stats_server_incr(ctx, conn->owner, in_queue);
       stats_server_incr_by(ctx, conn->owner, in_queue_bytes, msg->mlen);
    } else {
       struct server_pool *pool = (struct server_pool *) array_get(&ctx->pool, 0);
       stats_pool_incr(ctx, pool, peer_in_queue);
       stats_pool_incr_by(ctx, pool, peer_in_queue_bytes, msg->mlen);
    }
}
Example 7
/**
 * Send the next message (response) on this client connection.
 */
struct msg *
rsp_send_next(struct context *ctx, struct conn *conn)
{
    rstatus_t status;
    struct msg *msg, *pmsg; /* response and its peer request */

    ASSERT(conn->client && !conn->proxy);

    pmsg = TAILQ_FIRST(&conn->omsg_q);
    if (pmsg == NULL || !req_done(conn, pmsg)) { /* no request, or it is not done yet */
        /* nothing is outstanding, initiate close? */
        if (pmsg == NULL && conn->eof) { /* no outstanding request and the connection hit EOF */
            conn->done = 1; /* mark the connection as done */
            log_debug(LOG_INFO, "c %d is done", conn->sd);
        }
        /* stop polling for write events on this connection */
        status = event_del_out(ctx->evb, conn);
        if (status != NC_OK) { /* event deletion failed */
            conn->err = errno; /* record the errno on the connection */
        }
        }

        return NULL;
    }
    /* message currently in flight on this connection */
    msg = conn->smsg;
    if (msg != NULL) {
        ASSERT(!msg->request && msg->peer != NULL);
        ASSERT(req_done(conn, msg->peer));
        pmsg = TAILQ_NEXT(msg->peer, c_tqe);
    }

    if (pmsg == NULL || !req_done(conn, pmsg)) {
        conn->smsg = NULL;
        return NULL;
    }

    ASSERT(pmsg->request && !pmsg->swallow);

    if (req_error(conn, pmsg)) { /* the request failed; build an error response */
        msg = rsp_make_error(ctx, conn, pmsg);
        if (msg == NULL) {
            conn->err = errno;
            return NULL;
        }
        msg->peer = pmsg;
        pmsg->peer = msg;
        stats_pool_incr(ctx, conn->owner, forward_error);
    } else {
        msg = pmsg->peer;
    }
    /* msg now points at the response to send */
    ASSERT(!msg->request);

    conn->smsg = msg;

    log_debug(LOG_VVERB, "send next rsp %"PRIu64" on c %d", msg->id, conn->sd);

    return msg;
}
Example 8
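/*
 * Enqueue a request on the peer connection's outgoing queue and bump the
 * local or remote peer out-queue stats on the first pool.
 */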
void
dnode_req_peer_enqueue_omsgq(struct context *ctx, struct conn *conn, struct msg *msg)
{
    ASSERT(msg->request);
    ASSERT(conn->type == CONN_DNODE_PEER_SERVER);

    TAILQ_INSERT_TAIL(&conn->omsg_q, msg, s_tqe);
    log_debug(LOG_VERB, "conn %p enqueue outq %d:%d", conn, msg->id, msg->parent_id);

    //use only the 1st pool
    struct server_pool *pool = (struct server_pool *) array_get(&ctx->pool, 0);
    if (conn->same_dc)
        stats_pool_incr(ctx, pool, peer_out_queue);
    else
        stats_pool_incr(ctx, pool, remote_peer_out_queue);
    stats_pool_incr_by(ctx, pool, peer_out_queue_bytes, msg->mlen);
}
Example 9
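/* Count a request forwarded to a peer, in both number and bytes. */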
static void
dnode_peer_req_forward_stats(struct context *ctx, struct server *server, struct msg *msg)
{
        ASSERT(msg->request);
        /* use the pool that owns this peer server */
        struct server_pool *pool = server->owner;
        stats_pool_incr(ctx, pool, peer_requests);
        stats_pool_incr_by(ctx, pool, peer_request_bytes, msg->mlen);
}
Example 10
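/*
 * Enqueue a request on the dnode client connection's outgoing queue and bump
 * the dnode client out-queue stats on the first pool.
 */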
void
dnode_req_client_enqueue_omsgq(struct context *ctx, struct conn *conn, struct msg *msg)
{
    ASSERT(msg->request);
    ASSERT(conn->type == CONN_DNODE_PEER_CLIENT);

    log_debug(LOG_VERB, "conn %p enqueue outq %p", conn, msg);
    TAILQ_INSERT_TAIL(&conn->omsg_q, msg, c_tqe);

    //use only the 1st pool
    struct server_pool *pool = (struct server_pool *) array_get(&ctx->pool, 0);
    stats_pool_incr(ctx, pool, dnode_client_out_queue);
    stats_pool_incr_by(ctx, pool, dnode_client_out_queue_bytes, msg->mlen);
}
Example 11
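/*
 * Record a peer failure; once the failure count reaches the pool's limit,
 * eject the peer, schedule its next retry, and rebuild the peer pool.
 */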
static void
dnode_peer_failure(struct context *ctx, struct server *server)
{
	struct server_pool *pool = server->owner;
	int64_t now, next;
	rstatus_t status;

	//fix me
	if (!pool->auto_eject_hosts) {
		return;
	}

	server->failure_count++;

	log_debug(LOG_VERB, "dyn: peer '%.*s' failure count %"PRIu32" limit %"PRIu32,
			server->pname.len, server->pname.data, server->failure_count,
			pool->server_failure_limit);

	if (server->failure_count < pool->server_failure_limit) {
		return;
	}

	now = dn_usec_now();
	if (now < 0) {
		return;
	}

	//fix me
	//stats_server_set_ts(ctx, server, server_ejected_at, now);

	//fix me
	next = now + pool->server_retry_timeout;

	log_debug(LOG_INFO, "dyn: update peer pool %"PRIu32" '%.*s' to delete peer '%.*s' "
			"for next %"PRIu32" secs", pool->idx, pool->name.len,
			pool->name.data, server->pname.len, server->pname.data,
			pool->server_retry_timeout / 1000 / 1000);

	stats_pool_incr(ctx, pool, peer_ejects);

	server->failure_count = 0;
	server->next_retry = next;

	status = dnode_peer_pool_run(pool);
	if (status != DN_OK) {
		log_error("dyn: updating peer pool %"PRIu32" '%.*s' failed: %s", pool->idx,
				pool->name.len, pool->name.data, strerror(errno));
	}
}
Example 12
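/*
 * Variant of the enqueue above that also tracks the per-connection out-queue
 * depth in a histogram.
 */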
static void
dnode_req_client_enqueue_omsgq(struct context *ctx, struct conn *conn, struct msg *msg)
{
    ASSERT(msg->request);
    ASSERT(conn->type == CONN_DNODE_PEER_CLIENT);

    log_debug(LOG_VERB, "conn %p enqueue outq %p", conn, msg);
    TAILQ_INSERT_TAIL(&conn->omsg_q, msg, c_tqe);

    //use only the 1st pool
    conn->omsg_count++;
    histo_add(&ctx->stats->dnode_client_out_queue, conn->omsg_count);
    stats_pool_incr(ctx, dnode_client_out_queue);
    stats_pool_incr_by(ctx, dnode_client_out_queue_bytes, msg->mlen);
}
Example 13
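/*
 * Variant that keeps the out-queue stats on the first pool instead of on the
 * individual server.
 */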
void
dnode_req_peer_enqueue_omsgq(struct context *ctx, struct conn *conn, struct msg *msg)
{
	ASSERT(msg->request);
	ASSERT(!conn->dnode_client && !conn->dnode_server);

	TAILQ_INSERT_TAIL(&conn->omsg_q, msg, s_tqe);

	/* stats_server_incr(ctx, conn->owner, out_queue); */
	/* stats_server_incr_by(ctx, conn->owner, out_queue_bytes, msg->mlen); */

	//use only the 1st pool
	struct server_pool *pool = (struct server_pool *) array_get(&ctx->pool, 0);
	stats_pool_incr(ctx, pool, peer_out_queue);
	stats_pool_incr_by(ctx, pool, peer_out_queue_bytes, msg->mlen);
}
Example 14
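/* Mark a connecting peer as established and count the new peer connection. */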
void
dnode_peer_connected(struct context *ctx, struct conn *conn)
{
	struct server *server = conn->owner;

	ASSERT(!conn->dnode_server && !conn->dnode_client);
	ASSERT(conn->connecting && !conn->connected);

	//fix me
	//stats_server_incr(ctx, server, server_connections);
	stats_pool_incr(ctx, server->owner, peer_connections);

	conn->connecting = 0;
	conn->connected = 1;

	log_debug(LOG_INFO, "dyn: peer connected on sd %d to server '%.*s'", conn->sd,
			server->pname.len, server->pname.data);
}
Example 15
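/*
 * Close a client connection: record close stats, discard any partially read
 * request, drain the outstanding queue (done requests are freed, the rest
 * are swallowed), then close the socket and unref the connection.
 */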
static void
client_close(struct context *ctx, struct conn *conn)
{
    rstatus_t status;
    struct msg *msg, *nmsg; /* current and next message */

    ASSERT(conn->type == CONN_CLIENT);

    client_close_stats(ctx, conn->owner, conn->err, conn->eof);

    if (conn->sd < 0) {
        client_unref(conn);
        return;
    }

    msg = conn->rmsg;
    if (msg != NULL) {
        conn->rmsg = NULL;

        ASSERT(msg->peer == NULL);
        ASSERT(msg->request && !msg->done);

        log_debug(LOG_INFO, "close c %d discarding pending req %"PRIu64" len "
                  "%"PRIu32" type %d", conn->sd, msg->id, msg->mlen,
                  msg->type);

        req_put(msg);
    }

    ASSERT(conn->smsg == NULL);
    ASSERT(TAILQ_EMPTY(&conn->imsg_q));

    for (msg = TAILQ_FIRST(&conn->omsg_q); msg != NULL; msg = nmsg) {
        nmsg = TAILQ_NEXT(msg, c_tqe);

        /* dequeue the message (request) from client outq */
        conn_dequeue_outq(ctx, conn, msg);

        if (msg->done) {
            log_debug(LOG_INFO, "close c %d discarding %s req %"PRIu64" len "
                      "%"PRIu32" type %d", conn->sd,
                      msg->error ? "error": "completed", msg->id, msg->mlen,
                      msg->type);
            req_put(msg);
        } else {
            msg->swallow = 1;

            ASSERT(msg->request);
            ASSERT(msg->peer == NULL);

            log_debug(LOG_INFO, "close c %d schedule swallow of req %"PRIu64" "
                      "len %"PRIu32" type %d", conn->sd, msg->id, msg->mlen,
                      msg->type);
        }

        stats_pool_incr(ctx, client_dropped_requests);
    }
    ASSERT(TAILQ_EMPTY(&conn->omsg_q));

    status = close(conn->sd);
    if (status < 0) {
        log_error("close c %d failed, ignored: %s", conn->sd, strerror(errno));
    }
    conn->sd = -1;
    client_unref(conn);
}
Example 16
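/*
 * Close a peer connection: record close stats, drain both message queues
 * (swallowed requests are freed, the rest get an error response scheduled
 * back to their owner), register the failure, then close the socket and
 * release the connection.
 */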
void
dnode_peer_close(struct context *ctx, struct conn *conn)
{
	rstatus_t status;
	struct msg *msg, *nmsg; /* current and next message */
	struct conn *c_conn;    /* peer client connection */

	struct server *server = conn->owner;

	log_debug(LOG_WARN, "dyn: dnode_peer_close on peer '%.*s'", server->pname.len,
			server->pname.data);

	ASSERT(!conn->dnode_server && !conn->dnode_client);

	dnode_peer_close_stats(ctx, conn->owner, conn->err, conn->eof,
			conn->connected);

	if (conn->sd < 0) {
		dnode_peer_failure(ctx, conn->owner);
		conn->unref(conn);
		conn_put(conn);
		return;
	}

	for (msg = TAILQ_FIRST(&conn->imsg_q); msg != NULL; msg = nmsg) {
		nmsg = TAILQ_NEXT(msg, s_tqe);

		/* dequeue the message (request) from server inq */
		conn->dequeue_inq(ctx, conn, msg);

		/*
		 * Don't send any error response, if
		 * 1. request is tagged as noreply or,
		 * 2. client has already closed its connection
		 */
		if (msg->swallow || msg->noreply) {
			log_debug(LOG_INFO, "dyn: close s %d swallow req %"PRIu64" len %"PRIu32
					" type %d", conn->sd, msg->id, msg->mlen, msg->type);
			req_put(msg);
		} else {
			c_conn = msg->owner;
			ASSERT(c_conn->client && !c_conn->proxy);

			msg->done = 1;
			msg->error = 1;
			msg->err = conn->err;
			msg->dyn_error = PEER_CONNECTION_REFUSE;

			if (TAILQ_FIRST(&c_conn->omsg_q) != NULL && req_done(c_conn, TAILQ_FIRST(&c_conn->omsg_q))) {
				event_add_out(ctx->evb, msg->owner);
			}

			log_debug(LOG_INFO, "dyn: close s %d schedule error for req %"PRIu64" "
					"len %"PRIu32" type %d from c %d%c %s", conn->sd, msg->id,
					msg->mlen, msg->type, c_conn->sd, conn->err ? ':' : ' ',
							conn->err ? strerror(conn->err): " ");
		}

		if (!conn->dnode_client) {
		   stats_pool_incr(ctx, server->owner, peer_dropped_requests);
		}
	}
	ASSERT(TAILQ_EMPTY(&conn->imsg_q));

	for (msg = TAILQ_FIRST(&conn->omsg_q); msg != NULL; msg = nmsg) {
		nmsg = TAILQ_NEXT(msg, s_tqe);

		/* dequeue the message (request) from server outq */
		conn->dequeue_outq(ctx, conn, msg);

		if (msg->swallow) {
			log_debug(LOG_INFO, "dyn: close s %d swallow req %"PRIu64" len %"PRIu32
					" type %d", conn->sd, msg->id, msg->mlen, msg->type);
			req_put(msg);
		} else {
			c_conn = msg->owner;
			ASSERT(!c_conn->dnode_client && !c_conn->dnode_server);

			msg->done = 1;
			msg->error = 1;
			msg->err = conn->err;

			if (TAILQ_FIRST(&c_conn->omsg_q) != NULL && req_done(c_conn, TAILQ_FIRST(&c_conn->omsg_q))) {
				event_add_out(ctx->evb, msg->owner);
			}

			log_debug(LOG_INFO, "dyn: close s %d schedule error for req %"PRIu64" "
					"len %"PRIu32" type %d from c %d%c %s", conn->sd, msg->id,
					msg->mlen, msg->type, c_conn->sd, conn->err ? ':' : ' ',
							conn->err ? strerror(conn->err): " ");
		}
	}
	ASSERT(TAILQ_EMPTY(&conn->omsg_q));

	msg = conn->rmsg;
	if (msg != NULL) {
		conn->rmsg = NULL;

		ASSERT(!msg->request);
		ASSERT(msg->peer == NULL);

		rsp_put(msg);

		log_debug(LOG_INFO, "dyn: close s %d discarding rsp %"PRIu64" len %"PRIu32" "
				"in error", conn->sd, msg->id, msg->mlen);
	}

	ASSERT(conn->smsg == NULL);

	dnode_peer_failure(ctx, conn->owner);

	conn->unref(conn);

	status = close(conn->sd);
	if (status < 0) {
		log_error("dyn: close s %d failed, ignored: %s", conn->sd, strerror(errno));
	}
	conn->sd = -1;

	conn_put(conn);
}
Example 17
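/*
 * Pick the next response to send on a (dnode) client connection. If the
 * request at the head of the queue failed, synthesize an error response and
 * count a forward error.
 */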
struct msg *
rsp_send_next(struct context *ctx, struct conn *conn)
{
    rstatus_t status;
    struct msg *rsp, *req; /* response and its peer request */

    ASSERT_LOG((conn->type == CONN_DNODE_PEER_CLIENT) ||
               (conn->type == CONN_CLIENT), "conn %s", conn_get_type_string(conn));

    req = TAILQ_FIRST(&conn->omsg_q);
    if (req == NULL || (!req->selected_rsp && !req_done(conn, req))) {
        /* nothing is outstanding, initiate close? */
        if (req == NULL && conn->eof) {
            conn->done = 1;
            log_debug(LOG_INFO, "c %d is done", conn->sd);
        }

        status = event_del_out(ctx->evb, conn);
        if (status != DN_OK) {
            conn->err = errno;
        }

        return NULL;
    }

    rsp = conn->smsg;
    if (rsp != NULL) {
        ASSERT(!rsp->request);
        ASSERT(rsp->peer != NULL);
        req = TAILQ_NEXT(rsp->peer, c_tqe);
    }

    if (req == NULL || !req_done(conn, req)) {
        conn->smsg = NULL;
        return NULL;
    }
    ASSERT(req->request && !req->swallow);

    if (req_error(conn, req)) {
        rsp = rsp_make_error(ctx, conn, req);
        if (rsp == NULL) {
            conn->err = errno;
            return NULL;
        }
        rsp->peer = req;
        req->selected_rsp = rsp;
        log_debug(LOG_VERB, "creating new error rsp %p", rsp);
        if (conn->dyn_mode) {
            stats_pool_incr(ctx, peer_forward_error);
        } else {
            stats_pool_incr(ctx, forward_error);
        }
    } else {
        rsp = req->selected_rsp;
    }
    ASSERT(!rsp->request);

    conn->smsg = rsp;

    if (log_loggable(LOG_VVERB)) {
       log_debug(LOG_VVERB, "send next rsp %"PRIu64" on c %d", rsp->id, conn->sd);
    }

    return rsp;
}
Example 18
/* There is a chance that a request to a remote peer, or its response, got
 * dropped, so we may not always receive the response to the request at the
 * head of the FIFO. When that happens we mark the head request as errored and
 * move on to the next one in the outgoing queue. This works because message
 * ids are assigned in monotonically increasing order.
 */
static void
dnode_rsp_forward(struct context *ctx, struct conn *peer_conn, struct msg *rsp)
{
    rstatus_t status;
    struct msg *req;
    struct conn *c_conn;

    ASSERT(!peer_conn->dnode_client && !peer_conn->dnode_server);

    /* response from a peer implies that peer is ok and heartbeating */
    dnode_peer_ok(ctx, peer_conn);

    /* dequeue peer message (request) from peer conn */
    while (true) {
        req = TAILQ_FIRST(&peer_conn->omsg_q);
        ASSERT(req != NULL); /* a response implies an outstanding request */
        log_debug(LOG_VERB, "dnode_rsp_forward entering req %p rsp %p...", req, rsp);
        c_conn = req->owner;
        if (req->id == rsp->dmsg->id) {
            dnode_rsp_forward_match(ctx, peer_conn, rsp);
            return;
        }
        // Report a mismatch and try to rectify
        log_error("MISMATCH: dnode %c %d rsp_dmsg_id %u req %u:%u dnode rsp %u:%u",
                  peer_conn->dnode_client ? 'c' : (peer_conn->dnode_server ? 's' : 'p'),
                  peer_conn->sd, rsp->dmsg->id, req->id, req->parent_id, rsp->id,
                  rsp->parent_id);
        if (c_conn && conn_to_ctx(c_conn))
            stats_pool_incr(conn_to_ctx(c_conn), c_conn->owner,
                            peer_mismatch_requests);

        // TODO : should you be worried about message id getting wrapped around to 0?
        if (rsp->dmsg->id < req->id) {
            // We received a response from the past. This indeed proves out of order
            // responses. A blunder to the architecture. Log it and drop the response.
            log_error("MISMATCH: received response from the past. Dropping it");
            dnode_rsp_put(rsp);
            return;
        }

        if (req->consistency == DC_ONE) {
            if (req->swallow) {
                // swallow the request and move on to the next one
                dnode_rsp_swallow(ctx, peer_conn, req, NULL);
                continue;
            }
            log_warn("req %"PRIu64":%"PRIu64" with DC_ONE consistency is not being swallowed",
                     req->id, req->parent_id);
        }

        if ((req->consistency == DC_QUORUM) && !peer_conn->same_dc) {
            if (req->swallow) {
                // swallow the request and move on to the next one
                dnode_rsp_swallow(ctx, peer_conn, req, NULL);
                continue;
            }
            log_warn("req %"PRIu64":%"PRIu64" with DC_QUORUM consistency is not being swallowed",
                     req->id, req->parent_id);
        }

        log_error("MISMATCHED DNODE RSP RECEIVED %c %d dmsg->id %u req %u:%u rsp %u:%u, skipping....",
                  peer_conn->dnode_client ? 'c' : (peer_conn->dnode_server ? 's' : 'p'),
                  peer_conn->sd, rsp->dmsg->id,
                  req->id, req->parent_id, rsp->id, rsp->parent_id);
        ASSERT(req != NULL && req->peer == NULL);
        ASSERT(req->request && !req->done);

        if (log_loggable(LOG_VVERB)) {
            loga("skipping req:   ");
            msg_dump(req);
        }

        peer_conn->dequeue_outq(ctx, peer_conn, req);
        req->done = 1;

        // Create an appropriate response for the request so it's propagated up
        struct msg *err_rsp = msg_get(peer_conn, false, peer_conn->data_store);
        err_rsp->error = req->error = 1;
        err_rsp->err = req->err = BAD_FORMAT;
        err_rsp->dyn_error = req->dyn_error = BAD_FORMAT;
        err_rsp->dmsg = dmsg_get();
        err_rsp->dmsg->id = req->id;
        log_debug(LOG_VERB, "%p <-> %p", req, err_rsp);
        /* establish err_rsp <-> req (response <-> request) link */
        req->peer = err_rsp;
        err_rsp->peer = req;

        log_error("Peer connection s %d skipping request %u:%u, dummy err_rsp %u:%u",
                  peer_conn->sd, req->id, req->parent_id, err_rsp->id, err_rsp->parent_id);
        status = conn_handle_response(c_conn,
                                      req->parent_id ? req->parent_id : req->id,
                                      err_rsp);
        IGNORE_RET_VAL(status);
        if (req->swallow) {
            log_debug(LOG_INFO, "swallow request %d:%d", req->id, req->parent_id);
            req_put(req);
        }
    }
}
Example 19
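/*
 * Forward a client request: count it as a read or write, extract the
 * (possibly hash-tagged) key, and fan the request out to the local rack plus,
 * when required, every other local rack and one random rack per remote
 * datacenter.
 */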
static void
req_forward(struct context *ctx, struct conn *c_conn, struct msg *msg)
{
	struct server_pool *pool = c_conn->owner;
	uint8_t *key;
	uint32_t keylen;

	ASSERT(c_conn->client && !c_conn->proxy);

	if (msg->is_read)
		stats_pool_incr(ctx, pool, client_read_requests);
	else
		stats_pool_incr(ctx, pool, client_write_requests);

	key = NULL;
	keylen = 0;

	if (!string_empty(&pool->hash_tag)) {
		struct string *tag = &pool->hash_tag;
		uint8_t *tag_start, *tag_end;

		tag_start = dn_strchr(msg->key_start, msg->key_end, tag->data[0]);
		if (tag_start != NULL) {
			tag_end = dn_strchr(tag_start + 1, msg->key_end, tag->data[1]);
			if (tag_end != NULL) {
				key = tag_start + 1;
				keylen = (uint32_t)(tag_end - key);
			}
		}
	}

	if (keylen == 0) {
		key = msg->key_start;
		keylen = (uint32_t)(msg->key_end - msg->key_start);
	}

	// need to capture the initial mbuf location as once we add in the dynomite headers (as mbufs to the src msg),
	// that will bork the request sent to secondary racks
	struct mbuf *orig_mbuf = STAILQ_FIRST(&msg->mhdr);

	if (request_send_to_all_racks(msg)) {
		uint32_t dc_cnt = array_n(&pool->datacenters);
		uint32_t dc_index;
		for(dc_index = 0; dc_index < dc_cnt; dc_index++) {
			struct datacenter *dc = array_get(&pool->datacenters, dc_index);
			if (dc == NULL) {
				log_error("Wow, this is very bad, dc is NULL");
				return;
			}

			if (string_compare(dc->name, &pool->dc) == 0) { //send to all local racks
				//log_debug(LOG_DEBUG, "dc name  '%.*s'", dc->name->len, dc->name->data);
				uint32_t rack_cnt = array_n(&dc->racks);
				uint32_t rack_index;
				for(rack_index = 0; rack_index < rack_cnt; rack_index++) {
					struct rack *rack = array_get(&dc->racks, rack_index);
					//log_debug(LOG_DEBUG, "rack name '%.*s'", rack->name->len, rack->name->data);
					struct msg *rack_msg;
					if (string_compare(rack->name, &pool->rack) == 0 ) {
						rack_msg = msg;
					} else {
						rack_msg = msg_get(c_conn, msg->request, msg->redis);
						if (rack_msg == NULL) {
							log_debug(LOG_VERB, "whelp, looks like yer screwed now, buddy. no inter-rack messages for you!");
							continue;
						}

						msg_clone(msg, orig_mbuf, rack_msg);
						rack_msg->noreply = true;
					}

					log_debug(LOG_DEBUG, "forwarding request to conn '%s' on rack '%.*s'",
							dn_unresolve_peer_desc(c_conn->sd), rack->name->len, rack->name->data);

					remote_req_forward(ctx, c_conn, rack_msg, rack, key, keylen);
				}
			} else {
				uint32_t rack_cnt = array_n(&dc->racks);
				if (rack_cnt == 0)
					continue;

				uint32_t ran_index = rand() % rack_cnt;
				struct rack *rack = array_get(&dc->racks, ran_index);

				struct msg *rack_msg = msg_get(c_conn, msg->request, msg->redis);
				if (rack_msg == NULL) {
					log_debug(LOG_VERB, "whelp, looks like yer screwed now, buddy. no inter-rack messages for you!");
					continue;
				}

				msg_clone(msg, orig_mbuf, rack_msg);
				rack_msg->noreply = true;

				log_debug(LOG_DEBUG, "forwarding request to conn '%s' on rack '%.*s'",
						dn_unresolve_peer_desc(c_conn->sd), rack->name->len, rack->name->data);

				remote_req_forward(ctx, c_conn, rack_msg, rack, key, keylen);
			}
		}
	} else { //for read only requests
		struct rack * rack = server_get_rack_by_dc_rack(pool, &pool->rack, &pool->dc);
		remote_req_forward(ctx, c_conn, msg, rack, key, keylen);
	}
}
Example 20
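/*
 * Forward a client request: count it, register it in the connection's
 * outstanding-message dict, extract the (possibly hash-tagged) key, pick the
 * consistency level, and route it to the local and, if required, remote
 * datacenters.
 */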
static void
req_forward(struct context *ctx, struct conn *c_conn, struct msg *msg)
{
    struct server_pool *pool = c_conn->owner;
    uint8_t *key;
    uint32_t keylen;

    ASSERT(c_conn->type == CONN_CLIENT);

    if (msg->is_read) {
        if (msg->type != MSG_REQ_REDIS_PING)
            stats_pool_incr(ctx, client_read_requests);
    } else {
        stats_pool_incr(ctx, client_write_requests);
    }

    key = NULL;
    keylen = 0;

    // add the message to the dict
    log_debug(LOG_DEBUG, "conn %p adding message %d:%d", c_conn, msg->id, msg->parent_id);
    dictAdd(c_conn->outstanding_msgs_dict, &msg->id, msg);

    if (!string_empty(&pool->hash_tag)) {
        struct string *tag = &pool->hash_tag;
        uint8_t *tag_start, *tag_end;

        tag_start = dn_strchr(msg->key_start, msg->key_end, tag->data[0]);
        if (tag_start != NULL) {
            tag_end = dn_strchr(tag_start + 1, msg->key_end, tag->data[1]);
            if (tag_end != NULL) {
                key = tag_start + 1;
                keylen = (uint32_t)(tag_end - key);
            }
        }
    }

    if (keylen == 0) {
        key = msg->key_start;
        keylen = (uint32_t)(msg->key_end - msg->key_start);
    }

    // need to capture the initial mbuf location as once we add in the dynomite
    // headers (as mbufs to the src msg), that will bork the request sent to
    // secondary racks
    struct mbuf *orig_mbuf = STAILQ_FIRST(&msg->mhdr);

    if (ctx->admin_opt == 1) {
        if (msg->type == MSG_REQ_REDIS_DEL || msg->type == MSG_REQ_MC_DELETE) {
          struct rack * rack = server_get_rack_by_dc_rack(pool, &pool->rack, &pool->dc);
          admin_local_req_forward(ctx, c_conn, msg, rack, key, keylen);
          return;
        }
    }

    if (msg->msg_routing == ROUTING_LOCAL_NODE_ONLY) {
        // Strictly local host only
        msg->consistency = DC_ONE;
        msg->rsp_handler = msg_local_one_rsp_handler;
        local_req_forward(ctx, c_conn, msg, key, keylen);
        return;
    }

    if (msg->is_read) {
        msg->consistency = conn_get_read_consistency(c_conn);
    } else {
        msg->consistency = conn_get_write_consistency(c_conn);
    }

    /* forward the request */
    uint32_t dc_cnt = array_n(&pool->datacenters);
    uint32_t dc_index;

    for(dc_index = 0; dc_index < dc_cnt; dc_index++) {

        struct datacenter *dc = array_get(&pool->datacenters, dc_index);
        if (dc == NULL) {
            log_error("Wow, this is very bad, dc is NULL");
            return;
        }

        if (string_compare(dc->name, &pool->dc) == 0)
            req_forward_local_dc(ctx, c_conn, msg, orig_mbuf, key, keylen, dc);
        else if (request_send_to_all_dcs(msg)) {
            req_forward_remote_dc(ctx, c_conn, msg, orig_mbuf, key, keylen, dc);
        }
    }
}
Example 21
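/*
 * Accept new client connections on the proxy socket, enforce the client
 * connection limit, make the new socket nonblocking with TCP_NODELAY, and
 * register it with the event loop.
 */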
static rstatus_t
proxy_accept(struct context *ctx, struct conn *p)
{
    rstatus_t status;
    struct conn *c;
    int sd;
    struct sockaddr_storage addr;
    socklen_t addr_len;

    ASSERT(p->proxy && !p->client);
    ASSERT(p->sd > 0);
    ASSERT(p->recv_active && p->recv_ready);

    for (;;) {
        sd = accept(p->sd, NULL, NULL);
        if (sd < 0) {
            if (errno == EINTR) {
                log_debug(LOG_VERB, "accept on p %d not ready - eintr", p->sd);
                continue;
            }

            if (errno == EAGAIN || errno == EWOULDBLOCK || errno == ECONNABORTED) {
                log_debug(LOG_VERB, "accept on p %d not ready - eagain", p->sd);
                p->recv_ready = 0;
                return NC_OK;
            }

            /*
             * FIXME: On EMFILE or ENFILE mask out IN event on the proxy; mask
             * it back in when some existing connection gets closed
             */
            
            /*
             * Workaround for https://github.com/twitter/twemproxy/issues/97:
             * just ignore EMFILE/ENFILE; returning NC_OK lets the server
             * continue to run instead of closing the server socket.
             */
            if (errno == EMFILE || errno == ENFILE) {
                log_crit("accept on p %d failed: %s", p->sd,
                         strerror(errno));
                p->recv_ready = 0;

                log_crit("connections status: rlimit nofile %d, "
                         "used connections: %d, max client connections %d, "
                         "curr client connections %d", ctx->rlimit_nofile,
                         conn_ncurr(), ctx->max_ncconn, conn_ncurr_cconn());
                /* Since we maintain a safe max_ncconn and check
                 * it after every accept, we should not reach here.
                 * So we will panic after this log */
                log_panic("HIT MAX OPEN FILES, IT SHOULD NOT HAPPEN. ABORT.");

                return NC_OK;
            }

            log_error("accept on p %d failed: %s", p->sd, strerror(errno));
            return NC_ERROR;
        }
        addr_len = sizeof(addr);
        if (getsockname(sd, (struct sockaddr *)&addr, &addr_len)) {
            log_error("getsockname on p %d failed: %s", p->sd, strerror(errno));
            close(sd);
            continue;
        }

        break;
    }

    if (conn_ncurr_cconn() >= ctx->max_ncconn) {
        stats_pool_incr(ctx, p->owner, rejected_connections);

        log_crit("client connections %d exceed limit %d",
                 conn_ncurr_cconn(), ctx->max_ncconn);
        status = close(sd);
        if (status < 0) {
            log_error("close c %d failed, ignored: %s", sd, strerror(errno));
        }
        return NC_OK;
    }

    c = conn_get(p->owner, true, p->redis);
    if (c == NULL) {
        log_error("get conn for c %d from p %d failed: %s", sd, p->sd,
                  strerror(errno));
        status = close(sd);
        if (status < 0) {
            log_error("close c %d failed, ignored: %s", sd, strerror(errno));
        }
        return NC_ENOMEM;
    }
    c->sd = sd;
    c->family = addr.ss_family;
    c->addrlen = addr_len;
    c->ss = addr;
    c->addr = (struct sockaddr *)&c->ss;

    stats_pool_incr(ctx, c->owner, client_connections);

    status = nc_set_nonblocking(c->sd);
    if (status < 0) {
        log_error("set nonblock on c %d from p %d failed: %s", c->sd, p->sd,
                  strerror(errno));
        c->close(ctx, c);
        return status;
    }

    if (p->family == AF_INET || p->family == AF_INET6) {
        status = nc_set_tcpnodelay(c->sd);
        if (status < 0) {
            log_warn("set tcpnodelay on c %d from p %d failed, ignored: %s",
                     c->sd, p->sd, strerror(errno));
        }
    }

    status = event_add_conn(ctx->evb, c);
    if (status < 0) {
        log_error("event add conn from p %d failed: %s", p->sd,
                  strerror(errno));
        c->close(ctx, c);
        return status;
    }

    log_notice("accepted c %d on p %d from '%s'", c->sd, p->sd,
               nc_unresolve_peer_desc(c->sd));

    return NC_OK;
}
Example 22
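/*
 * Accept new client connections on the proxy socket, make them nonblocking
 * with TCP_NODELAY, and register them with the event loop.
 */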
static rstatus_t
proxy_accept(struct context *ctx, struct conn *p)
{
    rstatus_t status;
    struct conn *c;
    int sd;

    ASSERT(p->proxy && !p->client);
    ASSERT(p->sd > 0);
    ASSERT(p->recv_active && p->recv_ready);

    for (;;) {
        sd = accept(p->sd, NULL, NULL);
        if (sd < 0) {
            if (errno == EINTR) {
                log_debug(LOG_VERB, "accept on p %d not ready - eintr", p->sd);
                continue;
            }

            if (errno == EAGAIN || errno == EWOULDBLOCK) {
                log_debug(LOG_VERB, "accept on p %d not ready - eagain", p->sd);
                p->recv_ready = 0;
                return DN_OK;
            }

            /*
             * FIXME: On EMFILE or ENFILE mask out IN event on the proxy; mask
             * it back in when some existing connection gets closed
             */

            log_error("accept on p %d failed: %s", p->sd, strerror(errno));
            return DN_ERROR;
        }

        break;
    }

    c = conn_get(p->owner, true, p->data_store);
    if (c == NULL) {
        log_error("get conn for c %d from p %d failed: %s", sd, p->sd,
                  strerror(errno));
        status = close(sd);
        if (status < 0) {
            log_error("close c %d failed, ignored: %s", sd, strerror(errno));
        }
        return DN_ENOMEM;
    }
    c->sd = sd;

    stats_pool_incr(ctx, c->owner, client_connections);

    status = dn_set_nonblocking(c->sd);
    if (status < 0) {
        log_error("set nonblock on c %d from p %d failed: %s", c->sd, p->sd,
                  strerror(errno));
        c->close(ctx, c);
        return status;
    }

    if (p->family == AF_INET || p->family == AF_INET6) {
        status = dn_set_tcpnodelay(c->sd);
        if (status < 0) {
            log_warn("set tcpnodelay on c %d from p %d failed, ignored: %s",
                     c->sd, p->sd, strerror(errno));
        }
    }

    status = event_add_conn(ctx->evb, c);
    if (status < 0) {
        log_error("event add conn from p %d failed: %s", p->sd,
                  strerror(errno));
        c->close(ctx, c);
        return status;
    }

    log_debug(LOG_NOTICE, "accepted c %d on p %d from '%s'", c->sd, p->sd,
              dn_unresolve_peer_desc(c->sd));

    return DN_OK;
}
Example 23
// Accept a client connection: obtain the new fd and create a new conn to read from it.
static rstatus_t
proxy_accept(struct context *ctx, struct conn *p) /* p is the proxy (listening) conn for client connections */
{
    rstatus_t status;
    struct conn *c;
    int sd;
    struct server_pool *pool = p->owner;

    ASSERT(p->proxy && !p->client);
    ASSERT(p->sd > 0);
    ASSERT(p->recv_active && p->recv_ready);

    for (;;) {
        sd = accept(p->sd, NULL, NULL); /* accept a new client connection, yielding a new fd */
        if (sd < 0) {
            if (errno == EINTR) {
                log_debug(LOG_VERB, "accept on p %d not ready - eintr", p->sd);
                continue;
            }

            if (errno == EAGAIN || errno == EWOULDBLOCK || errno == ECONNABORTED) {
                log_debug(LOG_VERB, "accept on p %d not ready - eagain", p->sd);
                p->recv_ready = 0;
                return NC_OK;
            }

            /*
             * Workaround for https://github.com/twitter/twemproxy/issues/97
             *
             * We should never reach here because the check of conn_ncurr_cconn()
             * against ctx->max_ncconn should catch this earlier in the cycle.
             * If we do reach here, ignore EMFILE/ENFILE; returning NC_OK lets
             * the server continue to run instead of closing the server socket.
             *
             * The right solution, however, is to mask out the IN event on the
             * proxy on EMFILE/ENFILE and mask it back in when some existing
             * connection gets closed.
             */
            if (errno == EMFILE || errno == ENFILE) {
                log_debug(LOG_CRIT, "accept on p %d with max fds %"PRIu32" "
                          "used connections %"PRIu32" max client connections %"PRIu32" "
                          "curr client connections %"PRIu32" failed: %s",
                          p->sd, ctx->max_nfd, conn_ncurr_conn(),
                          ctx->max_ncconn, conn_ncurr_cconn(), strerror(errno));

                p->recv_ready = 0;

                return NC_OK;
            }

            log_error("accept on p %d failed: %s", p->sd, strerror(errno));

            return NC_ERROR;
        }

        break;
    }

    if (conn_ncurr_cconn() >= ctx->max_ncconn) {
        log_debug(LOG_CRIT, "client connections %"PRIu32" exceed limit %"PRIu32,
                  conn_ncurr_cconn(), ctx->max_ncconn);
        status = close(sd);
        if (status < 0) {
            log_error("close c %d failed, ignored: %s", sd, strerror(errno));
        }
        return NC_OK;
    }

    c = conn_get(p->owner, true, p->redis);
    if (c == NULL) {
        log_error("get conn for c %d from p %d failed: %s", sd, p->sd,
                  strerror(errno));
        status = close(sd);
        if (status < 0) {
            log_error("close c %d failed, ignored: %s", sd, strerror(errno));
        }
        return NC_ENOMEM;
    }
    c->sd = sd;

    stats_pool_incr(ctx, c->owner, client_connections);

    status = nc_set_nonblocking(c->sd);
    if (status < 0) {
        log_error("set nonblock on c %d from p %d failed: %s", c->sd, p->sd,
                  strerror(errno));
        c->close(ctx, c);
        return status;
    }

    if (pool->tcpkeepalive) {
        status = nc_set_tcpkeepalive(c->sd);
        if (status < 0) {
            log_warn("set tcpkeepalive on c %d from p %d failed, ignored: %s",
                     c->sd, p->sd, strerror(errno));
        }
    }

    if (p->family == AF_INET || p->family == AF_INET6) {
        status = nc_set_tcpnodelay(c->sd);
        if (status < 0) {
            log_warn("set tcpnodelay on c %d from p %d failed, ignored: %s",
                     c->sd, p->sd, strerror(errno));
        }
    }

    status = event_add_conn(ctx->evb, c);
    if (status < 0) {
        log_error("event add conn from p %d failed: %s", p->sd,
                  strerror(errno));
        c->close(ctx, c);
        return status;
    }

    log_debug(LOG_INFO, "accepted c %d on p %d from '%s'", c->sd, p->sd,
              nc_unresolve_peer_desc(c->sd));

    return NC_OK;
}
Example 24
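/*
 * Variant of rsp_send_next that serves both plain and dnode clients; forward
 * errors are counted against the pool or the peers depending on the
 * connection mode.
 */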
struct msg *
rsp_send_next(struct context *ctx, struct conn *conn)
{
    rstatus_t status;
    struct msg *msg, *pmsg; /* response and its peer request */

    ASSERT((conn->client && !conn->proxy) || (conn->dnode_client && !conn->dnode_server));

    pmsg = TAILQ_FIRST(&conn->omsg_q);
    if (pmsg == NULL || !req_done(conn, pmsg)) {
        /* nothing is outstanding, initiate close? */
        if (pmsg == NULL && conn->eof) {
            conn->done = 1;
            log_debug(LOG_INFO, "c %d is done", conn->sd);
        }

        status = event_del_out(ctx->evb, conn);
        if (status != DN_OK) {
            conn->err = errno;
        }

        return NULL;
    }

    msg = conn->smsg;
    if (msg != NULL) {
        ASSERT(!msg->request && msg->peer != NULL);
        ASSERT(req_done(conn, msg->peer));
        pmsg = TAILQ_NEXT(msg->peer, c_tqe);
    }

    if (pmsg == NULL || !req_done(conn, pmsg)) {
        conn->smsg = NULL;
        return NULL;
    }
    ASSERT(pmsg->request && !pmsg->swallow);

    if (req_error(conn, pmsg)) {
        msg = rsp_make_error(ctx, conn, pmsg);
        if (msg == NULL) {
            conn->err = errno;
            return NULL;
        }
        msg->peer = pmsg;
        pmsg->peer = msg;
        if (!conn->dyn_mode) {
           stats_pool_incr(ctx, conn->owner, forward_error);
        } else {  //dyn_mode
           stats_pool_incr(ctx, conn->owner, peer_forward_error);
        }
    } else {
        msg = pmsg->peer;
    }
    ASSERT(!msg->request);

    conn->smsg = msg;

    log_debug(LOG_VVERB, "send next rsp %"PRIu64" on c %d", msg->id, conn->sd);

    return msg;
}