Exemple #1
0
rstatus_t
core_core(void *arg, uint32_t events)
{
	rstatus_t status;
	struct conn *conn = arg;
	struct context *ctx = conn_to_ctx(conn);

    log_debug(LOG_VVERB, "event %04"PRIX32" on %s %d", events,
              conn_get_type_string(conn), conn->sd);

	conn->events = events;

	/* error takes precedence over read | write */
	if (events & EVENT_ERR) {
		if (conn->err && conn->dyn_mode) {
			loga("conn err on dnode EVENT_ERR: %d", conn->err);
		}
		core_error(ctx, conn);

		return DN_ERROR;
	}

	/* read takes precedence over write */
	if (events & EVENT_READ) {
		status = core_recv(ctx, conn);

		if (status != DN_OK || conn->done || conn->err) {
			if (conn->dyn_mode) {
				if (conn->err) {
					loga("conn err on dnode EVENT_READ: %d", conn->err);
					core_close(ctx, conn);
					return DN_ERROR;
				}
				return DN_OK;
			}

			core_close(ctx, conn);
			return DN_ERROR;
		}
	}

	if (events & EVENT_WRITE) {
		status = core_send(ctx, conn);
		if (status != DN_OK || conn->done || conn->err) {
			if (conn->dyn_mode) {
				if (conn->err) {
					loga("conn err on dnode EVENT_WRITE: %d", conn->err);
					core_close(ctx, conn);
					return DN_ERROR;
				}
				return DN_OK;
			}

			core_close(ctx, conn);
			return DN_ERROR;
		}
	}

	return DN_OK;
}
static void
dnode_rsp_send_done(struct context *ctx, struct conn *conn, struct msg *rsp)
{
    if (log_loggable(LOG_VVERB)) {
       log_debug(LOG_VVERB, "dnode_rsp_send_done entering");
   }

    struct msg *req; /* peer message (request) */

    ASSERT(conn->type == CONN_DNODE_PEER_CLIENT);
    ASSERT(conn->smsg == NULL);

    log_debug(LOG_VERB, "dyn: send done rsp %"PRIu64" on c %d", rsp->id, conn->sd);

    req = rsp->peer;

    ASSERT(!rsp->request && req->request);
    ASSERT(req->selected_rsp == rsp);
    ASSERT(req->done && !req->swallow);
    log_debug(LOG_DEBUG, "DNODE RSP SENT %s %d dmsg->id %u",
              conn_get_type_string(conn),
             conn->sd, req->dmsg->id);

    /* dequeue request from client outq */
    conn_dequeue_outq(ctx, conn, req);

    req_put(req);
}
Exemple #3
0
static void
core_timeout(struct context *ctx)
{
	for (;;) {
		struct msg *msg;
		struct conn *conn;
		int64_t now, then;

		msg = msg_tmo_min();
		if (msg == NULL) {
			ctx->timeout = ctx->max_timeout;
			return;
		}

		/* skip over req that are in-error or done */

		if (msg->error || msg->done) {
			msg_tmo_delete(msg);
			continue;
		}

		/*
		 * timeout expired req and all the outstanding req on the timing
		 * out server
		 */

		conn = msg->tmo_rbe.data;
		then = msg->tmo_rbe.key;

		now = dn_msec_now();
		if (now < then) {
			int delta = (int)(then - now);
			ctx->timeout = MIN(delta, ctx->max_timeout);
			return;
		}

        log_warn("req %"PRIu64" on %s %d timedout, timeout was %d", msg->id,
                 conn_get_type_string(conn), conn->sd, msg->tmo_rbe.timeout);

		msg_tmo_delete(msg);

		if (conn->dyn_mode) {
			if (conn->type == CONN_DNODE_PEER_SERVER) { //outgoing peer requests
		 	   struct server *server = conn->owner;
                if (conn->same_dc)
			        stats_pool_incr(ctx, server->owner, peer_timedout_requests);
                else
			        stats_pool_incr(ctx, server->owner, remote_peer_timedout_requests);
			}
		} else {
			if (conn->type == CONN_SERVER) { //storage server requests
			   stats_server_incr(ctx, conn->owner, server_dropped_requests);
			}
		}

		conn->err = ETIMEDOUT;

		core_close(ctx, conn);
	}
}
Exemple #4
0
static rstatus_t
core_send(struct context *ctx, struct conn *conn)
{
	rstatus_t status;

	status = conn_send(ctx, conn);
	if (status != DN_OK) {
		log_info("send on %s %d failed: %s", conn_get_type_string(conn),
				 conn->sd, strerror(errno));
	}

	return status;
}
Exemple #5
0
static void
core_error(struct context *ctx, struct conn *conn)
{
	rstatus_t status;

	status = dn_get_soerror(conn->sd);
	if (status < 0) {
	log_warn("get soerr on %s client %d failed, ignored: %s",
             conn_get_type_string(conn), conn->sd, strerror(errno));
	}
	conn->err = errno;

	core_close(ctx, conn);
}
Exemple #6
0
static void
server_ack_err(struct context *ctx, struct conn *conn, struct msg *req)
{
    // I want to make sure we do not have swallow here.
    //ASSERT_LOG(!req->swallow, "req %d:%d has swallow set??", req->id, req->parent_id);
    if ((req->swallow && req->noreply) ||
        (req->swallow && (req->consistency == DC_ONE)) ||
        (req->swallow && (req->consistency == DC_QUORUM)
                      && (!conn->same_dc))) {
        log_debug(LOG_INFO, "dyn: close s %d swallow req %"PRIu64" len %"PRIu32
                  " type %d", conn->sd, req->id, req->mlen, req->type);
        req_put(req);
        return;
    }
    struct conn *c_conn = req->owner;
    // At other connections, these responses would be swallowed.
    ASSERT_LOG((c_conn->type == CONN_CLIENT) ||
               (c_conn->type == CONN_DNODE_PEER_CLIENT), "c_conn type %s",
               conn_get_type_string(c_conn));

    // Create an appropriate response for the request so its propagated up;
    // This response gets dropped in rsp_make_error anyways. But since this is
    // an error path its ok with the overhead.
    struct msg *rsp = msg_get(conn, false, conn->data_store);
    if (rsp == NULL) {
        log_warn("Could not allocate msg.");
        return;
    }
    req->done = 1;
    req->peer = rsp;
    rsp->peer = req;
    rsp->error = req->error = 1;
    rsp->err = req->err = conn->err;
    rsp->dyn_error = req->dyn_error = STORAGE_CONNECTION_REFUSE;
    rsp->dmsg = NULL;
    log_warn("%d:%d <-> %d:%d", req->id, req->parent_id, rsp->id, rsp->parent_id);

    log_warn("dyn: close s %d schedule error for req %u:%u "
             "len %"PRIu32" type %d from c %d%c %s", conn->sd, req->id, req->parent_id,
             req->mlen, req->type, c_conn->sd, conn->err ? ':' : ' ',
             conn->err ? strerror(conn->err): " ");
    rstatus_t status =
            conn_handle_response(c_conn, req->parent_id ? req->parent_id : req->id,
                                 rsp);
    IGNORE_RET_VAL(status);
    if (req->swallow)
        req_put(req);
}
Exemple #7
0
static void
core_close_log(struct conn *conn)
{
	char *addrstr;

	if ((conn->type == CONN_CLIENT) || (conn->type == CONN_DNODE_PEER_CLIENT)) {
		addrstr = dn_unresolve_peer_desc(conn->sd);
	} else {
		addrstr = dn_unresolve_addr(conn->addr, conn->addrlen);
	}
	log_debug(LOG_NOTICE, "close %s %d '%s' on event %04"PRIX32" eof %d done "
			  "%d rb %zu sb %zu%c %s", conn_get_type_string(conn), conn->sd,
              addrstr, conn->events, conn->eof, conn->done, conn->recv_bytes,
              conn->send_bytes,
              conn->err ? ':' : ' ', conn->err ? strerror(conn->err) : "");

}
Exemple #8
0
static rstatus_t
proxy_accept(struct context *ctx, struct conn *p)
{
    rstatus_t status;
    struct conn *c;
    int sd;

    ASSERT(p->type == CONN_PROXY);
    ASSERT(p->sd > 0);
    ASSERT(p->recv_active && p->recv_ready);

    for (;;) {
        sd = accept(p->sd, NULL, NULL);
        if (sd < 0) {
            if (errno == EINTR) {
                log_warn("accept on %s %d not ready - eintr",
                         conn_get_type_string(p), p->sd);
                continue;
            }

            if (errno == EAGAIN || errno == EWOULDBLOCK) {
                p->recv_ready = 0;
                return DN_OK;
            }

            /*
             * FIXME: On EMFILE or ENFILE mask out IN event on the proxy; mask
             * it back in when some existing connection gets closed
             */

            log_error("accept on %s %d failed: %s",
                      conn_get_type_string(p), p->sd, strerror(errno));
            return DN_ERROR;
        }

        break;
    }

    c = conn_get(p->owner, true, p->data_store);
    if (c == NULL) {
        log_error("get conn for CLIENT %d from %s %d failed: %s", sd,
                  conn_get_type_string(p), p->sd, strerror(errno));
        status = close(sd);
        if (status < 0) {
            log_error("close c %d failed, ignored: %s", sd, strerror(errno));
        }
        return DN_ENOMEM;
    }
    c->sd = sd;

    stats_pool_incr(ctx, c->owner, client_connections);

    status = dn_set_nonblocking(c->sd);
    if (status < 0) {
        log_error("set nonblock on %s %d from p %d failed: %s",
                  conn_get_type_string(c), c->sd, p->sd, strerror(errno));
        conn_close(ctx, c);
        return status;
    }

    if (p->family == AF_INET || p->family == AF_INET6) {
        status = dn_set_tcpnodelay(c->sd);
        if (status < 0) {
            log_warn("set tcpnodelay on %s %d from %s %d failed, ignored: %s",
                     conn_get_type_string(c), c->sd, conn_get_type_string(p),
                     p->sd, strerror(errno));
        }
    }

    status = event_add_conn(ctx->evb, c);
    if (status < 0) {
        log_error("event add conn from %s %d failed: %s",conn_get_type_string(p),
                  p->sd, strerror(errno));
        conn_close(ctx, c);
        return status;
    }

    log_notice("accepted %s %d on %s %d from '%s'", conn_get_type_string(c),
               c->sd, conn_get_type_string(p), p->sd, dn_unresolve_peer_desc(c->sd));

    return DN_OK;
}
Exemple #9
0
struct msg *
rsp_send_next(struct context *ctx, struct conn *conn)
{
    rstatus_t status;
    struct msg *rsp, *req; /* response and it's peer request */

    ASSERT_LOG((conn->type == CONN_DNODE_PEER_CLIENT) ||
               (conn->type = CONN_CLIENT), "conn %s", conn_get_type_string(conn));

    req = TAILQ_FIRST(&conn->omsg_q);
    if (req == NULL || (!req->selected_rsp && !req_done(conn, req))) {
        /* nothing is outstanding, initiate close? */
        if (req == NULL && conn->eof) {
            conn->done = 1;
            log_debug(LOG_INFO, "c %d is done", conn->sd);
        }

        status = event_del_out(ctx->evb, conn);
        if (status != DN_OK) {
            conn->err = errno;
        }

        return NULL;
    }

    rsp = conn->smsg;
    if (rsp != NULL) {
        ASSERT(!rsp->request);
        ASSERT(rsp->peer != NULL);
        req = TAILQ_NEXT(rsp->peer, c_tqe);
    }

    if (req == NULL || !req_done(conn, req)) {
        conn->smsg = NULL;
        return NULL;
    }
    ASSERT(req->request && !req->swallow);

    if (req_error(conn, req)) {
        rsp = rsp_make_error(ctx, conn, req);
        if (rsp == NULL) {
            conn->err = errno;
            return NULL;
        }
        rsp->peer = req;
        req->selected_rsp = rsp;
        log_debug(LOG_VERB, "creating new error rsp %p", rsp);
        if (conn->dyn_mode) {
      	  stats_pool_incr(ctx, peer_forward_error);
        } else {
      	  stats_pool_incr(ctx, forward_error);
        }
    } else {
        rsp = req->selected_rsp;
    }
    ASSERT(!rsp->request);

    conn->smsg = rsp;

    if (log_loggable(LOG_VVERB)) {
       log_debug(LOG_VVERB, "send next rsp %"PRIu64" on c %d", rsp->id, conn->sd);
    }

    return rsp;
}
static void
dnode_req_forward(struct context *ctx, struct conn *conn, struct msg *msg)
{
    struct server_pool *pool;
    uint8_t *key;
    uint32_t keylen;

    if (log_loggable(LOG_DEBUG)) {
       log_debug(LOG_DEBUG, "dnode_req_forward entering ");
    }
    log_debug(LOG_DEBUG, "DNODE REQ RECEIVED %s %d dmsg->id %u",
              conn_get_type_string(conn), conn->sd, msg->dmsg->id);

    ASSERT(conn->type == CONN_DNODE_PEER_CLIENT);

    pool = conn->owner;
    key = NULL;
    keylen = 0;

    log_debug(LOG_DEBUG, "conn %p adding message %d:%d", conn, msg->id, msg->parent_id);
    dictAdd(conn->outstanding_msgs_dict, &msg->id, msg);

    if (!string_empty(&pool->hash_tag)) {
        struct string *tag = &pool->hash_tag;
        uint8_t *tag_start, *tag_end;

        tag_start = dn_strchr(msg->key_start, msg->key_end, tag->data[0]);
        if (tag_start != NULL) {
            tag_end = dn_strchr(tag_start + 1, msg->key_end, tag->data[1]);
            if (tag_end != NULL) {
                key = tag_start + 1;
                keylen = (uint32_t)(tag_end - key);
            }
        }
    }

    if (keylen == 0) {
        key = msg->key_start;
        keylen = (uint32_t)(msg->key_end - msg->key_start);
    }

    ASSERT(msg->dmsg != NULL);
    if (msg->dmsg->type == DMSG_REQ) {
       local_req_forward(ctx, conn, msg, key, keylen);
    } else if (msg->dmsg->type == DMSG_REQ_FORWARD) {
        struct mbuf *orig_mbuf = STAILQ_FIRST(&msg->mhdr);
        struct datacenter *dc = server_get_dc(pool, &pool->dc);
        uint32_t rack_cnt = array_n(&dc->racks);
        uint32_t rack_index;
        for(rack_index = 0; rack_index < rack_cnt; rack_index++) {
            struct rack *rack = array_get(&dc->racks, rack_index);
            //log_debug(LOG_DEBUG, "forwarding to rack  '%.*s'",
            //            rack->name->len, rack->name->data);
            struct msg *rack_msg;
            if (string_compare(rack->name, &pool->rack) == 0 ) {
                rack_msg = msg;
            } else {
                rack_msg = msg_get(conn, msg->request, __FUNCTION__);
                if (rack_msg == NULL) {
                    log_debug(LOG_VERB, "whelp, looks like yer screwed now, buddy. no inter-rack messages for you!");
                    continue;
                }

                if (msg_clone(msg, orig_mbuf, rack_msg) != DN_OK) {
                    msg_put(rack_msg);
                    continue;
                }
                rack_msg->swallow = true;
            }

            if (log_loggable(LOG_DEBUG)) {
               log_debug(LOG_DEBUG, "forwarding request from conn '%s' to rack '%.*s' dc '%.*s' ",
                           dn_unresolve_peer_desc(conn->sd), rack->name->len, rack->name->data, rack->dc->len, rack->dc->data);
            }

            remote_req_forward(ctx, conn, rack_msg, rack, key, keylen);
        }
    }
}
/* Description: link data from a peer connection to a client-facing connection
 * peer_conn: a peer connection
 * msg      : msg with data from the peer connection after parsing
 */
static void
dnode_rsp_forward_match(struct context *ctx, struct conn *peer_conn, struct msg *rsp)
{
    rstatus_t status;
    struct msg *req;
    struct conn *c_conn;

    req = TAILQ_FIRST(&peer_conn->omsg_q);
    c_conn = req->owner;

    /* if client consistency is dc_one forward the response from only the
       local node. Since dyn_dnode_peer is always a remote node, drop the rsp */
    if (req->consistency == DC_ONE) {
        if (req->swallow) {
            dnode_rsp_swallow(ctx, peer_conn, req, rsp);
            return;
        }
        log_warn("req %d:%d with DC_ONE consistency is not being swallowed");
    }

    /* if client consistency is dc_quorum, forward the response from only the
       local region/DC. */
    if ((req->consistency == DC_QUORUM) && !peer_conn->same_dc) {
        if (req->swallow) {
            dnode_rsp_swallow(ctx, peer_conn, req, rsp);
            return;
        }
        log_warn("req %d:%d with DC_QUORUM consistency is not being swallowed");
    }

    log_debug(LOG_DEBUG, "DNODE RSP RECEIVED %s %d dmsg->id %u req %u:%u rsp %u:%u, ",
              conn_get_type_string(peer_conn),
              peer_conn->sd, rsp->dmsg->id,
              req->id, req->parent_id, rsp->id, rsp->parent_id);
    ASSERT(req != NULL && req->peer == NULL);
    ASSERT(req->request && !req->done);

    if (log_loggable(LOG_VVERB)) {
        loga("Dumping content for response:   ");
        msg_dump(rsp);

        loga("rsp id %d", rsp->id);

        loga("Dumping content for request:");
        msg_dump(req);

        loga("req id %d", req->id);
    }

    conn_dequeue_outq(ctx, peer_conn, req);
    req->done = 1;

    log_info("c_conn:%p %d:%d <-> %d:%d", c_conn, req->id, req->parent_id,
             rsp->id, rsp->parent_id);
    /* establish rsp <-> req (response <-> request) link */
    req->peer = rsp;
    rsp->peer = req;

    rsp->pre_coalesce(rsp);

    ASSERT_LOG((c_conn->type == CONN_CLIENT) ||
               (c_conn->type == CONN_DNODE_PEER_CLIENT),
               "c_conn type %s", conn_get_type_string(c_conn));

    dnode_rsp_forward_stats(ctx, peer_conn->owner, rsp);
    // c_conn owns respnse now
    status = conn_handle_response(c_conn, req->parent_id ? req->parent_id : req->id,
                                  rsp);
    IGNORE_RET_VAL(status);
    if (req->swallow) {
        log_info("swallow request %d:%d", req->id, req->parent_id);
        req_put(req);
    }
}
/* There are chances that the request to the remote peer or its response got dropped.
 * Hence we may not always receive a response to the request at the head of the FIFO.
 * Hence what we do is we mark that request as errored and move on the next one
 * in the outgoing queue. This works since we always have message ids in monotonically
 * increasing order.
 */
static void
dnode_rsp_forward(struct context *ctx, struct conn *peer_conn, struct msg *rsp)
{
    rstatus_t status;
    struct msg *req;
    struct conn *c_conn;

    ASSERT(peer_conn->type == CONN_DNODE_PEER_SERVER);

    /* response from a peer implies that peer is ok and heartbeating */
    dnode_peer_ok(ctx, peer_conn);

    /* dequeue peer message (request) from peer conn */
    while (true) {
        req = TAILQ_FIRST(&peer_conn->omsg_q);
        log_debug(LOG_VERB, "dnode_rsp_forward entering req %p rsp %p...", req, rsp);
        c_conn = req->owner;

        if (!peer_conn->same_dc && req->remote_region_send_time) {
            struct stats *st = ctx->stats;
            uint64_t delay = dn_usec_now() - req->remote_region_send_time;
            histo_add(&st->cross_region_histo, delay);
        }

        if (req->id == rsp->dmsg->id) {
            dnode_rsp_forward_match(ctx, peer_conn, rsp);
            return;
        }
        // Report a mismatch and try to rectify
        log_error("MISMATCH: dnode %s %d rsp_dmsg_id %u req %u:%u dnode rsp %u:%u",
                  conn_get_type_string(peer_conn),
                  peer_conn->sd, rsp->dmsg->id, req->id, req->parent_id, rsp->id,
                  rsp->parent_id);
        if (c_conn && conn_to_ctx(c_conn))
            stats_pool_incr(conn_to_ctx(c_conn), c_conn->owner,
                    peer_mismatch_requests);

        // TODO : should you be worried about message id getting wrapped around to 0?
        if (rsp->dmsg->id < req->id) {
            // We received a response from the past. This indeed proves out of order
            // responses. A blunder to the architecture. Log it and drop the response.
            log_error("MISMATCH: received response from the past. Dropping it");
            rsp_put(rsp);
            return;
        }

        if (req->consistency == DC_ONE) {
            if (req->swallow) {
                // swallow the request and move on the next one
                dnode_rsp_swallow(ctx, peer_conn, req, NULL);
                continue;
            }
            log_warn("req %d:%d with DC_ONE consistency is not being swallowed");
        }

        if ((req->consistency == DC_QUORUM) && !peer_conn->same_dc) {
            if (req->swallow) {
                // swallow the request and move on the next one
                dnode_rsp_swallow(ctx, peer_conn, req, NULL);
                continue;
            }
            log_warn("req %d:%d with DC_QUORUM consistency is not being swallowed");
        }

        log_error("MISMATCHED DNODE RSP RECEIVED %s %d dmsg->id %u req %u:%u rsp %u:%u, skipping....",
                  conn_get_type_string(peer_conn),
                 peer_conn->sd, rsp->dmsg->id,
                 req->id, req->parent_id, rsp->id, rsp->parent_id);
        ASSERT(req != NULL && req->peer == NULL);
        ASSERT(req->request && !req->done);

        if (log_loggable(LOG_VVERB)) {
            loga("skipping req:   ");
            msg_dump(req);
        }


        conn_dequeue_outq(ctx, peer_conn, req);
        req->done = 1;

        // Create an appropriate response for the request so its propagated up;
        struct msg *err_rsp = msg_get(peer_conn, false, peer_conn->data_store,
                                      __FUNCTION__);
        err_rsp->error = req->error = 1;
        err_rsp->err = req->err = BAD_FORMAT;
        err_rsp->dyn_error = req->dyn_error = BAD_FORMAT;
        err_rsp->dmsg = dmsg_get();
        err_rsp->dmsg->id = req->id;
        log_debug(LOG_VERB, "%p <-> %p", req, err_rsp);
        /* establish err_rsp <-> req (response <-> request) link */
        req->peer = err_rsp;
        err_rsp->peer = req;

        log_error("Peer connection s %d skipping request %u:%u, dummy err_rsp %u:%u",
                 peer_conn->sd, req->id, req->parent_id, err_rsp->id, err_rsp->parent_id);
        rstatus_t status =
            conn_handle_response(c_conn, req->parent_id ? req->parent_id : req->id,
                                err_rsp);
        IGNORE_RET_VAL(status);
        if (req->swallow) {
                log_debug(LOG_INFO, "swallow request %d:%d", req->id, req->parent_id);
            req_put(req);
        }
    }
}