struct datacenter * server_get_dc(struct server_pool *pool, struct string *dcname) { struct datacenter *dc; uint32_t i, len; if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "server_get_dc pool '%.*s'", dcname->len, dcname->data); } for (i = 0, len = array_n(&pool->datacenters); i < len; i++) { dc = (struct datacenter *) array_get(&pool->datacenters, i); ASSERT(dc != NULL); ASSERT(dc->name != NULL); if (string_compare(dc->name, dcname) == 0) { return dc; } } dc = array_push(&pool->datacenters); dc_init(dc); string_copy(dc->name, dcname->data, dcname->len); if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "server_get_dc pool about to exit '%.*s'", dc->name->len, dc->name->data); } return dc; }
void dyn_parse_rsp(struct msg *r) { if (log_loggable(LOG_VVERB)) { log_debug(LOG_VVERB, ":::::::::::::::::::::: In dyn_parse_rsp, start to process response :::::::::::::::::::::::: "); msg_dump(r); } if (dyn_parse_core(r)) { struct dmsg *dmsg = r->dmsg; struct mbuf *b = STAILQ_LAST(&r->mhdr, mbuf, next); if (dmsg->type != DMSG_UNKNOWN && dmsg->type != DMSG_RES) { log_debug(LOG_DEBUG, "Resp parser: I got a dnode msg of type %d", dmsg->type); r->state = 0; r->result = MSG_PARSE_OK; r->dyn_state = DYN_DONE; return; } //check whether we need to decrypt the payload if (dmsg->bit_field == 1) { //dmsg->owner->owner->dnode_secured = 1; struct mbuf *decrypted_buf = mbuf_get(); if (decrypted_buf == NULL) { log_debug(LOG_INFO, "Unable to obtain an mbuf for dnode msg's header!"); r->result = MSG_OOM_ERROR; return; } //Dont need to decrypt AES key - pull it out from the conn dyn_aes_decrypt(dmsg->payload, dmsg->plen, decrypted_buf, r->owner->aes_key); b->pos = b->pos + dmsg->plen; r->pos = decrypted_buf->start; mbuf_copy(decrypted_buf, b->pos, mbuf_length(b)); mbuf_insert(&r->mhdr, decrypted_buf); mbuf_remove(&r->mhdr, b); mbuf_put(b); r->mlen = mbuf_length(decrypted_buf); } if (r->redis) { return redis_parse_rsp(r); } return memcache_parse_rsp(r); } //bad case if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "Resp: bad message - cannot parse"); //fix me to do something msg_dump(r); } r->result = MSG_PARSE_AGAIN; }
static void dnode_rsp_send_done(struct context *ctx, struct conn *conn, struct msg *rsp) { if (log_loggable(LOG_VVERB)) { log_debug(LOG_VVERB, "dnode_rsp_send_done entering"); } struct msg *req; /* peer message (request) */ ASSERT(conn->type == CONN_DNODE_PEER_CLIENT); ASSERT(conn->smsg == NULL); log_debug(LOG_VERB, "dyn: send done rsp %"PRIu64" on c %d", rsp->id, conn->sd); req = rsp->peer; ASSERT(!rsp->request && req->request); ASSERT(req->selected_rsp == rsp); ASSERT(req->done && !req->swallow); log_debug(LOG_DEBUG, "DNODE RSP SENT %s %d dmsg->id %u", conn_get_type_string(conn), conn->sd, req->dmsg->id); /* dequeue request from client outq */ conn_dequeue_outq(ctx, conn, req); req_put(req); }
static bool dnode_req_filter(struct context *ctx, struct conn *conn, struct msg *msg) { ASSERT(conn->type == CONN_DNODE_PEER_CLIENT); if (msg_empty(msg)) { ASSERT(conn->rmsg == NULL); if (log_loggable(LOG_VERB)) { log_debug(LOG_VERB, "dyn: filter empty req %"PRIu64" from c %d", msg->id, conn->sd); } req_put(msg); return true; } /* dynomite handler */ if (msg->dmsg != NULL) { if (dmsg_process(ctx, conn, msg->dmsg)) { req_put(msg); return true; } } return false; }
static void req_forward_remote_dc(struct context *ctx, struct conn *c_conn, struct msg *msg, struct mbuf *orig_mbuf, uint8_t *key, uint32_t keylen, struct datacenter *dc) { uint32_t rack_cnt = array_n(&dc->racks); if (rack_cnt == 0) return; struct rack *rack = dc->preselected_rack_for_replication; if (rack == NULL) rack = array_get(&dc->racks, 0); struct msg *rack_msg = msg_get(c_conn, msg->request, __FUNCTION__); if (rack_msg == NULL) { log_debug(LOG_VERB, "whelp, looks like yer screwed now, buddy. no inter-rack messages for you!"); msg_put(rack_msg); return; } msg_clone(msg, orig_mbuf, rack_msg); log_info("msg (%d:%d) clone to remote rack msg (%d:%d)", msg->id, msg->parent_id, rack_msg->id, rack_msg->parent_id); rack_msg->swallow = true; if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "forwarding request to conn '%s' on rack '%.*s'", dn_unresolve_peer_desc(c_conn->sd), rack->name->len, rack->name->data); } remote_req_forward(ctx, c_conn, rack_msg, rack, key, keylen); }
void rsp_send_done(struct context *ctx, struct conn *conn, struct msg *rsp) { ASSERT(conn->type == CONN_CLIENT); ASSERT(conn->smsg == NULL); if (log_loggable(LOG_VVERB)) { log_debug(LOG_VVERB, "send done rsp %"PRIu64" on c %d", rsp->id, conn->sd); } log_debug(LOG_VERB, "conn %p rsp %p done", conn, rsp); struct msg *req = rsp->peer; ASSERT_LOG(req, "response %d does not have a corresponding request", rsp->id); ASSERT_LOG(!req->rsp_sent, "request %d:%d already had a response sent", req->id, req->parent_id); ASSERT(!rsp->request && req->request); ASSERT(req->selected_rsp == rsp); req->rsp_sent = 1; /* dequeue request from client outq */ conn_dequeue_outq(ctx, conn, req); // Remove it from the dict if (!req->awaiting_rsps) { log_debug(LOG_VERB, "conn %p removing message %d:%d", conn, req->id, req->parent_id); dictDelete(conn->outstanding_msgs_dict, &req->id); req_put(req); } else { log_info("req %d:%d still awaiting rsps %d", req->id, req->parent_id, req->awaiting_rsps); } }
static void req_forward_error(struct context *ctx, struct conn *conn, struct msg *msg, err_t err) { if (log_loggable(LOG_INFO)) { log_debug(LOG_INFO, "forward req %"PRIu64" len %"PRIu32" type %d from " "c %d failed: %s", msg->id, msg->mlen, msg->type, conn->sd, strerror(err)); } if (!msg->expect_datastore_reply) { req_put(msg); return; } // Create an appropriate response for the request so its propagated up; // This response gets dropped in rsp_make_error anyways. But since this is // an error path its ok with the overhead. struct msg *rsp = msg_get(conn, false, __FUNCTION__); rsp->peer = msg; rsp->error = 1; rsp->err = err; rstatus_t status = conn_handle_response(conn, msg->id, rsp); IGNORE_RET_VAL(status); }
void dnode_rsp_recv_done(struct context *ctx, struct conn *conn, struct msg *msg, struct msg *nmsg) { log_debug(LOG_VERB, "dnode_rsp_recv_done entering ..."); ASSERT(!conn->dnode_client && !conn->dnode_server); ASSERT(msg != NULL && conn->rmsg == msg); ASSERT(!msg->request); ASSERT(msg->owner == conn); ASSERT(nmsg == NULL || !nmsg->request); if (log_loggable(LOG_VVERB)) { loga("Dumping content for msg: "); msg_dump(msg); if (nmsg != NULL) { loga("Dumping content for nmsg :"); msg_dump(nmsg); } } /* enqueue next message (response), if any */ conn->rmsg = nmsg; if (dnode_rsp_filter(ctx, conn, msg)) { return; } dnode_rsp_forward(ctx, conn, msg); }
void dnode_rsp_send_done(struct context *ctx, struct conn *conn, struct msg *msg) { if (log_loggable(LOG_VVERB)) { log_debug(LOG_VVERB, "dnode_rsp_send_done entering"); } struct msg *pmsg; /* peer message (request) */ ASSERT(conn->dnode_client && !conn->dnode_server); ASSERT(conn->smsg == NULL); log_debug(LOG_VERB, "dyn: send done rsp %"PRIu64" on c %d", msg->id, conn->sd); pmsg = msg->peer; ASSERT(!msg->request && pmsg->request); ASSERT(pmsg->peer == msg); ASSERT(pmsg->done && !pmsg->swallow); log_debug(LOG_DEBUG, "DNODE RSP SENT %c %d dmsg->id %u", conn->dnode_client ? 'c' : (conn->dnode_server ? 's' : 'p'), conn->sd, pmsg->dmsg->id); /* dequeue request from client outq */ conn->dequeue_outq(ctx, conn, pmsg); req_put(pmsg); }
struct rack * server_get_rack(struct datacenter *dc, struct string *rackname) { ASSERT(dc != NULL); ASSERT(dc->dict_rack != NULL); ASSERT(dc->name != NULL); if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "server_get_rack '%.*s'", rackname->len, rackname->data); } /* struct rack *rack = dictFetchValue(dc->dict_rack, rackname); if (rack == NULL) { rack = array_push(&dc->racks); rack_init(rack); string_copy(rack->name, rackname->data, rackname->len); string_copy(rack->dc, dc->name->data, dc->name->len); rack->continuum = dn_alloc(sizeof(struct continuum)); dictAdd(dc->dict_rack, rackname, rack); } */ struct rack *rack; uint32_t i, len; for (i = 0, len = array_n(&dc->racks); i < len; i++) { rack = (struct rack *) array_get(&dc->racks, i); if (string_compare(rack->name, rackname) == 0) { return rack; } } rack = array_push(&dc->racks); rack_init(rack); string_copy(rack->name, rackname->data, rackname->len); string_copy(rack->dc, dc->name->data, dc->name->len); if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "server_get_rack exiting '%.*s'", rack->name->len, rack->name->data); } return rack; }
void dmsg_put(struct dmsg *dmsg) { if (log_loggable(LOG_VVVERB)) { log_debug(LOG_VVVERB, "put dmsg %p id %"PRIu64"", dmsg, dmsg->id); } nfree_dmsgq++; TAILQ_INSERT_HEAD(&free_dmsgq, dmsg, m_tqe); }
static bool rsp_filter(struct context *ctx, struct conn *conn, struct msg *msg) { struct msg *pmsg; ASSERT(!conn->client && !conn->proxy); if (msg_empty(msg)) { ASSERT(conn->rmsg == NULL); log_debug(LOG_VERB, "filter empty rsp %"PRIu64" on s %d", msg->id, conn->sd); rsp_put(msg); return true; } pmsg = TAILQ_FIRST(&conn->omsg_q); if (pmsg == NULL) { log_debug(LOG_VERB, "filter stray rsp %"PRIu64" len %"PRIu32" on s %d", msg->id, msg->mlen, conn->sd); rsp_put(msg); return true; } if (pmsg->noreply) { conn->dequeue_outq(ctx, conn, pmsg); rsp_put(pmsg); rsp_put(msg); return true; } ASSERT(pmsg->peer == NULL); ASSERT(pmsg->request && !pmsg->done); if (pmsg->swallow) { conn->dequeue_outq(ctx, conn, pmsg); pmsg->done = 1; if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "swallow rsp %"PRIu64" len %"PRIu32" of req " "%"PRIu64" on s %d", msg->id, msg->mlen, pmsg->id, conn->sd); } rsp_put(msg); req_put(pmsg); return true; } return false; }
static void req_log(struct msg *req) { struct msg *rsp; /* peer message (response) */ int64_t req_time; /* time cost for this request */ char *peer_str; /* peer client ip:port */ uint32_t req_len, rsp_len; /* request and response length */ struct string *req_type; /* request type string */ if (log_loggable(LOG_NOTICE) == 0) { return; } /* a fragment? */ if (req->frag_id != 0 && req->frag_owner != req) { return; } /* conn close normally? */ if (req->mlen == 0) { return; } req_time = nc_usec_now() - req->start_ts; rsp = req->peer; req_len = req->mlen; rsp_len = (rsp != NULL) ? rsp->mlen : 0; if (req->key_end) { req->key_end[0] = '\0'; } /* * FIXME: add backend addr here * Maybe we can store addrstr just like server_pool in conn struct * when connections are resolved */ peer_str = nc_unresolve_peer_desc(req->owner->sd); req_type = msg_type_string(req->type); log_debug(LOG_NOTICE, "req %"PRIu64" done on c %d req_time %"PRIi64".%03"PRIi64 " msec type %.*s narg %"PRIu32" req_len %"PRIu32" rsp_len %"PRIu32 " key0 '%s' peer '%s' done %d error %d", req->id, req->owner->sd, req_time / 1000, req_time % 1000, req_type->len, req_type->data, req->narg, req_len, rsp_len, req->key_start, peer_str, req->done, req->error); }
void dnode_req_send_done(struct context *ctx, struct conn *conn, struct msg *msg) { if (log_loggable(LOG_DEBUG)) { log_debug(LOG_VERB, "dnode_req_send_done entering!!!"); } ASSERT(conn->type == CONN_DNODE_PEER_SERVER); // crashes because dmsg is NULL :( /*log_debug(LOG_DEBUG, "DNODE REQ SEND %s %d dmsg->id %u", conn_get_type_string(conn), conn->sd, msg->dmsg->id);*/ if (!conn->same_dc) msg->remote_region_send_time = dn_usec_now(); req_send_done(ctx, conn, msg); }
static void req_forward_all_local_racks(struct context *ctx, struct conn *c_conn, struct msg *msg, struct mbuf *orig_mbuf, uint8_t *key, uint32_t keylen, struct datacenter *dc) { //log_debug(LOG_DEBUG, "dc name '%.*s'", // dc->name->len, dc->name->data); uint8_t rack_cnt = (uint8_t)array_n(&dc->racks); uint8_t rack_index; msg->rsp_handler = msg_get_rsp_handler(msg); init_response_mgr(&msg->rspmgr, msg, msg->is_read, rack_cnt, c_conn); log_info("msg %d:%d same DC racks:%d expect replies %d", msg->id, msg->parent_id, rack_cnt, msg->rspmgr.max_responses); for(rack_index = 0; rack_index < rack_cnt; rack_index++) { struct rack *rack = array_get(&dc->racks, rack_index); //log_debug(LOG_DEBUG, "rack name '%.*s'", // rack->name->len, rack->name->data); struct msg *rack_msg; // clone message even for local node struct server_pool *pool = c_conn->owner; if (string_compare(rack->name, &pool->rack) == 0 ) { rack_msg = msg; } else { rack_msg = msg_get(c_conn, msg->request, __FUNCTION__); if (rack_msg == NULL) { log_debug(LOG_VERB, "whelp, looks like yer screwed " "now, buddy. no inter-rack messages for " "you!"); continue; } msg_clone(msg, orig_mbuf, rack_msg); log_info("msg (%d:%d) clone to rack msg (%d:%d)", msg->id, msg->parent_id, rack_msg->id, rack_msg->parent_id); rack_msg->swallow = true; } if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "forwarding request to conn '%s' on rack '%.*s'", dn_unresolve_peer_desc(c_conn->sd), rack->name->len, rack->name->data); } log_debug(LOG_VERB, "c_conn: %p forwarding (%d:%d)", c_conn, rack_msg->id, rack_msg->parent_id); remote_req_forward(ctx, c_conn, rack_msg, rack, key, keylen); } }
void server_connected(struct context *ctx, struct conn *conn) { struct server *server = conn->owner; ASSERT(conn->type == CONN_SERVER); ASSERT(conn->connecting && !conn->connected); stats_server_incr(ctx, server, server_connections); conn->connecting = 0; conn->connected = 1; if (log_loggable(LOG_INFO)) { log_debug(LOG_INFO, "connected on s %d to server '%.*s'", conn->sd, server->pname.len, server->pname.data); } }
void server_ok(struct context *ctx, struct conn *conn) { struct server *server = conn->owner; ASSERT(conn->type == CONN_SERVER); ASSERT(conn->connected); if (server->failure_count != 0) { if (log_loggable(LOG_VERB)) { log_debug(LOG_VERB, "reset server '%.*s' failure count from %"PRIu32 " to 0", server->pname.len, server->pname.data, server->failure_count); } server->failure_count = 0; server->next_retry = 0LL; } }
void _msg_dump(const char *file, int line, struct msg *msg, int level, int begin) { struct mbuf *mbuf; listIter *iter; listNode *node; uint8_t *p, *q; long int len; if (log_loggable(level) == 0) { return; } _log(file, line, 0, "msg dump id %"PRIu64" request %d len %"PRIu32" type %d " "(err %d) kind %d result %d mbuf_count %u keys_count %u", msg->id, msg->request, msg->mlen, msg->type, msg->err, msg->kind, msg->result, listLength(msg->data), msg->keys == NULL?0:array_n(msg->keys)); iter = listGetIterator(msg->data, AL_START_HEAD); ASSERT(iter != NULL); while((node = listNext(iter)) != NULL) { mbuf = listNodeValue(node); if (begin) { p = mbuf->start; } else { p = mbuf->pos; } q = mbuf->last; len = q - p; _log(file, line, 0, "mbuf [%p] with %ld bytes of data, pos %p last %p", p, len, mbuf->pos, mbuf->last); _log_hexdump(file, line, p, len, NULL); } listReleaseIterator(iter); }
void rsp_send_done(struct context *ctx, struct conn *conn, struct msg *msg) { struct msg *pmsg; /* peer message (request) */ ASSERT(conn->client && !conn->proxy); ASSERT(conn->smsg == NULL); if (log_loggable(LOG_VVERB)) { log_debug(LOG_VVERB, "send done rsp %"PRIu64" on c %d", msg->id, conn->sd); } pmsg = msg->peer; ASSERT(!msg->request && pmsg->request); ASSERT(pmsg->peer == msg); ASSERT(pmsg->done && !pmsg->swallow); /* dequeue request from client outq */ conn->dequeue_outq(ctx, conn, pmsg); req_put(pmsg); }
void dyn_parse_rsp(struct msg *r) { if (log_loggable(LOG_VVERB)) { log_debug(LOG_VVERB, ":::::::::::::::::::::: In dyn_parse_rsp, start to process response :::::::::::::::::::::::: "); msg_dump(r); } bool done_parsing = false; struct mbuf *b = STAILQ_LAST(&r->mhdr, mbuf, next); if (dyn_parse_core(r)) { struct dmsg *dmsg = r->dmsg; struct conn *conn = r->owner; conn->same_dc = dmsg->same_dc; if (dmsg->type != DMSG_UNKNOWN && dmsg->type != DMSG_RES) { log_debug(LOG_DEBUG, "Resp parser: I got a dnode msg of type %d", dmsg->type); r->state = 0; r->result = MSG_PARSE_OK; r->dyn_state = DYN_DONE; return; } if (r->dyn_state == DYN_DONE && dmsg->bit_field == 1) { dmsg->owner->owner->dnode_secured = 1; r->owner->dnode_crypto_state = 1; r->dyn_state = DYN_POST_DONE; r->result = MSG_PARSE_REPAIR; if (dmsg->mlen > 1) { //Decrypt AES key dyn_rsa_decrypt(dmsg->data, aes_decrypted_buf); strncpy(r->owner->aes_key, aes_decrypted_buf, strlen(aes_decrypted_buf)); } if (dmsg->plen + b->pos <= b->last) { struct mbuf *decrypted_buf = mbuf_get(); if (decrypted_buf == NULL) { loga("Unable to obtain an mbuf for dnode msg's header!"); r->result = MSG_OOM_ERROR; return; } dyn_aes_decrypt(b->pos, dmsg->plen, decrypted_buf, r->owner->aes_key); b->pos = b->pos + dmsg->plen; r->pos = decrypted_buf->start; mbuf_copy(decrypted_buf, b->pos, mbuf_length(b)); mbuf_insert(&r->mhdr, decrypted_buf); mbuf_remove(&r->mhdr, b); mbuf_put(b); r->mlen = mbuf_length(decrypted_buf); return data_store_parse_rsp(r); } //Subtract already received bytes dmsg->plen -= b->last - b->pos; return; } else if (r->dyn_state == DYN_POST_DONE) { struct mbuf *last_buf = STAILQ_LAST(&r->mhdr, mbuf, next); if (last_buf->read_flip == 1) { data_store_parse_rsp(r); } else { r->result = MSG_PARSE_AGAIN; } return; } if (done_parsing) return; return data_store_parse_rsp(r); } //bad case if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "Resp: bad message - cannot parse"); //fix me to do something msg_dump(r); } r->result = MSG_PARSE_AGAIN; }
void dyn_parse_req(struct msg *r) { if (log_loggable(LOG_VVERB)) { log_debug(LOG_VVERB, ":::::::::::::::::::::: In dyn_parse_req, start to process request :::::::::::::::::::::: "); msg_dump(r); } bool done_parsing = false; struct mbuf *b = STAILQ_LAST(&r->mhdr, mbuf, next); if (dyn_parse_core(r)) { struct dmsg *dmsg = r->dmsg; struct conn *conn = r->owner; conn->same_dc = dmsg->same_dc; if (dmsg->type != DMSG_UNKNOWN && dmsg->type != DMSG_REQ && dmsg->type != DMSG_REQ_FORWARD && dmsg->type != GOSSIP_SYN) { r->state = 0; r->result = MSG_PARSE_OK; r->dyn_state = DYN_DONE; return; } if (r->dyn_state == DYN_DONE && dmsg->bit_field == 1) { dmsg->owner->owner->dnode_secured = 1; r->owner->dnode_crypto_state = 1; r->dyn_state = DYN_POST_DONE; r->result = MSG_PARSE_REPAIR; if (dmsg->mlen > 1) { //Decrypt AES key dyn_rsa_decrypt(dmsg->data, aes_decrypted_buf); strncpy(r->owner->aes_key, aes_decrypted_buf, strlen(aes_decrypted_buf)); } if (dmsg->plen + b->pos <= b->last) { struct mbuf *decrypted_buf = mbuf_get(); if (decrypted_buf == NULL) { loga("Unable to obtain an mbuf for dnode msg's header!"); r->result = MSG_OOM_ERROR; return; } dyn_aes_decrypt(b->pos, dmsg->plen, decrypted_buf, r->owner->aes_key); b->pos = b->pos + dmsg->plen; r->pos = decrypted_buf->start; mbuf_copy(decrypted_buf, b->pos, mbuf_length(b)); mbuf_insert(&r->mhdr, decrypted_buf); mbuf_remove(&r->mhdr, b); mbuf_put(b); r->mlen = mbuf_length(decrypted_buf); data_store_parse_req(r); } //substract alraedy received bytes dmsg->plen -= b->last - b->pos; return; } else if (r->dyn_state == DYN_POST_DONE) { struct mbuf *last_buf = STAILQ_LAST(&r->mhdr, mbuf, next); if (last_buf->read_flip == 1) { data_store_parse_req(r); } else { r->result = MSG_PARSE_AGAIN; } return; } if (dmsg->type == GOSSIP_SYN) { //TODOs: need to address multi-buffer msg later dmsg->payload = b->pos; b->pos = b->pos + dmsg->plen; r->pos = b->pos; done_parsing = true; } if (done_parsing) return; return data_store_parse_req(r); } //bad case if (log_loggable(LOG_VVERB)) { log_debug(LOG_VVERB, "Bad or splitted message"); //fix me to do something msg_dump(r); } r->result = MSG_PARSE_AGAIN; }
static bool dyn_parse_core(struct msg *r) { struct dmsg *dmsg; struct mbuf *b; uint8_t *p, *token; uint8_t ch = ' '; uint64_t num = 0; dyn_state = r->dyn_state; if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "dyn_state: %d", r->dyn_state); } if (r->dyn_state == DYN_DONE || r->dyn_state == DYN_POST_DONE) return true; b = STAILQ_LAST(&r->mhdr, mbuf, next); dmsg = r->dmsg; if (dmsg == NULL) { r->dmsg = dmsg_get(); dmsg = r->dmsg; dmsg->owner = r; if (dmsg == NULL) {//should track this as a dropped message loga("unable to create a new dmsg"); goto error; //should count as OOM error } } token = NULL; for (p = r->pos; p < b->last; p++) { ch = *p; switch (dyn_state) { case DYN_START: if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "DYN_START"); } if (ch != ' ' && ch != '$') { break; } if (ch == ' ') { if (token == NULL) token = p; break; } if (ch == '$') { if (p + 5 < b->last) { if ((*(p+1) == '2') && (*(p+2) == '0') && (*(p+3) == '1') && (*(p+4) == '4') && (*(p+5) == '$')) { dyn_state = DYN_MAGIC_STRING; p += 5; } else { //goto skip; token = NULL; //reset } } else { goto split; } } else { loga("Facing a weird char %c", p); //goto skip; token = NULL; //reset } break; case DYN_MAGIC_STRING: if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "DYN_MAGIC_STRING"); } if (ch == ' ') { dyn_state = DYN_MSG_ID; num = 0; break; } else { //loga("char is '%c %c %c %c'", *(p-2), *(p-1), ch, *(p+1)); token = NULL; loga("Facing a weird char %c", p); //goto skip; dyn_state = DYN_START; } break; case DYN_MSG_ID: if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "DYN_MSG_ID"); log_debug(LOG_DEBUG, "num = %d", num); } if (isdigit(ch)) { num = num*10 + (ch - '0'); } else if (ch == ' ' && isdigit(*(p-1))) { if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "MSG ID : %d", num); } dmsg->id = num; dyn_state = DYN_TYPE_ID; num = 0; } else { //loga("char is '%c %c %c %c'", *(p-2), *(p-1), ch, *(p+1)); //goto skip; token = NULL; //reset dyn_state = DYN_START; if (ch == '$') p -= 1; } break; case DYN_TYPE_ID: if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "DYN_TYPE_ID: num = %d", num); } if (isdigit(ch)) { num = num*10 + (ch - '0'); } else if (ch == ' ' && isdigit(*(p-1))) { if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "Type Id: %d", num); } dmsg->type = num; dyn_state = DYN_BIT_FIELD; num = 0; } else { //loga("char is '%c %c %c %c'", *(p-2), *(p-1), ch, *(p+1)); token = NULL; dyn_state = DYN_START; if (ch == '$') p -= 1; } break; case DYN_BIT_FIELD: if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "DYN_BIT_FIELD, num = %d", num); } if (isdigit(ch)) { num = num*10 + (ch - '0'); } else if (ch == ' ' && isdigit(*(p-1))) { if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "DYN_BIT_FIELD : %d", num); } dmsg->bit_field = num & 0xF; dyn_state = DYN_VERSION; num = 0; } else { token = NULL; //loga("char is '%c %c %c %c'", *(p-2), *(p-1), ch, *(p+1)); dyn_state = DYN_START; if (ch == '$') p -= 1; } break; case DYN_VERSION: if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "DYN_VERSION: num = %d", num); } if (isdigit(ch)) { num = num*10 + (ch - '0'); } else if (ch == ' ' && isdigit(*(p-1))) { if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "VERSION : %d", num); } dmsg->version = num; dyn_state = DYN_SAME_DC; num = 0; } else { token = NULL; //loga("char is '%c %c %c %c'", *(p-2), *(p-1), ch, *(p+1)); dyn_state = DYN_START; if (ch == '$') p -= 1; } break; case DYN_SAME_DC: if (isdigit(ch)) { dmsg->same_dc = ch - '0'; if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "DYN_SAME_DC %d", dmsg->same_dc); } } else if (ch == ' ' && isdigit(*(p-1))) { dyn_state = DYN_DATA_LEN; num = 0; } else { token = NULL; //loga("char is '%c %c %c %c'", *(p-2), *(p-1), ch, *(p+1)); dyn_state = DYN_START; if (ch == '$') p -= 1; } break; case DYN_DATA_LEN: if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "DYN_DATA_LEN: num = %d", num); } if (ch == '*') { break; } else if (isdigit(ch)) { num = num*10 + (ch - '0'); } else if (ch == ' ' && isdigit(*(p-1))) { if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "Data len: %d", num); } dmsg->mlen = num; dyn_state = DYN_DATA; num = 0; } else { token = NULL; //loga("char is '%c %c %c %c'", *(p-2), *(p-1), ch, *(p+1)); dyn_state = DYN_START; if (ch == '$') p -= 1; } break; case DYN_DATA: if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "DYN_DATA"); } if (p + dmsg->mlen < b->last) { dmsg->data = p; p += dmsg->mlen - 1; dyn_state = DYN_SPACES_BEFORE_PAYLOAD_LEN; } else { //loga("char is '%c %c %c %c'", *(p-2), *(p-1), ch, *(p+1)); goto split; } break; case DYN_SPACES_BEFORE_PAYLOAD_LEN: if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "DYN_SPACES_BEFORE_PAYLOAD_LEN"); } if (ch == ' ') { break; } else if (ch == '*') { dyn_state = DYN_PAYLOAD_LEN; num = 0; } break; case DYN_PAYLOAD_LEN: if (isdigit(ch)) { num = num*10 + (ch - '0'); } else if (ch == CR) { if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "Payload len: %d", num); } dmsg->plen = num; num = 0; dyn_state = DYN_CRLF_BEFORE_DONE; } else { token = NULL; dyn_state = DYN_START; if (ch == '$') p -= 1; } break; case DYN_CRLF_BEFORE_DONE: if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "DYN_CRLF_BEFORE_DONE"); } if (*p == LF) { dyn_state = DYN_DONE; } else { token = NULL; dyn_state = DYN_START; if (ch == '$') p -= 1; } break; case DYN_DONE: if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "DYN_DONE"); } r->pos = p; dmsg->payload = p; r->dyn_state = DYN_DONE; b->pos = p; goto done; break; default: NOT_REACHED(); break; } } if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "Not fully parsed yet!!!!!!"); } split: //this is an attempt recovery when we got a bad message //we try to look for the start the next good one and throw away the bad part if (r->dyn_state == DYN_START) { r->result = MSG_PARSE_AGAIN; if (b->last == b->end) { struct mbuf *nbuf = mbuf_get(); if (nbuf == NULL) { loga("Unable to obtain a new mbuf for replacement!"); mbuf_put(b); nbuf = mbuf_get(); mbuf_insert_head(&r->mhdr, nbuf); r->pos = nbuf->pos; return false; } //replacing the bad mbuf with a new and empty mbuf mbuf_insert(&r->mhdr, nbuf); mbuf_remove(&r->mhdr, b); mbuf_put(b); r->pos = nbuf->pos; return false; } else { //split it and throw away the bad portion struct mbuf *nbuf; nbuf = mbuf_split(&r->mhdr, r->pos, NULL, NULL); if (nbuf == NULL) { return DN_ENOMEM; } mbuf_insert(&r->mhdr, nbuf); mbuf_remove(&r->mhdr, b); r->pos = nbuf->pos; return false; } } if (mbuf_length(b) == 0 || b->last == b->end) { if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "Would this case ever happen?"); } r->result = MSG_PARSE_AGAIN; return false; } if (r->pos == b->last) { if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "Forward to reading the new block of data"); } r->dyn_state = DYN_START; r->result = MSG_PARSE_AGAIN; token = NULL; return false; } if (log_loggable(LOG_VVERB)) { log_debug(LOG_VVERB, "in split"); } r->dyn_state = DYN_START; r->pos = token; r->result = MSG_PARSE_REPAIR; if (log_loggable(LOG_VVERB)) { log_hexdump(LOG_VVERB, b->pos, mbuf_length(b), "split and inspecting req %"PRIu64" " "res %d type %d state %d", r->id, r->result, r->type, r->dyn_state); log_hexdump(LOG_VVERB, b->start, b->last - b->start, "split and inspecting full req %"PRIu64" " "res %d type %d state %d", r->id, r->result, r->type, r->dyn_state); } return false; done: r->pos = p; dmsg->source_address = r->owner->addr; if (log_loggable(LOG_VVERB)) { log_debug(LOG_VVERB, "at done with p at %d", p); log_hexdump(LOG_VVERB, r->pos, b->last - r->pos, "done and inspecting req %"PRIu64" " "res %d type %d state %d", r->id, r->result, r->type, r->dyn_state); log_hexdump(LOG_VVERB, b->start, b->last - b->start, "inspecting req %"PRIu64" " "res %d type %d state %d", r->id, r->result, r->type, r->dyn_state); } return true; error: log_debug(LOG_ERR, "at error for state %d and c %c", dyn_state, *p); r->result = MSG_PARSE_ERROR; r->pos = p; errno = EINVAL; if (log_loggable(LOG_ERR)) { log_hexdump(LOG_ERR, b->pos, mbuf_length(b), "parsed bad req %"PRIu64" " "res %d type %d state %d", r->id, r->result, r->type, dyn_state); log_hexdump(LOG_ERR, p, b->last - p, "inspecting req %"PRIu64" " "res %d type %d state %d", r->id, r->result, r->type, dyn_state); } r->dyn_state = dyn_state; return false; }
struct msg * rsp_send_next(struct context *ctx, struct conn *conn) { rstatus_t status; struct msg *rsp, *req; /* response and it's peer request */ ASSERT_LOG((conn->type == CONN_DNODE_PEER_CLIENT) || (conn->type = CONN_CLIENT), "conn %s", conn_get_type_string(conn)); req = TAILQ_FIRST(&conn->omsg_q); if (req == NULL || (!req->selected_rsp && !req_done(conn, req))) { /* nothing is outstanding, initiate close? */ if (req == NULL && conn->eof) { conn->done = 1; log_debug(LOG_INFO, "c %d is done", conn->sd); } status = event_del_out(ctx->evb, conn); if (status != DN_OK) { conn->err = errno; } return NULL; } rsp = conn->smsg; if (rsp != NULL) { ASSERT(!rsp->request); ASSERT(rsp->peer != NULL); req = TAILQ_NEXT(rsp->peer, c_tqe); } if (req == NULL || !req_done(conn, req)) { conn->smsg = NULL; return NULL; } ASSERT(req->request && !req->swallow); if (req_error(conn, req)) { rsp = rsp_make_error(ctx, conn, req); if (rsp == NULL) { conn->err = errno; return NULL; } rsp->peer = req; req->selected_rsp = rsp; log_debug(LOG_VERB, "creating new error rsp %p", rsp); if (conn->dyn_mode) { stats_pool_incr(ctx, peer_forward_error); } else { stats_pool_incr(ctx, forward_error); } } else { rsp = req->selected_rsp; } ASSERT(!rsp->request); conn->smsg = rsp; if (log_loggable(LOG_VVERB)) { log_debug(LOG_VVERB, "send next rsp %"PRIu64" on c %d", rsp->id, conn->sd); } return rsp; }
static void dnode_req_forward(struct context *ctx, struct conn *conn, struct msg *msg) { struct server_pool *pool; uint8_t *key; uint32_t keylen; if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "dnode_req_forward entering "); } log_debug(LOG_DEBUG, "DNODE REQ RECEIVED %s %d dmsg->id %u", conn_get_type_string(conn), conn->sd, msg->dmsg->id); ASSERT(conn->type == CONN_DNODE_PEER_CLIENT); pool = conn->owner; key = NULL; keylen = 0; log_debug(LOG_DEBUG, "conn %p adding message %d:%d", conn, msg->id, msg->parent_id); dictAdd(conn->outstanding_msgs_dict, &msg->id, msg); if (!string_empty(&pool->hash_tag)) { struct string *tag = &pool->hash_tag; uint8_t *tag_start, *tag_end; tag_start = dn_strchr(msg->key_start, msg->key_end, tag->data[0]); if (tag_start != NULL) { tag_end = dn_strchr(tag_start + 1, msg->key_end, tag->data[1]); if (tag_end != NULL) { key = tag_start + 1; keylen = (uint32_t)(tag_end - key); } } } if (keylen == 0) { key = msg->key_start; keylen = (uint32_t)(msg->key_end - msg->key_start); } ASSERT(msg->dmsg != NULL); if (msg->dmsg->type == DMSG_REQ) { local_req_forward(ctx, conn, msg, key, keylen); } else if (msg->dmsg->type == DMSG_REQ_FORWARD) { struct mbuf *orig_mbuf = STAILQ_FIRST(&msg->mhdr); struct datacenter *dc = server_get_dc(pool, &pool->dc); uint32_t rack_cnt = array_n(&dc->racks); uint32_t rack_index; for(rack_index = 0; rack_index < rack_cnt; rack_index++) { struct rack *rack = array_get(&dc->racks, rack_index); //log_debug(LOG_DEBUG, "forwarding to rack '%.*s'", // rack->name->len, rack->name->data); struct msg *rack_msg; if (string_compare(rack->name, &pool->rack) == 0 ) { rack_msg = msg; } else { rack_msg = msg_get(conn, msg->request, __FUNCTION__); if (rack_msg == NULL) { log_debug(LOG_VERB, "whelp, looks like yer screwed now, buddy. no inter-rack messages for you!"); continue; } if (msg_clone(msg, orig_mbuf, rack_msg) != DN_OK) { msg_put(rack_msg); continue; } rack_msg->swallow = true; } if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "forwarding request from conn '%s' to rack '%.*s' dc '%.*s' ", dn_unresolve_peer_desc(conn->sd), rack->name->len, rack->name->data, rack->dc->len, rack->dc->data); } remote_req_forward(ctx, conn, rack_msg, rack, key, keylen); } } }
rstatus_t server_connect(struct context *ctx, struct server *server, struct conn *conn) { rstatus_t status; ASSERT(conn->type == CONN_SERVER); if (conn->sd > 0) { /* already connected on server connection */ return DN_OK; } if (log_loggable(LOG_VVERB)) { log_debug(LOG_VVERB, "connect to server '%.*s'", server->pname.len, server->pname.data); } conn->sd = socket(conn->family, SOCK_STREAM, 0); if (conn->sd < 0) { log_error("socket for server '%.*s' failed: %s", server->pname.len, server->pname.data, strerror(errno)); status = DN_ERROR; goto error; } status = dn_set_nonblocking(conn->sd); if (status != DN_OK) { log_error("set nonblock on s %d for server '%.*s' failed: %s", conn->sd, server->pname.len, server->pname.data, strerror(errno)); goto error; } if (server->pname.data[0] != '/') { status = dn_set_tcpnodelay(conn->sd); if (status != DN_OK) { log_warn("set tcpnodelay on s %d for server '%.*s' failed, ignored: %s", conn->sd, server->pname.len, server->pname.data, strerror(errno)); } } status = event_add_conn(ctx->evb, conn); if (status != DN_OK) { log_error("event add conn s %d for server '%.*s' failed: %s", conn->sd, server->pname.len, server->pname.data, strerror(errno)); goto error; } ASSERT(!conn->connecting && !conn->connected); status = connect(conn->sd, conn->addr, conn->addrlen); if (status != DN_OK) { if (errno == EINPROGRESS) { conn->connecting = 1; log_debug(LOG_DEBUG, "connecting on s %d to server '%.*s'", conn->sd, server->pname.len, server->pname.data); return DN_OK; } log_error("connect on s %d to server '%.*s' failed: %s", conn->sd, server->pname.len, server->pname.data, strerror(errno)); goto error; } ASSERT(!conn->connecting); conn->connected = 1; log_debug(LOG_INFO, "connected on s %d to server '%.*s'", conn->sd, server->pname.len, server->pname.data); return DN_OK; error: conn->err = errno; return status; }
/* dnode sends a response back to a peer */ static struct msg * dnode_rsp_send_next(struct context *ctx, struct conn *conn) { rstatus_t status; // SMB: There is some non trivial thing happening here. And I think it is very // important to read this before anything is changed in here. There is also a // bug that exists which I will mention briefly: // A message is a structure that has a list of mbufs which hold the actual data. // Each mbuf has start, pos, last as pointers (amongst others) which indicate start of the // buffer, current read position and end of the buffer respectively. // // Every time a message is sent to a peer within dynomite, a DNODE header is // prepended which is created using dmsg_write. A message remembers this case // in dnode_header_prepended, so that if the messsage is sent in parts, the // header is not prepended again for the subsequent parts. // // Like I said earlier there is a pos pointer in mbuf. If a message is sent // partially (or it is parsed partially too I think) the pos reflects that // case such that things can be resumed where it left off. // // dmsg_write has a parameter which reflects the payload length following the // dnode header calculated by msg_length. msg_length is a summation of all // mbuf sizes (last - start). Which I think is wrong. // // +------------+ +---------------+ // | DC1N1 +---------> | DC2N1 | // +------------+ +-------+-------+ // | // | // | // | // +-------v-------+ // | DC2N2 | // +---------------+ // // Consider the case where // a node DC1N1 in region DC1 sends a request to DC2N1 which forwards it to // to local token owner DC2N2. Now DC2N1 receives a response from DC2N2 which // has to be relayed back to DC1N1. This response from DC2N2 already has a // dnode header but for the link between DC2N1 and DC2N2. DC2N1 should strip // this header and prepend its own header for sending it back to DC1N1. This // gets handled in encryption case since we overwrite all mbufs in the response // However if the encryption is off, the message length sent to dmsg_write // consists of the header from DC2N2 also which is wrong. So this relaying // of responses will not work for the case where encryption is disabled. // // So msg_length should really be from mbuf->pos and not mbuf->start. This // is a problem only with remote region replication since that is the only // case where we CAN have 2 hops to send the request/response. This is also // not a problem if encryption is ON. ASSERT(conn->type == CONN_DNODE_PEER_CLIENT); struct msg *rsp = rsp_send_next(ctx, conn); if (rsp != NULL && conn->dyn_mode) { struct msg *pmsg = rsp->peer; //need to deal with multi-block later uint64_t msg_id = pmsg->dmsg->id; if (rsp->dnode_header_prepended) { return rsp; } struct mbuf *header_buf = mbuf_get(); if (header_buf == NULL) { loga("Unable to obtain an mbuf for header!"); return NULL; //need to address error here properly } dmsg_type_t msg_type = DMSG_RES; //TODOs: need to set the outcoming conn to be secured too if the incoming conn is secured if (pmsg->owner->dnode_secured || conn->dnode_secured) { if (log_loggable(LOG_VVERB)) { log_debug(LOG_VVERB, "Encrypting response ..."); loga("AES encryption key: %s\n", base64_encode(conn->aes_key, AES_KEYLEN)); } if (ENCRYPTION) { status = dyn_aes_encrypt_msg(rsp, conn->aes_key); if (status == DN_ERROR) { loga("OOM to obtain an mbuf for encryption!"); mbuf_put(header_buf); req_put(rsp); return NULL; } if (log_loggable(LOG_VVERB)) { log_debug(LOG_VERB, "#encrypted bytes : %d", status); } dmsg_write(header_buf, msg_id, msg_type, conn, msg_length(rsp)); } else { if (log_loggable(LOG_VVERB)) { log_debug(LOG_VERB, "no encryption on the rsp payload"); } dmsg_write(header_buf, msg_id, msg_type, conn, msg_length(rsp)); } } else { //write dnode header log_debug(LOG_VERB, "sending dnode response with msg_id %u", msg_id); dmsg_write(header_buf, msg_id, msg_type, conn, msg_length(rsp)); } rsp->dnode_header_prepended = 1; mbuf_insert_head(&rsp->mhdr, header_buf); if (log_loggable(LOG_VVERB)) { log_hexdump(LOG_VVERB, header_buf->pos, mbuf_length(header_buf), "resp dyn message - header: "); msg_dump(rsp); } } return rsp; }
/* dnode sends a response back to a peer */ struct msg * dnode_rsp_send_next(struct context *ctx, struct conn *conn) { rstatus_t status; ASSERT(conn->dnode_client && !conn->dnode_server); struct msg *rsp = rsp_send_next(ctx, conn); if (rsp != NULL && conn->dyn_mode) { struct msg *pmsg = rsp->peer; //need to deal with multi-block later uint64_t msg_id = pmsg->dmsg->id; struct mbuf *header_buf = mbuf_get(); if (header_buf == NULL) { loga("Unable to obtain an mbuf for header!"); return NULL; //need to address error here properly } dmsg_type_t msg_type = DMSG_RES; //TODOs: need to set the outcoming conn to be secured too if the incoming conn is secured if (pmsg->owner->dnode_secured || conn->dnode_secured) { if (log_loggable(LOG_VVERB)) { log_debug(LOG_VVERB, "Encrypting response ..."); loga("AES encryption key: %s\n", base64_encode(conn->aes_key, AES_KEYLEN)); } if (ENCRYPTION) { status = dyn_aes_encrypt_msg(rsp, conn->aes_key); if (status == DN_ERROR) { loga("OOM to obtain an mbuf for encryption!"); mbuf_put(header_buf); req_put(rsp); return NULL; } if (log_loggable(LOG_VVERB)) { log_debug(LOG_VERB, "#encrypted bytes : %d", status); } dmsg_write(header_buf, msg_id, msg_type, conn, msg_length(rsp)); } else { if (log_loggable(LOG_VVERB)) { log_debug(LOG_VERB, "no encryption on the rsp payload"); } dmsg_write(header_buf, msg_id, msg_type, conn, msg_length(rsp)); } } else { //write dnode header log_info("sending dnode response with msg_id %u", msg_id); dmsg_write(header_buf, msg_id, msg_type, conn, msg_length(rsp)); } mbuf_insert_head(&rsp->mhdr, header_buf); if (log_loggable(LOG_VVERB)) { log_hexdump(LOG_VVERB, header_buf->pos, mbuf_length(header_buf), "resp dyn message - header: "); msg_dump(rsp); } } return rsp; }
/* There are chances that the request to the remote peer or its response got dropped. * Hence we may not always receive a response to the request at the head of the FIFO. * Hence what we do is we mark that request as errored and move on the next one * in the outgoing queue. This works since we always have message ids in monotonically * increasing order. */ static void dnode_rsp_forward(struct context *ctx, struct conn *peer_conn, struct msg *rsp) { rstatus_t status; struct msg *req; struct conn *c_conn; ASSERT(!peer_conn->dnode_client && !peer_conn->dnode_server); /* response from a peer implies that peer is ok and heartbeating */ dnode_peer_ok(ctx, peer_conn); /* dequeue peer message (request) from peer conn */ while (true) { req = TAILQ_FIRST(&peer_conn->omsg_q); log_debug(LOG_VERB, "dnode_rsp_forward entering req %p rsp %p...", req, rsp); c_conn = req->owner; if (req->id == rsp->dmsg->id) { dnode_rsp_forward_match(ctx, peer_conn, rsp); return; } // Report a mismatch and try to rectify log_error("MISMATCH: dnode %c %d rsp_dmsg_id %u req %u:%u dnode rsp %u:%u", peer_conn->dnode_client ? 'c' : (peer_conn->dnode_server ? 's' : 'p'), peer_conn->sd, rsp->dmsg->id, req->id, req->parent_id, rsp->id, rsp->parent_id); if (c_conn && conn_to_ctx(c_conn)) stats_pool_incr(conn_to_ctx(c_conn), c_conn->owner, peer_mismatch_requests); // TODO : should you be worried about message id getting wrapped around to 0? if (rsp->dmsg->id < req->id) { // We received a response from the past. This indeed proves out of order // responses. A blunder to the architecture. Log it and drop the response. log_error("MISMATCH: received response from the past. Dropping it"); dnode_rsp_put(rsp); return; } if (req->consistency == DC_ONE) { if (req->swallow) { // swallow the request and move on the next one dnode_rsp_swallow(ctx, peer_conn, req, NULL); continue; } log_warn("req %d:%d with DC_ONE consistency is not being swallowed"); } if ((req->consistency == DC_QUORUM) && !peer_conn->same_dc) { if (req->swallow) { // swallow the request and move on the next one dnode_rsp_swallow(ctx, peer_conn, req, NULL); continue; } log_warn("req %d:%d with DC_QUORUM consistency is not being swallowed"); } log_error("MISMATCHED DNODE RSP RECEIVED %c %d dmsg->id %u req %u:%u rsp %u:%u, skipping....", peer_conn->dnode_client ? 'c' : (peer_conn->dnode_server ? 's' : 'p'), peer_conn->sd, rsp->dmsg->id, req->id, req->parent_id, rsp->id, rsp->parent_id); ASSERT(req != NULL && req->peer == NULL); ASSERT(req->request && !req->done); if (log_loggable(LOG_VVERB)) { loga("skipping req: "); msg_dump(req); } peer_conn->dequeue_outq(ctx, peer_conn, req); req->done = 1; // Create an appropriate response for the request so its propagated up; struct msg *err_rsp = msg_get(peer_conn, false, peer_conn->data_store); err_rsp->error = req->error = 1; err_rsp->err = req->err = BAD_FORMAT; err_rsp->dyn_error = req->dyn_error = BAD_FORMAT; err_rsp->dmsg = dmsg_get(); err_rsp->dmsg->id = req->id; log_debug(LOG_VERB, "%p <-> %p", req, err_rsp); /* establish err_rsp <-> req (response <-> request) link */ req->peer = err_rsp; err_rsp->peer = req; log_error("Peer connection s %d skipping request %u:%u, dummy err_rsp %u:%u", peer_conn->sd, req->id, req->parent_id, err_rsp->id, err_rsp->parent_id); rstatus_t status = conn_handle_response(c_conn, req->parent_id ? req->parent_id : req->id, err_rsp); IGNORE_RET_VAL(status); if (req->swallow) { log_debug(LOG_INFO, "swallow request %d:%d", req->id, req->parent_id); req_put(req); } } }
static void req_log(struct msg *req) { struct msg *rsp; /* peer message (response) */ int64_t req_time; /* time cost for this request */ char *peer_str; /* peer client ip:port */ uint32_t req_len, rsp_len; /* request and response length */ struct string *req_type; /* request type string */ struct keypos *kpos; if (log_loggable(LOG_NOTICE) == 0) { return; } /* a fake request? */ if (req->owner == NULL) { return; } /* a fragment? */ if (req->frag_id != 0 && req->frag_owner != req) { return; } /* conn close normally? */ if (req->mlen == 0) { return; } /* * there is a race scenario where a requests comes in, the log level is not LOG_NOTICE, * and before the response arrives you modify the log level to LOG_NOTICE * using SIGTTIN OR SIGTTOU, then req_log() wouldn't have msg->start_ts set */ if (req->start_ts == 0) { return; } req_time = nc_usec_now() - req->start_ts; rsp = req->peer; req_len = req->mlen; rsp_len = (rsp != NULL) ? rsp->mlen : 0; if (array_n(req->keys) < 1) { return; } kpos = array_get(req->keys, 0); if (kpos->end != NULL) { *(kpos->end) = '\0'; } /* * FIXME: add backend addr here * Maybe we can store addrstr just like server_pool in conn struct * when connections are resolved */ peer_str = nc_unresolve_peer_desc(req->owner->sd); req_type = msg_type_string(req->type); log_debug(LOG_NOTICE, "req %"PRIu64" done on c %d req_time %"PRIi64".%03"PRIi64 " msec type %.*s narg %"PRIu32" req_len %"PRIu32" rsp_len %"PRIu32 " key0 '%s' peer '%s' done %d error %d", req->id, req->owner->sd, req_time / 1000, req_time % 1000, req_type->len, req_type->data, req->narg, req_len, rsp_len, kpos->start, peer_str, req->done, req->error); }
/* Description: link data from a peer connection to a client-facing connection * peer_conn: a peer connection * msg : msg with data from the peer connection after parsing */ static void dnode_rsp_forward_match(struct context *ctx, struct conn *peer_conn, struct msg *rsp) { rstatus_t status; struct msg *req; struct conn *c_conn; req = TAILQ_FIRST(&peer_conn->omsg_q); c_conn = req->owner; /* if client consistency is dc_one forward the response from only the local node. Since dyn_dnode_peer is always a remote node, drop the rsp */ if (req->consistency == DC_ONE) { if (req->swallow) { dnode_rsp_swallow(ctx, peer_conn, req, rsp); return; } log_warn("req %d:%d with DC_ONE consistency is not being swallowed"); } /* if client consistency is dc_quorum, forward the response from only the local region/DC. */ if ((req->consistency == DC_QUORUM) && !peer_conn->same_dc) { if (req->swallow) { dnode_rsp_swallow(ctx, peer_conn, req, rsp); return; } log_warn("req %d:%d with DC_QUORUM consistency is not being swallowed"); } log_debug(LOG_DEBUG, "DNODE RSP RECEIVED %c %d dmsg->id %u req %u:%u rsp %u:%u, ", peer_conn->dnode_client ? 'c' : (peer_conn->dnode_server ? 's' : 'p'), peer_conn->sd, rsp->dmsg->id, req->id, req->parent_id, rsp->id, rsp->parent_id); ASSERT(req != NULL && req->peer == NULL); ASSERT(req->request && !req->done); if (log_loggable(LOG_VVERB)) { loga("Dumping content for response: "); msg_dump(rsp); loga("rsp id %d", rsp->id); loga("Dumping content for request:"); msg_dump(req); loga("req id %d", req->id); } peer_conn->dequeue_outq(ctx, peer_conn, req); req->done = 1; log_debug(LOG_VERB, "%p <-> %p", req, rsp); /* establish rsp <-> req (response <-> request) link */ req->peer = rsp; rsp->peer = req; rsp->pre_coalesce(rsp); ASSERT((c_conn->client && !c_conn->proxy) || (c_conn->dnode_client && !c_conn->dnode_server)); dnode_rsp_forward_stats(ctx, peer_conn->owner, rsp); if (TAILQ_FIRST(&c_conn->omsg_q) != NULL && dnode_req_done(c_conn, req)) { log_debug(LOG_INFO, "handle rsp %d:%d for req %d:%d conn %p", rsp->id, rsp->parent_id, req->id, req->parent_id, c_conn); // c_conn owns respnse now rstatus_t status = conn_handle_response(c_conn, req->parent_id ? req->parent_id : req->id, rsp); if (req->swallow) { log_debug(LOG_INFO, "swallow request %d:%d", req->id, req->parent_id); req_put(req); } } }