/*
 * Split the last mbuf of h at pos.
 *
 * The bytes in [pos, last) of that mbuf are copied into a newly obtained
 * mbuf, and the original mbuf is truncated so that it ends at pos. When cb
 * is non-NULL it is invoked on the new mbuf, with cbarg, before the copy
 * takes place (the precopy handler).
 *
 * Returns the new mbuf, or NULL when no mbuf could be obtained. The new
 * mbuf is not linked into h by this function.
 */
struct mbuf *
mbuf_split(struct mhdr *h, uint8_t *pos, mbuf_copy_t cb, void *cbarg)
{
    struct mbuf *tail, *fresh;
    size_t moved;

    ASSERT(!STAILQ_EMPTY(h));

    /* the split point must lie inside the readable region of the tail */
    tail = STAILQ_LAST(h, mbuf, next);
    ASSERT(pos >= tail->pos && pos <= tail->last);

    fresh = mbuf_get();
    if (fresh == NULL) {
        return NULL;
    }

    /* give the caller a chance to seed the new mbuf first */
    if (cb != NULL) {
        cb(fresh, cbarg);
    }

    /* move everything from pos onwards into the new mbuf ... */
    moved = (size_t)(tail->last - pos);
    mbuf_copy(fresh, pos, moved);

    /* ... and cut it off the old one */
    tail->last = pos;

    log_debug(LOG_VVERB, "split into mbuf %p len %"PRIu32" and nbuf %p len "
              "%"PRIu32" copied %zu bytes", tail, mbuf_length(tail), fresh,
              mbuf_length(fresh), moved);

    return fresh;
}
void dyn_parse_rsp(struct msg *r) { if (log_loggable(LOG_VVERB)) { log_debug(LOG_VVERB, ":::::::::::::::::::::: In dyn_parse_rsp, start to process response :::::::::::::::::::::::: "); msg_dump(r); } if (dyn_parse_core(r)) { struct dmsg *dmsg = r->dmsg; struct mbuf *b = STAILQ_LAST(&r->mhdr, mbuf, next); if (dmsg->type != DMSG_UNKNOWN && dmsg->type != DMSG_RES) { log_debug(LOG_DEBUG, "Resp parser: I got a dnode msg of type %d", dmsg->type); r->state = 0; r->result = MSG_PARSE_OK; r->dyn_state = DYN_DONE; return; } //check whether we need to decrypt the payload if (dmsg->bit_field == 1) { //dmsg->owner->owner->dnode_secured = 1; struct mbuf *decrypted_buf = mbuf_get(); if (decrypted_buf == NULL) { log_debug(LOG_INFO, "Unable to obtain an mbuf for dnode msg's header!"); r->result = MSG_OOM_ERROR; return; } //Dont need to decrypt AES key - pull it out from the conn dyn_aes_decrypt(dmsg->payload, dmsg->plen, decrypted_buf, r->owner->aes_key); b->pos = b->pos + dmsg->plen; r->pos = decrypted_buf->start; mbuf_copy(decrypted_buf, b->pos, mbuf_length(b)); mbuf_insert(&r->mhdr, decrypted_buf); mbuf_remove(&r->mhdr, b); mbuf_put(b); r->mlen = mbuf_length(decrypted_buf); } if (r->redis) { return redis_parse_rsp(r); } return memcache_parse_rsp(r); } //bad case if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "Resp: bad message - cannot parse"); //fix me to do something msg_dump(r); } r->result = MSG_PARSE_AGAIN; }
int msg_data_compare(struct msg *msg1, struct msg *msg2) { int ret; listNode *lnode1, *lnode2; struct mbuf *mbuf1, *mbuf2; uint32_t len; if (msg1 == NULL && msg2 == NULL) { return 0; } else if (msg1 == NULL && msg2 != NULL) { return -1; } else if (msg1 != NULL && msg2 == NULL) { return 1; } lnode1 = listFirst(msg1->data); lnode2 = listFirst(msg2->data); while (lnode1 && lnode2) { mbuf1 = listNodeValue(lnode1); mbuf2 = listNodeValue(lnode2); len = MIN(mbuf_length(mbuf1),mbuf_length(mbuf2)); ret = memcmp(mbuf1->pos, mbuf2->pos, len); if (ret != 0) { return ret; } mbuf1->pos += len; mbuf2->pos += len; msg1->mlen -= len; msg2->mlen -= len; if (mbuf_length(mbuf1) == 0) { lnode1 = lnode1->next; } if (mbuf_length(mbuf2) == 0) { lnode2 = lnode2->next; } } if (msg1->mlen > 0) { return 1; } else if (msg2->mlen > 0) { return -1; } return 0; }
/*
 * Consistency check for a message.
 *
 * Verifies, for every mbuf in msg->data, that start <= pos <= last, and
 * that msg->mlen equals the summed length of all mbufs. For requests it
 * additionally checks that msg->noreply matches ctx->noreply when ctx->cmd
 * compares equal to RMT_CMD_REDIS_MIGRATE over the shorter of the two
 * lengths.
 *
 * Every violation is logged against the caller's file/line. Returns RMT_OK
 * when nothing is wrong, RMT_ERROR when msg is NULL or any check failed.
 * On a failed check the message is dumped and, if panic is non-zero, a
 * stack trace is emitted and the process aborts.
 */
int
_msg_check(const char *file, int line, rmtContext *ctx, struct msg *msg, int panic)
{
    listIter *it;
    listNode *ln;
    struct mbuf *buf;
    uint32_t summed_len = 0;
    int failed = 0;

    if (msg == NULL) {
        return RMT_ERROR;
    }

    /* per-mbuf pointer sanity, accumulating the total length on the way */
    it = listGetIterator(msg->data, AL_START_HEAD);
    for (ln = listNext(it); ln != NULL; ln = listNext(it)) {
        buf = listNodeValue(ln);
        summed_len += mbuf_length(buf);

        if (buf->pos < buf->start) {
            _log(file, line, 0, "MSG CHECK Error: mbuf->pos(%p) < mbuf->start(%p)",
                 buf->pos, buf->start);
            failed = 1;
        }

        if (buf->pos > buf->last) {
            _log(file, line, 0, "MSG CHECK Error: mbuf->pos(%p) > mbuf->last(%p)",
                 buf->pos, buf->last);
            failed = 1;
        }
    }
    listReleaseIterator(it);

    /* the cached message length has to agree with the mbufs */
    if (msg->mlen != summed_len) {
        _log(file, line, 0, "MSG CHECK Error: msg->mlen(%u) != total_mbuf_len(%u)",
             msg->mlen, summed_len);
        failed = 1;
    }

    /* requests issued for the migrate command must carry ctx's noreply */
    if (msg->request == 1 &&
        memcmp(ctx->cmd, RMT_CMD_REDIS_MIGRATE,
               MIN(sdslen(ctx->cmd),strlen(RMT_CMD_REDIS_MIGRATE))) == 0 &&
        msg->noreply != ctx->noreply) {
        _log(file, line, 0, "MSG CHECK Error: msg->noreply(%u) != ctx->noreply(%d)",
             msg->noreply, ctx->noreply);
        failed = 1;
    }

    if (!failed) {
        return RMT_OK;
    }

    MSG_DUMP(msg, LOG_ERR, 0);
    if (panic) {
        rmt_stacktrace(1);
        abort();
    }

    return RMT_ERROR;
}
rstatus_t dmsg_write(struct mbuf *mbuf, uint64_t msg_id, uint8_t type, struct conn *conn, uint32_t payload_len) { mbuf_write_string(mbuf, &MAGIC_STR); mbuf_write_uint64(mbuf, msg_id); mbuf_write_char(mbuf, ' '); mbuf_write_uint8(mbuf, type); mbuf_write_char(mbuf, ' '); //encryption bit if (conn->dnode_secured) { mbuf_write_uint8(mbuf, 1); } else { mbuf_write_uint8(mbuf, 0); } mbuf_write_char(mbuf, ' '); mbuf_write_uint8(mbuf, version); //mbuf_write_string(mbuf, &CRLF_STR); mbuf_write_char(mbuf, ' '); mbuf_write_char(mbuf, '*'); //write aes key unsigned char *aes_key = conn->aes_key; if (conn->dnode_secured && conn->dnode_crypto_state == 0) { mbuf_write_uint32(mbuf, AES_ENCRYPTED_KEYLEN); } else { mbuf_write_uint32(mbuf, 1); } mbuf_write_char(mbuf, ' '); //mbuf_write_string(mbuf, data); if (conn->dnode_secured && conn->dnode_crypto_state == 0) { #ifdef DN_DEBUG_LOG loga("AES key to be encrypted : %s \n", base64_encode(aes_key, 32)); #endif dyn_rsa_encrypt(aes_key, aes_encrypted_buf); mbuf_write_bytes(mbuf, aes_encrypted_buf, AES_ENCRYPTED_KEYLEN); conn->dnode_crypto_state = 1; } else { mbuf_write_char(mbuf, 'd'); //TODOs: replace with another string } //mbuf_write_string(mbuf, &CRLF_STR); mbuf_write_char(mbuf, ' '); mbuf_write_char(mbuf, '*'); mbuf_write_uint32(mbuf, payload_len); mbuf_write_string(mbuf, &CRLF_STR); #ifdef DN_DEBUG_LOG log_hexdump(LOG_VERB, mbuf->pos, mbuf_length(mbuf), "dyn message producer: "); #endif return DN_OK; }
static void * gossip_loop(void *arg) { struct server_pool *sp = arg; uint64_t gossip_interval = gn_pool.g_interval * 1000; seeds_buf = mbuf_alloc(SEED_BUF_SIZE); log_debug(LOG_VVERB, "gossip_interval : %d msecs", gn_pool.g_interval); for(;;) { usleep(gossip_interval); log_debug(LOG_VERB, "Gossip is running ..."); if (gn_pool.seeds_provider != NULL && gn_pool.seeds_provider(sp->ctx, seeds_buf) == DN_OK) { log_debug(LOG_VERB, "Got seed nodes '%.*s'", mbuf_length(seeds_buf), seeds_buf->pos); gossip_update_seeds(sp, seeds_buf); } current_node->ts = (uint64_t) time(NULL); gossip_process_msgs(); if (current_node->state == NORMAL) { gn_pool.ctx->dyn_state = NORMAL; } if (!sp->ctx->enable_gossip) { //gossip_debug(); continue; //no gossiping } if (node_count == 1) { //single node deployment gn_pool.ctx->dyn_state = NORMAL; continue; } //STANDBY state for warm bootstrap if (gn_pool.ctx->dyn_state == STANDBY) continue; if (gn_pool.ctx->dyn_state == JOINING) { log_debug(LOG_NOTICE, "I am still joining the ring!"); //aggressively contact all known nodes before changing to state NORMAL gossip_announce_joining(sp); usleep(MAX(gn_pool.ctx->timeout, gossip_interval) * 2); } else if (gn_pool.ctx->dyn_state == NORMAL) { gossip_forward_state(sp); } gossip_debug(); } //end for loop mbuf_dealloc(seeds_buf); seeds_buf = NULL; return NULL; }
static rstatus_t conn_send_queue(struct conn *conn) { struct mbuf *mbuf, *nbuf; /* current and next mbuf */ size_t mlen; /* current mbuf data length */ ssize_t n; for (mbuf = STAILQ_FIRST(&conn->send_queue); mbuf != NULL; mbuf = nbuf) { nbuf = STAILQ_NEXT(mbuf, next); if (mbuf_empty(mbuf)) { continue; } mlen = mbuf_length(mbuf); n = conn_send_buf(conn, mbuf->pos, mlen); if (n < 0) { if (n == NC_EAGAIN) { return NC_OK; } return NC_ERROR; } mbuf->pos += n; if (n < mlen) { ASSERT(mbuf->pos < mbuf->end); return NC_OK; } ASSERT(mbuf->pos == mbuf->last); mbuf_remove(&conn->send_queue, mbuf); mbuf_put(mbuf); } conn->send_ready = 0; return NC_OK; }
static rstatus_t sentinel_proc_pub(struct context *ctx, struct msg *msg) { rstatus_t status; struct string pool_name, server_name, server_ip, tmp_string, pub_titile, pub_event; struct mbuf *line_buf; int server_port; string_init(&tmp_string); string_init(&pool_name); string_init(&server_name); string_init(&server_ip); string_set_text(&pub_titile, "pmessage"); string_set_text(&pub_event, "+switch-master"); line_buf = mbuf_get(); if (line_buf == NULL) { goto error; } /* get line in line num 3 for pub titile */ msg_read_line(msg, line_buf, 3); if (mbuf_length(line_buf) == 0) { log_error("read line failed from sentinel pmessage when skip line not used."); goto error; } status = mbuf_read_string(line_buf, CR, &tmp_string); if (status != NC_OK || string_compare(&pub_titile, &tmp_string)) { log_error("pub title error(lineinfo %.*s)", tmp_string.len, tmp_string.data); goto error; } /* get line in line num 7 for pub event */ msg_read_line(msg, line_buf, 4); if (mbuf_length(line_buf) == 0) { log_error("read line failed from sentinel pmessage when skip line not used."); goto error; } status = mbuf_read_string(line_buf, CR, &tmp_string); if (status != NC_OK || string_compare(&pub_event, &tmp_string)) { log_error("pub channel error(lineinfo %.*s)", tmp_string.len, tmp_string.data); goto error; } /* get line in line num 9 for pub info */ msg_read_line(msg, line_buf, 2); if (mbuf_length(line_buf) == 0) { log_error("read line failed from sentinel pmessage when skip line not used."); goto error; } /* parse switch master info */ /* get pool name */ status = mbuf_read_string(line_buf, SENTINEL_SERVERNAME_SPLIT, &pool_name); if (status != NC_OK) { log_error("get pool name string failed."); goto error; } /* get server name */ status = mbuf_read_string(line_buf, ' ', &server_name); if (status != NC_OK) { log_error("get server name string failed."); goto error; } /* skip old ip and port string */ status = mbuf_read_string(line_buf, ' ', NULL); if (status != NC_OK) { log_error("skip old 
ip string failed."); goto error; } status = mbuf_read_string(line_buf, ' ', NULL); if (status != NC_OK) { log_error("skip old port string failed."); goto error; } /* get new server ip string */ status = mbuf_read_string(line_buf, ' ', &server_ip); if (status != NC_OK) { log_error("get new server ip string failed."); goto error; } /* get new server port */ status = mbuf_read_string(line_buf, CR, &tmp_string); if (status != NC_OK) { log_error("get new server port string failed."); goto error; } server_port = nc_atoi(tmp_string.data, tmp_string.len); if (server_port < 0) { log_error("tanslate server port string to int failed."); goto error; } status = server_switch(ctx, &pool_name, &server_name, &server_ip, server_port); if (status == NC_OK) { conf_rewrite(ctx); } status = NC_OK; done: if (line_buf != NULL) { mbuf_put(line_buf); } string_deinit(&tmp_string); string_deinit(&server_ip); string_deinit(&server_name); string_deinit(&pool_name); return status; error: status = NC_ERROR; goto done; }
static rstatus_t sentinel_proc_acksub(struct context *ctx, struct msg *msg) { rstatus_t status; struct string sub_titile, sub_channel, sub_ok, tmp_string; struct mbuf *line_buf; string_init(&tmp_string); string_set_text(&sub_titile, "psubscribe"); string_set_text(&sub_channel, "+switch-master"); string_set_text(&sub_ok, ":1"); line_buf = mbuf_get(); if (line_buf == NULL) { goto error; } /* get line in line num 3 for sub titile */ msg_read_line(msg, line_buf, 3); if (mbuf_length(line_buf) == 0) { log_error("read line failed from sentinel ack sub when skip line not used."); goto error; } status = mbuf_read_string(line_buf, CR, &tmp_string); if (status != NC_OK || string_compare(&sub_titile, &tmp_string)) { goto error; } /* get line in line num 5 for sub channel */ msg_read_line(msg, line_buf, 2); if (mbuf_length(line_buf) == 0) { log_error("read line failed from sentinel ack sub when skip line not used."); goto error; } status = mbuf_read_string(line_buf, CR, &tmp_string); if (status != NC_OK || string_compare(&sub_channel, &tmp_string)) { goto error; } /* get sub status */ msg_read_line(msg, line_buf, 1); if (line_buf == 0) { log_error("read line failed from sentinel ack sub when skip line not used."); goto error; } status = mbuf_read_string(line_buf, CR, &tmp_string); if (status != NC_OK || string_compare(&sub_ok, &tmp_string)) { goto error; } log_debug(LOG_INFO, "success sub channel %.*s from sentinel", sub_channel.len, sub_channel.data); status = NC_OK; done: if (line_buf != NULL) { mbuf_put(line_buf); } string_deinit(&tmp_string); return status; error: status = NC_ERROR; goto done; }
static rstatus_t sentinel_proc_sentinel_info(struct context *ctx, struct msg *msg) { rstatus_t status; int i, master_num, switch_num; struct string pool_name, server_name, server_ip, tmp_string, sentinel_masters_prefix, master_ok; struct mbuf *line_buf; int server_port; string_init(&tmp_string); string_init(&pool_name); string_init(&server_name); string_init(&server_ip); string_set_text(&sentinel_masters_prefix, "sentinel_masters"); string_set_text(&master_ok, "status=ok"); line_buf = mbuf_get(); if (line_buf == NULL) { goto error; } /* get sentinel master num at line 3 */ msg_read_line(msg, line_buf, 3); if (mbuf_length(line_buf) == 0) { log_error("read line failed from sentinel ack info when skip line not used."); goto error; } status = mbuf_read_string(line_buf, ':', &tmp_string); if (status != NC_OK || string_compare(&sentinel_masters_prefix, &tmp_string)) { goto error; } status = mbuf_read_string(line_buf, CR, &tmp_string); if (status != NC_OK) { goto error; } master_num = nc_atoi(tmp_string.data, tmp_string.len); if (master_num < 0) { log_error("parse master number from sentinel ack info failed."); goto error; } /* skip 3 line in ack info which is not used. 
*/ msg_read_line(msg, line_buf, 3); if (mbuf_length(line_buf) == 0) { log_error("read line failed from sentinel ack info when skip line not used."); goto error; } /* parse master info from sentinel ack info */ switch_num = 0; for (i = 0; i < master_num; i++) { msg_read_line(msg, line_buf, 1); if (mbuf_length(line_buf) == 0) { log_error("read line failed from sentinel ack info when parse master item."); goto error; } log_debug(LOG_INFO, "master item line : %.*s", mbuf_length(line_buf), line_buf->pos); /* skip master item prefix */ status = mbuf_read_string(line_buf, ':', NULL); if (status != NC_OK) { log_error("skip master item prefix failed"); goto error; } /* skip master item server name prefix */ status = mbuf_read_string(line_buf, '=', NULL); if (status != NC_OK) { log_error("skip master item server name prefix failed."); goto error; } /* get server pool name */ status = mbuf_read_string(line_buf, SENTINEL_SERVERNAME_SPLIT, &pool_name); if (status != NC_OK) { log_error("get server pool name failed."); goto error; } /* get server name */ status = mbuf_read_string(line_buf, ',', &server_name); if (status != NC_OK) { log_error("get server name failed."); goto error; } /* get master status */ status = mbuf_read_string(line_buf, ',', &tmp_string); if (status != NC_OK) { log_error("get master status failed."); goto error; } if (string_compare(&master_ok, &tmp_string)) { log_error("master item status is not ok, use it anyway"); } /* skip ip string prefix name */ status = mbuf_read_string(line_buf, '=', NULL); if (status != NC_OK) { log_error("skip master item address prefix failed."); goto error; } /* get server ip string */ status = mbuf_read_string(line_buf, ':', &server_ip); if (status != NC_OK) { log_error("get server ip string failed."); goto error; } /* get server port */ status = mbuf_read_string(line_buf, ',', &tmp_string); if (status != NC_OK) { log_error("get server port string failed."); goto error; } server_port = nc_atoi(tmp_string.data, tmp_string.len); 
if (server_port < 0) { log_error("tanslate server port string to int failed."); goto error; } status = server_switch(ctx, &pool_name, &server_name, &server_ip, server_port); /* if server is switched, add switch number */ if (status == NC_OK) { switch_num++; } } if (switch_num > 0) { conf_rewrite(ctx); } status = NC_OK; done: if (line_buf != NULL) { mbuf_put(line_buf); } string_deinit(&tmp_string); string_deinit(&pool_name); string_deinit(&server_name); string_deinit(&server_ip); return status; error: status = NC_ERROR; goto done; }
static void rsp_forward(struct context *ctx, struct conn *s_conn, struct msg *msg) { rstatus_t status; struct msg *pmsg; struct conn *c_conn; ASSERT(!s_conn->client && !s_conn->proxy); /* response from server implies that server is ok and heartbeating */ server_ok(ctx, s_conn); /* dequeue peer message (request) from server */ pmsg = TAILQ_FIRST(&s_conn->omsg_q); ASSERT(pmsg != NULL && pmsg->peer == NULL); ASSERT(pmsg->request && !pmsg->done); s_conn->dequeue_outq(ctx, s_conn, pmsg); pmsg->done = 1; /* establish msg <-> pmsg (response <-> request) link */ pmsg->peer = msg; msg->peer = pmsg; /* * Readjust responses of fragmented messages by not including the end * marker for all but the last response * * Valid responses for a fragmented requests are MSG_RSP_VALUE or, * MSG_RSP_END. For an invalid response, we send out SERVER_ERRROR with * EINVAL errno */ if (pmsg->frag_id != 0) { if (msg->type != MSG_RSP_VALUE && msg->type != MSG_RSP_END) { pmsg->error = 1; pmsg->err = EINVAL; } else if (!pmsg->last_fragment) { ASSERT(msg->end != NULL); for (;;) { struct mbuf *mbuf; mbuf = STAILQ_LAST(&msg->mhdr, mbuf, next); ASSERT(mbuf != NULL); /* * We cannot assert that end marker points to the last mbuf * Consider a scenario where end marker points to the * penultimate mbuf and the last mbuf only contains spaces * and CRLF: mhdr -> [...END] -> [\r\n] */ if (msg->end >= mbuf->pos && msg->end < mbuf->last) { /* end marker is within this mbuf */ msg->mlen -= (uint32_t)(mbuf->last - msg->end); mbuf->last = msg->end; break; } /* end marker is not in this mbuf */ msg->mlen -= mbuf_length(mbuf); mbuf_remove(&msg->mhdr, mbuf); mbuf_put(mbuf); } } } c_conn = pmsg->owner; ASSERT(c_conn->client && !c_conn->proxy); if (req_done(c_conn, TAILQ_FIRST(&c_conn->omsg_q))) { status = event_add_out(ctx->ep, c_conn); if (status != NC_OK) { c_conn->err = errno; } } rsp_forward_stats(ctx, s_conn->owner, msg); }
/* * Sending a mbuf of gossip data over the wire to a peer */ void dnode_peer_gossip_forward(struct context *ctx, struct conn *conn, bool redis, struct mbuf *data_buf) { rstatus_t status; struct msg *msg = msg_get(conn, 1, redis); if (msg == NULL) { log_debug(LOG_DEBUG, "Unable to obtain a msg"); return; } struct mbuf *header_buf = mbuf_get(); if (header_buf == NULL) { log_debug(LOG_DEBUG, "Unable to obtain a data_buf"); msg_put(msg); return; } uint64_t msg_id = peer_msg_id++; if (conn->dnode_secured) { log_debug(LOG_VERB, "Assemble a secured msg to send"); log_debug(LOG_VERB, "AES encryption key: %s\n", base64_encode(conn->aes_key, AES_KEYLEN)); struct mbuf *encrypted_buf = mbuf_get(); if (encrypted_buf == NULL) { loga("Unable to obtain an data_buf for encryption!"); return; //TODOs: need to clean up } status = dyn_aes_encrypt(data_buf->pos, mbuf_length(data_buf), encrypted_buf, conn->aes_key); log_debug(LOG_VERB, "#encrypted bytes : %d", status); //write dnode header dmsg_write(header_buf, msg_id, GOSSIP_SYN, conn, mbuf_length(encrypted_buf)); mbuf_insert_head(&msg->mhdr, header_buf); if (TRACING_LEVEL == LOG_VVERB) { log_hexdump(LOG_VVERB, data_buf->pos, mbuf_length(data_buf), "dyn message original payload: "); log_hexdump(LOG_VVERB, encrypted_buf->pos, mbuf_length(encrypted_buf), "dyn message encrypted payload: "); } mbuf_insert(&msg->mhdr, encrypted_buf); //free data_buf as no one will need it again mbuf_put(data_buf); } else { log_debug(LOG_VERB, "Assemble a non-secured msg to send"); dmsg_write_mbuf(header_buf, msg_id, GOSSIP_SYN, conn, mbuf_length(data_buf)); mbuf_insert_head(&msg->mhdr, header_buf); mbuf_insert(&msg->mhdr, data_buf); } if (TRACING_LEVEL == LOG_VVERB) { log_hexdump(LOG_VVERB, header_buf->pos, mbuf_length(header_buf), "dyn gossip message header: "); msg_dump(msg); } /* enqueue the message (request) into peer inq */ if (TAILQ_EMPTY(&conn->imsg_q)) { status = event_add_out(ctx->evb, conn); if (status != DN_OK) { dnode_req_forward_error(ctx, 
conn, msg); conn->err = errno; return; } } //need to handle a reply //conn->enqueue_outq(ctx, conn, msg); msg->noreply = 1; conn->enqueue_inq(ctx, conn, msg); }
/* dnode sends a response back to a peer */ struct msg * dnode_rsp_send_next(struct context *ctx, struct conn *conn) { rstatus_t status; ASSERT(conn->dnode_client && !conn->dnode_server); struct msg *rsp = rsp_send_next(ctx, conn); if (rsp != NULL && conn->dyn_mode) { struct msg *pmsg = rsp->peer; //need to deal with multi-block later uint64_t msg_id = pmsg->dmsg->id; struct mbuf *header_buf = mbuf_get(); if (header_buf == NULL) { loga("Unable to obtain an mbuf for header!"); return NULL; //need to address error here properly } dmsg_type_t msg_type = DMSG_RES; //TODOs: need to set the outcoming conn to be secured too if the incoming conn is secured if (pmsg->owner->dnode_secured || conn->dnode_secured) { if (log_loggable(LOG_VVERB)) { log_debug(LOG_VVERB, "Encrypting response ..."); loga("AES encryption key: %s\n", base64_encode(conn->aes_key, AES_KEYLEN)); } if (ENCRYPTION) { status = dyn_aes_encrypt_msg(rsp, conn->aes_key); if (status == DN_ERROR) { loga("OOM to obtain an mbuf for encryption!"); mbuf_put(header_buf); req_put(rsp); return NULL; } if (log_loggable(LOG_VVERB)) { log_debug(LOG_VERB, "#encrypted bytes : %d", status); } dmsg_write(header_buf, msg_id, msg_type, conn, msg_length(rsp)); } else { if (log_loggable(LOG_VVERB)) { log_debug(LOG_VERB, "no encryption on the rsp payload"); } dmsg_write(header_buf, msg_id, msg_type, conn, msg_length(rsp)); } } else { //write dnode header log_info("sending dnode response with msg_id %u", msg_id); dmsg_write(header_buf, msg_id, msg_type, conn, msg_length(rsp)); } mbuf_insert_head(&rsp->mhdr, header_buf); if (log_loggable(LOG_VVERB)) { log_hexdump(LOG_VVERB, header_buf->pos, mbuf_length(header_buf), "resp dyn message - header: "); msg_dump(rsp); } } return rsp; }
/*
 * Incrementally parse a memcache response.
 *
 * Parsing operates on the last mbuf of r->mhdr, starting at r->pos and in
 * the state saved in r->state, and consumes bytes up to b->last. The
 * outcome is reported through r->result:
 *
 *   MSG_PARSE_OK      a complete response was recognized; r->pos points
 *                     just past its terminating LF and r->state is reset
 *                     to SW_START.
 *   MSG_PARSE_AGAIN   the data ran out; r->pos and r->state record where
 *                     to resume.
 *   MSG_PARSE_REPAIR  the data ran out while a token was in progress and
 *                     the mbuf is full (b->last == b->end); r->pos is
 *                     rewound to the start of that token.
 *   MSG_PARSE_ERROR   unexpected input; errno is set to EINVAL.
 *
 * r->type is set to the recognized response type, r->vlen tracks the
 * number of value bytes still to be skipped, and r->end is set to the
 * start of an "END" token.
 *
 * Several states rewind p by one ("go back by 1 byte") so that the for
 * loop's p++ re-delivers the same byte to the next state.
 */
void
memcache_parse_rsp(struct msg *r)
{
    struct mbuf *b;
    uint8_t *p, *m;
    uint8_t ch;
    enum {
        SW_START,
        SW_RSP_NUM,
        SW_RSP_STR,
        SW_SPACES_BEFORE_KEY,
        SW_KEY,
        SW_SPACES_BEFORE_FLAGS,     /* 5 */
        SW_FLAGS,
        SW_SPACES_BEFORE_VLEN,
        SW_VLEN,
        SW_RUNTO_VAL,
        SW_VAL,                     /* 10 */
        SW_VAL_LF,
        SW_END,
        SW_RUNTO_CRLF,
        SW_CRLF,
        SW_ALMOST_DONE,             /* 15 */
        SW_SENTINEL
    } state;

    state = r->state;
    b = STAILQ_LAST(&r->mhdr, mbuf, next);

    ASSERT(!r->request);
    ASSERT(!r->redis);
    ASSERT(state >= SW_START && state < SW_SENTINEL);
    ASSERT(b != NULL);
    ASSERT(b->pos <= b->last);

    /* validate the parsing marker */
    ASSERT(r->pos != NULL);
    ASSERT(r->pos >= b->pos && r->pos <= b->last);

    for (p = r->pos; p < b->last; p++) {
        ch = *p;

        switch (state) {
        case SW_START:
            /* a leading digit selects the numeric response path */
            if (isdigit(ch)) {
                state = SW_RSP_NUM;
            } else {
                state = SW_RSP_STR;
            }
            p = p - 1; /* go back by 1 byte */

            break;

        case SW_RSP_NUM:
            if (r->token == NULL) {
                /* rsp_start <- p; type_start <- p */
                r->token = p;
            }

            if (isdigit(ch)) {
                /* num <- num * 10 + (ch - '0') */
                ;
            } else if (ch == ' ' || ch == CR) {
                /* type_end <- p - 1 */
                r->token = NULL;
                r->type = MSG_RSP_MC_NUM;
                p = p - 1; /* go back by 1 byte */
                state = SW_CRLF;
            } else {
                goto error;
            }

            break;

        case SW_RSP_STR:
            if (r->token == NULL) {
                /* rsp_start <- p; type_start <- p */
                r->token = p;
            }

            if (ch == ' ' || ch == CR) {
                /* type_end <- p - 1 */
                m = r->token;
                /* r->token = NULL; */
                r->type = MSG_UNKNOWN;

                /* classify the keyword by its length, then by its text */
                switch (p - m) {
                case 3:
                    if (str4cmp(m, 'E', 'N', 'D', '\r')) {
                        r->type = MSG_RSP_MC_END;
                        /* end_start <- m; end_end <- p - 1 */
                        r->end = m;
                        break;
                    }

                    break;

                case 5:
                    if (str5cmp(m, 'V', 'A', 'L', 'U', 'E')) {
                        /*
                         * Encompasses responses for 'get', 'gets' and
                         * 'cas' command.
                         */
                        r->type = MSG_RSP_MC_VALUE;
                        break;
                    }

                    if (str5cmp(m, 'E', 'R', 'R', 'O', 'R')) {
                        r->type = MSG_RSP_MC_ERROR;
                        break;
                    }

                    break;

                case 6:
                    if (str6cmp(m, 'S', 'T', 'O', 'R', 'E', 'D')) {
                        r->type = MSG_RSP_MC_STORED;
                        break;
                    }

                    if (str6cmp(m, 'E', 'X', 'I', 'S', 'T', 'S')) {
                        r->type = MSG_RSP_MC_EXISTS;
                        break;
                    }

                    break;

                case 7:
                    if (str7cmp(m, 'D', 'E', 'L', 'E', 'T', 'E', 'D')) {
                        r->type = MSG_RSP_MC_DELETED;
                        break;
                    }

                    break;

                case 9:
                    if (str9cmp(m, 'N', 'O', 'T', '_', 'F', 'O', 'U', 'N', 'D')) {
                        r->type = MSG_RSP_MC_NOT_FOUND;
                        break;
                    }

                    break;

                case 10:
                    if (str10cmp(m, 'N', 'O', 'T', '_', 'S', 'T', 'O', 'R', 'E', 'D')) {
                        r->type = MSG_RSP_MC_NOT_STORED;
                        break;
                    }

                    break;

                case 12:
                    if (str12cmp(m, 'C', 'L', 'I', 'E', 'N', 'T', '_', 'E', 'R', 'R', 'O', 'R')) {
                        r->type = MSG_RSP_MC_CLIENT_ERROR;
                        break;
                    }

                    if (str12cmp(m, 'S', 'E', 'R', 'V', 'E', 'R', '_', 'E', 'R', 'R', 'O', 'R')) {
                        r->type = MSG_RSP_MC_SERVER_ERROR;
                        break;
                    }

                    break;
                }

                /* pick the follow-up state for the recognized keyword */
                switch (r->type) {
                case MSG_UNKNOWN:
                    goto error;

                case MSG_RSP_MC_STORED:
                case MSG_RSP_MC_NOT_STORED:
                case MSG_RSP_MC_EXISTS:
                case MSG_RSP_MC_NOT_FOUND:
                case MSG_RSP_MC_DELETED:
                    state = SW_CRLF;
                    break;

                case MSG_RSP_MC_END:
                    state = SW_CRLF;
                    break;

                case MSG_RSP_MC_VALUE:
                    state = SW_SPACES_BEFORE_KEY;
                    break;

                case MSG_RSP_MC_ERROR:
                    state = SW_CRLF;
                    break;

                case MSG_RSP_MC_CLIENT_ERROR:
                case MSG_RSP_MC_SERVER_ERROR:
                    /* these carry free text up to the CR */
                    state = SW_RUNTO_CRLF;
                    break;

                default:
                    NOT_REACHED();
                }

                p = p - 1; /* go back by 1 byte */
            }

            break;

        case SW_SPACES_BEFORE_KEY:
            if (ch != ' ') {
                state = SW_KEY;
                p = p - 1; /* go back by 1 byte */
            }

            break;

        case SW_KEY:
            /* the key itself is not recorded; it ends at the next space */
            if (ch == ' ') {
                /* r->token = NULL; */
                state = SW_SPACES_BEFORE_FLAGS;
            }

            break;

        case SW_SPACES_BEFORE_FLAGS:
            if (ch != ' ') {
                if (!isdigit(ch)) {
                    goto error;
                }
                state = SW_FLAGS;
                p = p - 1; /* go back by 1 byte */
            }

            break;

        case SW_FLAGS:
            if (r->token == NULL) {
                /* flags_start <- p */
                /* r->token = p; */
            }

            if (isdigit(ch)) {
                /* flags <- flags * 10 + (ch - '0') */
                ;
            } else if (ch == ' ') {
                /* flags_end <- p - 1 */
                /* r->token = NULL; */
                state = SW_SPACES_BEFORE_VLEN;
            } else {
                goto error;
            }

            break;

        case SW_SPACES_BEFORE_VLEN:
            if (ch != ' ') {
                if (!isdigit(ch)) {
                    goto error;
                }
                p = p - 1; /* go back by 1 byte */
                state = SW_VLEN;
                r->vlen = 0;
            }

            break;

        case SW_VLEN:
            /* accumulate the decimal value length into r->vlen */
            if (isdigit(ch)) {
                r->vlen = r->vlen * 10 + (uint32_t)(ch - '0');
            } else if (ch == ' ' || ch == CR) {
                /* vlen_end <- p - 1 */
                p = p - 1; /* go back by 1 byte */
                /* r->token = NULL; */
                state = SW_RUNTO_CRLF;
            } else {
                goto error;
            }

            break;

        case SW_RUNTO_VAL:
            switch (ch) {
            case LF:
                /* val_start <- p + 1 */
                state = SW_VAL;
                r->token = NULL;
                break;

            default:
                goto error;
            }

            break;

        case SW_VAL:
            /*
             * Skip over the value. m is where the CR that ends the value
             * is expected. If that lies at or beyond b->last, consume
             * what this mbuf holds, reduce r->vlen by that amount and
             * stay in SW_VAL for the next round of data.
             */
            m = p + r->vlen;
            if (m >= b->last) {
                ASSERT(r->vlen >= (uint32_t)(b->last - p));
                r->vlen -= (uint32_t)(b->last - p);
                m = b->last - 1;
                p = m; /* move forward by vlen bytes */
                break;
            }
            switch (*m) {
            case CR:
                /* val_end <- p - 1 */
                p = m; /* move forward by vlen bytes */
                state = SW_VAL_LF;
                break;

            default:
                goto error;
            }

            break;

        case SW_VAL_LF:
            switch (ch) {
            case LF:
                /* state = SW_END; */
                /* go on with the next keyword (another VALUE or END) */
                state = SW_RSP_STR;
                break;

            default:
                goto error;
            }

            break;

        case SW_END:
            /*
             * No statement in this function assigns SW_END to state (the
             * transition in SW_VAL_LF is commented out).
             */
            if (r->token == NULL) {
                if (ch != 'E') {
                    goto error;
                }
                /* end_start <- p */
                r->token = p;
            } else if (ch == CR) {
                /* end_end <- p */
                m = r->token;
                r->token = NULL;

                switch (p - m) {
                case 3:
                    if (str4cmp(m, 'E', 'N', 'D', '\r')) {
                        r->end = m;
                        state = SW_ALMOST_DONE;
                    }
                    break;

                default:
                    goto error;
                }
            }

            break;

        case SW_RUNTO_CRLF:
            /* ignore everything up to the CR */
            switch (ch) {
            case CR:
                if (r->type == MSG_RSP_MC_VALUE) {
                    state = SW_RUNTO_VAL;
                } else {
                    state = SW_ALMOST_DONE;
                }

                break;

            default:
                break;
            }

            break;

        case SW_CRLF:
            /* only spaces may precede the CR */
            switch (ch) {
            case ' ':
                break;

            case CR:
                state = SW_ALMOST_DONE;
                break;

            default:
                goto error;
            }

            break;

        case SW_ALMOST_DONE:
            switch (ch) {
            case LF:
                /* rsp_end <- p */
                goto done;

            default:
                goto error;
            }

            break;

        case SW_SENTINEL:
        default:
            NOT_REACHED();
            break;

        }
    }

    /* ran out of data before the response was complete */
    ASSERT(p == b->last);
    r->pos = p;
    r->state = state;

    /*
     * The mbuf is full and a token is still in progress: rewind r->pos to
     * the start of the token and ask for a repair. For the listed states
     * parsing also restarts from SW_START.
     */
    if (b->last == b->end && r->token != NULL) {
        if (state <= SW_RUNTO_VAL || state == SW_CRLF || state == SW_ALMOST_DONE) {
            r->state = SW_START;
        }
        r->pos = r->token;
        r->token = NULL;
        r->result = MSG_PARSE_REPAIR;
    } else {
        r->result = MSG_PARSE_AGAIN;
    }

    /*
     * NOTE(review): the last two arguments are pointer differences paired
     * with "%d" - confirm how log_hexdump consumes its format arguments.
     */
    log_hexdump(LOG_VERB, b->pos, mbuf_length(b), "parsed rsp %"PRIu64" res %d "
                "type %d state %d rpos %d of %d", r->id, r->result, r->type,
                r->state, r->pos - b->pos, b->last - b->pos);
    return;

done:
    ASSERT(r->type > MSG_UNKNOWN && r->type < MSG_SENTINEL);
    /* p points at the terminating LF; resume right after it next time */
    r->pos = p + 1;
    ASSERT(r->pos <= b->last);
    r->state = SW_START;
    r->token = NULL;
    r->result = MSG_PARSE_OK;

    log_hexdump(LOG_VERB, b->pos, mbuf_length(b), "parsed rsp %"PRIu64" res %d "
                "type %d state %d rpos %d of %d", r->id, r->result, r->type,
                r->state, r->pos - b->pos, b->last - b->pos);
    return;

error:
    r->result = MSG_PARSE_ERROR;
    r->state = state;
    errno = EINVAL;

    log_hexdump(LOG_INFO, b->pos, mbuf_length(b), "parsed bad rsp %"PRIu64" "
                "res %d type %d state %d", r->id, r->result, r->type,
                r->state);
}
static bool dyn_parse_core(struct msg *r) { struct dmsg *dmsg; struct mbuf *b; uint8_t *p, *token; uint8_t ch = ' '; uint64_t num = 0; dyn_state = r->dyn_state; if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "dyn_state: %d", r->dyn_state); } if (r->dyn_state == DYN_DONE || r->dyn_state == DYN_POST_DONE) return true; b = STAILQ_LAST(&r->mhdr, mbuf, next); dmsg = r->dmsg; if (dmsg == NULL) { r->dmsg = dmsg_get(); dmsg = r->dmsg; dmsg->owner = r; if (dmsg == NULL) {//should track this as a dropped message loga("unable to create a new dmsg"); goto error; //should count as OOM error } } token = NULL; for (p = r->pos; p < b->last; p++) { ch = *p; switch (dyn_state) { case DYN_START: if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "DYN_START"); } if (ch != ' ' && ch != '$') { break; } if (ch == ' ') { if (token == NULL) token = p; break; } if (ch == '$') { if (p + 5 < b->last) { if ((*(p+1) == '2') && (*(p+2) == '0') && (*(p+3) == '1') && (*(p+4) == '4') && (*(p+5) == '$')) { dyn_state = DYN_MAGIC_STRING; p += 5; } else { //goto skip; token = NULL; //reset } } else { goto split; } } else { loga("Facing a weird char %c", p); //goto skip; token = NULL; //reset } break; case DYN_MAGIC_STRING: if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "DYN_MAGIC_STRING"); } if (ch == ' ') { dyn_state = DYN_MSG_ID; num = 0; break; } else { //loga("char is '%c %c %c %c'", *(p-2), *(p-1), ch, *(p+1)); token = NULL; loga("Facing a weird char %c", p); //goto skip; dyn_state = DYN_START; } break; case DYN_MSG_ID: if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "DYN_MSG_ID"); log_debug(LOG_DEBUG, "num = %d", num); } if (isdigit(ch)) { num = num*10 + (ch - '0'); } else if (ch == ' ' && isdigit(*(p-1))) { if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "MSG ID : %d", num); } dmsg->id = num; dyn_state = DYN_TYPE_ID; num = 0; } else { //loga("char is '%c %c %c %c'", *(p-2), *(p-1), ch, *(p+1)); //goto skip; token = NULL; //reset dyn_state = DYN_START; if (ch == '$') p -= 1; } 
break; case DYN_TYPE_ID: if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "DYN_TYPE_ID: num = %d", num); } if (isdigit(ch)) { num = num*10 + (ch - '0'); } else if (ch == ' ' && isdigit(*(p-1))) { if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "Type Id: %d", num); } dmsg->type = num; dyn_state = DYN_BIT_FIELD; num = 0; } else { //loga("char is '%c %c %c %c'", *(p-2), *(p-1), ch, *(p+1)); token = NULL; dyn_state = DYN_START; if (ch == '$') p -= 1; } break; case DYN_BIT_FIELD: if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "DYN_BIT_FIELD, num = %d", num); } if (isdigit(ch)) { num = num*10 + (ch - '0'); } else if (ch == ' ' && isdigit(*(p-1))) { if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "DYN_BIT_FIELD : %d", num); } dmsg->bit_field = num & 0xF; dyn_state = DYN_VERSION; num = 0; } else { token = NULL; //loga("char is '%c %c %c %c'", *(p-2), *(p-1), ch, *(p+1)); dyn_state = DYN_START; if (ch == '$') p -= 1; } break; case DYN_VERSION: if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "DYN_VERSION: num = %d", num); } if (isdigit(ch)) { num = num*10 + (ch - '0'); } else if (ch == ' ' && isdigit(*(p-1))) { if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "VERSION : %d", num); } dmsg->version = num; dyn_state = DYN_SAME_DC; num = 0; } else { token = NULL; //loga("char is '%c %c %c %c'", *(p-2), *(p-1), ch, *(p+1)); dyn_state = DYN_START; if (ch == '$') p -= 1; } break; case DYN_SAME_DC: if (isdigit(ch)) { dmsg->same_dc = ch - '0'; if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "DYN_SAME_DC %d", dmsg->same_dc); } } else if (ch == ' ' && isdigit(*(p-1))) { dyn_state = DYN_DATA_LEN; num = 0; } else { token = NULL; //loga("char is '%c %c %c %c'", *(p-2), *(p-1), ch, *(p+1)); dyn_state = DYN_START; if (ch == '$') p -= 1; } break; case DYN_DATA_LEN: if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "DYN_DATA_LEN: num = %d", num); } if (ch == '*') { break; } else if (isdigit(ch)) { num = num*10 + (ch - '0'); } else if (ch == ' ' && isdigit(*(p-1))) 
{ if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "Data len: %d", num); } dmsg->mlen = num; dyn_state = DYN_DATA; num = 0; } else { token = NULL; //loga("char is '%c %c %c %c'", *(p-2), *(p-1), ch, *(p+1)); dyn_state = DYN_START; if (ch == '$') p -= 1; } break; case DYN_DATA: if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "DYN_DATA"); } if (p + dmsg->mlen < b->last) { dmsg->data = p; p += dmsg->mlen - 1; dyn_state = DYN_SPACES_BEFORE_PAYLOAD_LEN; } else { //loga("char is '%c %c %c %c'", *(p-2), *(p-1), ch, *(p+1)); goto split; } break; case DYN_SPACES_BEFORE_PAYLOAD_LEN: if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "DYN_SPACES_BEFORE_PAYLOAD_LEN"); } if (ch == ' ') { break; } else if (ch == '*') { dyn_state = DYN_PAYLOAD_LEN; num = 0; } break; case DYN_PAYLOAD_LEN: if (isdigit(ch)) { num = num*10 + (ch - '0'); } else if (ch == CR) { if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "Payload len: %d", num); } dmsg->plen = num; num = 0; dyn_state = DYN_CRLF_BEFORE_DONE; } else { token = NULL; dyn_state = DYN_START; if (ch == '$') p -= 1; } break; case DYN_CRLF_BEFORE_DONE: if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "DYN_CRLF_BEFORE_DONE"); } if (*p == LF) { dyn_state = DYN_DONE; } else { token = NULL; dyn_state = DYN_START; if (ch == '$') p -= 1; } break; case DYN_DONE: if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "DYN_DONE"); } r->pos = p; dmsg->payload = p; r->dyn_state = DYN_DONE; b->pos = p; goto done; break; default: NOT_REACHED(); break; } } if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "Not fully parsed yet!!!!!!"); } split: //this is an attempt recovery when we got a bad message //we try to look for the start the next good one and throw away the bad part if (r->dyn_state == DYN_START) { r->result = MSG_PARSE_AGAIN; if (b->last == b->end) { struct mbuf *nbuf = mbuf_get(); if (nbuf == NULL) { loga("Unable to obtain a new mbuf for replacement!"); mbuf_put(b); nbuf = mbuf_get(); mbuf_insert_head(&r->mhdr, nbuf); r->pos = 
nbuf->pos; return false; } //replacing the bad mbuf with a new and empty mbuf mbuf_insert(&r->mhdr, nbuf); mbuf_remove(&r->mhdr, b); mbuf_put(b); r->pos = nbuf->pos; return false; } else { //split it and throw away the bad portion struct mbuf *nbuf; nbuf = mbuf_split(&r->mhdr, r->pos, NULL, NULL); if (nbuf == NULL) { return DN_ENOMEM; } mbuf_insert(&r->mhdr, nbuf); mbuf_remove(&r->mhdr, b); r->pos = nbuf->pos; return false; } } if (mbuf_length(b) == 0 || b->last == b->end) { if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "Would this case ever happen?"); } r->result = MSG_PARSE_AGAIN; return false; } if (r->pos == b->last) { if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "Forward to reading the new block of data"); } r->dyn_state = DYN_START; r->result = MSG_PARSE_AGAIN; token = NULL; return false; } if (log_loggable(LOG_VVERB)) { log_debug(LOG_VVERB, "in split"); } r->dyn_state = DYN_START; r->pos = token; r->result = MSG_PARSE_REPAIR; if (log_loggable(LOG_VVERB)) { log_hexdump(LOG_VVERB, b->pos, mbuf_length(b), "split and inspecting req %"PRIu64" " "res %d type %d state %d", r->id, r->result, r->type, r->dyn_state); log_hexdump(LOG_VVERB, b->start, b->last - b->start, "split and inspecting full req %"PRIu64" " "res %d type %d state %d", r->id, r->result, r->type, r->dyn_state); } return false; done: r->pos = p; dmsg->source_address = r->owner->addr; if (log_loggable(LOG_VVERB)) { log_debug(LOG_VVERB, "at done with p at %d", p); log_hexdump(LOG_VVERB, r->pos, b->last - r->pos, "done and inspecting req %"PRIu64" " "res %d type %d state %d", r->id, r->result, r->type, r->dyn_state); log_hexdump(LOG_VVERB, b->start, b->last - b->start, "inspecting req %"PRIu64" " "res %d type %d state %d", r->id, r->result, r->type, r->dyn_state); } return true; error: log_debug(LOG_ERR, "at error for state %d and c %c", dyn_state, *p); r->result = MSG_PARSE_ERROR; r->pos = p; errno = EINVAL; if (log_loggable(LOG_ERR)) { log_hexdump(LOG_ERR, b->pos, mbuf_length(b), "parsed 
bad req %"PRIu64" " "res %d type %d state %d", r->id, r->result, r->type, dyn_state); log_hexdump(LOG_ERR, p, b->last - p, "inspecting req %"PRIu64" " "res %d type %d state %d", r->id, r->result, r->type, dyn_state); } r->dyn_state = dyn_state; return false; }
uint8_t dns_get_seeds(struct context * ctx, struct mbuf *seeds_buf) { static int _env_checked = 0; if (!_env_checked) { _env_checked = 1; txtName = getenv("DYNOMITE_DNS_TXT_NAME"); if (txtName == NULL) txtName = DNS_TXT_NAME; } log_debug(LOG_VVERB, "checking for %s", txtName); if (!seeds_check()) { return DN_NOOPS; } unsigned char buf[BUFSIZ]; int r = res_query(txtName, C_IN, T_TXT, buf, sizeof(buf)); if (r == -1) { log_debug(LOG_DEBUG, "DNS response for %s: %s", txtName, hstrerror(h_errno)); return DN_NOOPS; } if (r >= sizeof(buf)) { log_debug(LOG_DEBUG, "DNS reply is too large for %s: %d, bufsize: %d", txtName, r, sizeof(buf)); return DN_NOOPS; } HEADER *hdr = (HEADER*)buf; if (hdr->rcode != NOERROR) { log_debug(LOG_DEBUG, "DNS reply code for %s: %d", txtName, hdr->rcode); return DN_NOOPS; } int na = ntohs(hdr->ancount); ns_msg m; int k = ns_initparse(buf, r, &m); if (k == -1) { log_debug(LOG_DEBUG, "ns_initparse error for %s: %s", txtName, strerror(errno)); return DN_NOOPS; } int i; ns_rr rr; for (i = 0; i < na; ++i) { int k = ns_parserr(&m, ns_s_an, i, &rr); if (k == -1) { log_debug(LOG_DEBUG, "ns_parserr for %s: %s", txtName, strerror (errno)); return DN_NOOPS; } mbuf_rewind(seeds_buf); unsigned char *r = ns_rr_rdata(rr); if (r[0] >= ns_rr_rdlen(rr)) { log_debug(LOG_DEBUG, "invalid TXT length for %s: %d < %d", txtName, r[0], ns_rr_rdlen(rr)); return DN_NOOPS; } log_debug(LOG_VERB, "seeds for %s: %.*s", txtName, r[0], r +1); mbuf_copy(seeds_buf, r + 1, r[0]); } uint32_t seeds_hash = hash_seeds(seeds_buf->pos, mbuf_length(seeds_buf)); if (last_seeds_hash != seeds_hash) { last_seeds_hash = seeds_hash; } else { return DN_NOOPS; } return DN_OK; }
static bool dyn_parse_core(struct msg *r) { struct dmsg *dmsg; struct mbuf *b; uint8_t *p; uint8_t ch; uint64_t num = 0; state = r->dyn_state; b = STAILQ_LAST(&r->mhdr, mbuf, next); dmsg = r->dmsg; if (dmsg == NULL) { r->dmsg = dmsg_get(); dmsg = r->dmsg; if (dmsg == NULL) {//should track this as a dropped message goto error; //should count as OOM error } } //log_hexdump(LOG_VERB, b->pos, mbuf_length(b), "dyn parser: parsed req %"PRIu64" res %d type %d", r->id, r->result, r->type, r->dyn_state); for (p = r->pos; p < b->last; p++) { ch = *p; switch (state) { case DYN_START: //log_debug(LOG_DEBUG, "DYN_START"); if (ch == ' ') { break; } else if (isdigit(ch)) { num = ch - '0'; state = DYN_MAGIC_NUMBER; } else { goto skip; } break; case DYN_MAGIC_NUMBER: //log_debug(LOG_DEBUG, "DYN_MAGIC_NUMBER"); //log_debug(LOG_DEBUG, "num = %d", num); if (isdigit(ch)) { num = num*10 + (ch - '0'); } else { if (num == MAGIC_NUMBER) { state = DYN_SPACES_BEFORE_MSG_ID; } else { goto error; } } break; case DYN_SPACES_BEFORE_MSG_ID: //log_debug(LOG_DEBUG, "DYN_SPACES_BEFORE_MSG_ID"); if (ch == ' ') { break; } else if (isdigit(ch)) { num = ch - '0'; state = DYN_MSG_ID; } else { goto error; } break; case DYN_MSG_ID: log_debug(LOG_DEBUG, "DYN_MSG_ID"); log_debug(LOG_DEBUG, "num = %d", num); if (isdigit(ch)) { num = num*10 + (ch - '0'); } else if (ch != ' ') { goto error; } else { //if (num >= 0) { //log_debug(LOG_DEBUG, "MSG ID : %d", num); dmsg->id = num; state = DYN_SPACES_BEFORE_TYPE_ID; //} else { // goto error; //} } break; case DYN_SPACES_BEFORE_TYPE_ID: log_debug(LOG_DEBUG, "DYN_SPACES_BEFORE_TYPE_ID"); if (ch == ' ') { break; } else if (isdigit(ch)) { num = ch - '0'; state = DYN_TYPE_ID; } else { goto error; } break; case DYN_TYPE_ID: log_debug(LOG_DEBUG, "DYN_TYPE_ID"); log_debug(LOG_DEBUG, "num = %d", num); if (isdigit(ch)) { num = num*10 + (ch - '0'); } else { if (num > 0) { log_debug(LOG_DEBUG, "Type Id: %d", num); dmsg->type = num; //state = DYN_SPACES_BEFORE_VERSION; state = 
DYN_SPACES_BEFORE_BIT_FIELD; } else { goto error; } } break; case DYN_SPACES_BEFORE_BIT_FIELD: if (ch == ' ') { break; } else if (isdigit(ch)) { num = ch - '0'; state = DYN_BIT_FIELD; } else { goto error; } break; case DYN_BIT_FIELD: log_debug(LOG_DEBUG, "DYN_BIT_FIELD"); log_debug(LOG_DEBUG, "num = %d", num); if (isdigit(ch)) { num = num*10 + (ch - '0'); } else { if (ch == ' ') { log_debug(LOG_DEBUG, "DYN_BIT_FIELD : %d", num); dmsg->bit_field = num & 0xF; state = DYN_SPACES_BEFORE_VERSION; } else { goto error; } } log_debug(LOG_DEBUG, "Post DYN_BIT_FIELD"); log_debug(LOG_DEBUG, "num = %d", num); break; case DYN_SPACES_BEFORE_VERSION: log_debug(LOG_DEBUG, "DYN_SPACES_BEFORE_VERSION"); if (ch == ' ') { break; } else if (isdigit(ch)) { num = ch - '0'; state = DYN_VERSION; } else { goto error; } break; case DYN_VERSION: log_debug(LOG_DEBUG, "DYN_VERSION"); log_debug(LOG_DEBUG, "num = %d", num); if (isdigit(ch)) { num = num*10 + (ch - '0'); } else { if (ch == ' ') { //log_debug(LOG_DEBUG, "VERSION : %d", num); dmsg->version = num; state = DYN_SPACES_BEFORE_STAR; } else { goto error; } } break; case DYN_SPACES_BEFORE_STAR: //log_debug(LOG_DEBUG, "DYN_CRLF_BEFORE_STAR"); if (ch == ' ') { break; } else if (ch == '*') { state = DYN_DATA_LEN; num = 0; } else { goto error; } //else { // state = DYN_STAR; //} break; //case DYN_STAR: //log_debug(LOG_DEBUG, "DYN_STAR"); // if (ch == '*') { // state = DYN_DATA_LEN; // num = 0; // } else { // goto error; // } // break; case DYN_DATA_LEN: log_debug(LOG_DEBUG, "DYN_DATA_LEN"); log_debug(LOG_DEBUG, "num = %d", num); if (isdigit(ch)) { num = num*10 + (ch - '0'); } else { if (ch == ' ') { log_debug(LOG_DEBUG, "Data len: %d", num); dmsg->mlen = num; state = DYN_SPACE_BEFORE_DATA; num = 0; } else { goto error; } } break; case DYN_SPACE_BEFORE_DATA: log_debug(LOG_DEBUG, "DYN_SPACE_BEFORE_DATA"); state = DYN_DATA; break; case DYN_DATA: log_debug(LOG_DEBUG, "DYN_DATA"); p -= 1; if (dmsg->mlen > 0) { dmsg->data = p; p += dmsg->mlen - 1; 
state = DYN_SPACES_BEFORE_PAYLOAD_LEN; } else { goto error; } break; case DYN_SPACES_BEFORE_PAYLOAD_LEN: //this only need in dynomite's custome msg log_debug(LOG_DEBUG, "DYN_SPACES_BEFORE_PAYLOAD_LEN"); if (ch == ' ') { break; } else if (ch == '*') { state = DYN_PAYLOAD_LEN; num = 0; } else { goto error; } break; case DYN_PAYLOAD_LEN: if (isdigit(ch)) { num = num*10 + (ch - '0'); } else { if (ch == CR) { log_debug(LOG_DEBUG, "Payload len: %d", num); dmsg->plen = num; state = DYN_CRLF_BEFORE_DONE; num = 0; } else { goto error; } } break; case DYN_CRLF_BEFORE_DONE: //log_debug(LOG_DEBUG, "DYN_CRLF_BEFORE_DONE"); if (*p == LF) { state = DYN_DONE; } else { goto error; } break; case DYN_DONE: //log_debug(LOG_DEBUG, "DYN_DONE"); r->pos = p; dmsg->payload = p; r->dyn_state = DYN_DONE; b->pos = p; goto done; break; default: NOT_REACHED(); break; } } done: dmsg->owner = r; dmsg->source_address = r->owner->addr; //r->mlen = mbuf_length(b); //log_debug(LOG_DEBUG, "at done with p at %d", p); dmsg_dump(r->dmsg); log_hexdump(LOG_VERB, b->pos, mbuf_length(b), "dyn: parsed req %"PRIu64" res %d " "type %d state %d rpos %d of %d", r->id, r->result, r->type, r->dyn_state, r->pos - b->pos, b->last - b->pos); return true; skip: //log_debug(LOG_DEBUG, "This is not a dyn message"); dmsg->type = DMSG_UNKNOWN; dmsg->owner = r; dmsg->source_address = r->owner->addr; return true; error: log_debug(LOG_ERR, "at error"); r->result = MSG_PARSE_ERROR; r->state = state; errno = EINVAL; log_hexdump(LOG_INFO, b->pos, mbuf_length(b), "parsed bad req %"PRIu64" " "res %d type %d state %d", r->id, r->result, r->type, r->state); return false; return true; //fix me }
void dyn_parse_rsp(struct msg *r) { #ifdef DN_DEBUG_LOG log_debug(LOG_VERB, "In dyn_parse_rsp, start to process response :::::::::::::::::::::::: "); msg_dump(r); #endif if (dyn_parse_core(r)) { struct dmsg *dmsg = r->dmsg; if (dmsg->type != DMSG_UNKNOWN && dmsg->type != DMSG_RES) { log_debug(LOG_DEBUG, "Resp parser: I got a dnode msg of type %d", dmsg->type); r->state = 0; r->result = MSG_PARSE_OK; r->dyn_state = DYN_DONE; return; } //check whether we need to decrypt the payload if (dmsg->bit_field == 1) { //dmsg->owner->owner->dnode_secured = 1; struct mbuf *decrypted_buf = mbuf_get(); if (decrypted_buf == NULL) { log_debug(LOG_INFO, "Unable to obtain an mbuf for dnode msg's header!"); return; } #ifdef DN_DEBUG_LOG log_debug(LOG_VERB, "encrypted aes key length : %d", dmsg->mlen); loga("AES encryption key from conn: %s\n", base64_encode(r->owner->aes_key, AES_KEYLEN)); #endif //Dont need to decrypt AES key - pull it out from the conn dyn_aes_decrypt(dmsg->payload, dmsg->plen, decrypted_buf, r->owner->aes_key); #ifdef DN_DEBUG_LOG log_hexdump(LOG_VERB, decrypted_buf->pos, mbuf_length(decrypted_buf), "dyn message decrypted payload: "); #endif struct mbuf *b = STAILQ_LAST(&r->mhdr, mbuf, next); b->last = b->pos; r->pos = decrypted_buf->start; mbuf_insert(&r->mhdr, decrypted_buf); } if (r->redis) return redis_parse_rsp(r); return memcache_parse_rsp(r); } #ifdef DN_DEBUG_LOG //bad case log_debug(LOG_DEBUG, "Bad message - cannot parse"); //fix me to do something msg_dump(r); #endif //r->state = 0; //r->result = MSG_PARSE_OK; }
void dyn_parse_req(struct msg *r) { #ifdef DN_DEBUG_LOG log_debug(LOG_VVERB, "In dyn_parse_req, start to process request :::::::::::::::::::::: "); msg_dump(r); #endif bool done_parsing = false; if (dyn_parse_core(r)) { struct dmsg *dmsg = r->dmsg; if (dmsg->type != DMSG_UNKNOWN && dmsg->type != DMSG_REQ && dmsg->type != GOSSIP_SYN) { log_debug(LOG_DEBUG, "Req parser: I got a dnode msg of type %d", dmsg->type); r->state = 0; r->result = MSG_PARSE_OK; r->dyn_state = DYN_DONE; return; } if (dmsg->type == GOSSIP_SYN) { #ifdef DN_DEBUG_LOG log_debug(LOG_DEBUG, "Req parser: I got a GOSSIP_SYN msg"); #endif //TODOs: need to address multi-buffer msg later struct mbuf *b = STAILQ_LAST(&r->mhdr, mbuf, next); dmsg->payload = b->pos; b->pos = b->pos + dmsg->plen; r->pos = b->pos; done_parsing = true; } //check whether we need to decrypt the payload if (dmsg->bit_field == 1) { dmsg->owner->owner->dnode_secured = 1; r->owner->dnode_crypto_state = 1; struct mbuf *decrypted_buf = mbuf_get(); if (decrypted_buf == NULL) { loga("Unable to obtain an mbuf for dnode msg's header!"); return; } #ifdef DN_DEBUG_LOG log_debug(LOG_DEBUG, "data or encrypted aes key length : %d", dmsg->plen); #endif if (dmsg->mlen > 1) { #ifdef DN_DEBUG_LOG log_debug(LOG_DEBUG, "dmsg->mlen is something: %d, need to process it", dmsg->plen); #endif //Decrypt AES key dyn_rsa_decrypt(dmsg->data, aes_decrypted_buf); strncpy(r->owner->aes_key, aes_decrypted_buf, strlen(aes_decrypted_buf)); //Decrypt payload dyn_aes_decrypt(dmsg->payload, dmsg->plen, decrypted_buf, aes_decrypted_buf); } else { #ifdef DN_DEBUG_LOG log_debug(LOG_DEBUG, "dmsg->mlen is a dummy: %d, NO need to process it", dmsg->plen); #endif dyn_aes_decrypt(dmsg->payload, dmsg->plen, decrypted_buf, r->owner->aes_key); } #ifdef DN_DEBUG_LOG loga("AES encryption key: %s\n", base64_encode(aes_decrypted_buf, AES_KEYLEN)); log_hexdump(LOG_VERB, decrypted_buf->pos, mbuf_length(decrypted_buf), "dyn message decrypted payload: "); #endif struct mbuf *b = 
STAILQ_LAST(&r->mhdr, mbuf, next); b->last = b->pos; r->pos = decrypted_buf->start; mbuf_insert(&r->mhdr, decrypted_buf); //reset these variables dmsg->payload = decrypted_buf->start; dmsg->plen = mbuf_length(decrypted_buf); } if (done_parsing) return; if (r->redis) return redis_parse_req(r); return memcache_parse_req(r); } //bad case log_debug(LOG_DEBUG, "Bad message - cannot parse"); //fix me to do something msg_dump(r); }
/* * Pre-coalesce handler is invoked when the message is a response to * the fragmented multi vector request - 'get' or 'gets' and all the * responses to the fragmented request vector hasn't been received */ void memcache_pre_coalesce(struct msg *r) { struct msg *pr = r->peer; /* peer request */ struct mbuf *mbuf; ASSERT(!r->request); ASSERT(pr->request); if (pr->frag_id == 0) { /* do nothing, if not a response to a fragmented request */ return; } pr->frag_owner->nfrag_done++; switch (r->type) { case MSG_RSP_MC_VALUE: case MSG_RSP_MC_END: /* * Readjust responses of the fragmented message vector by not * including the end marker for all */ ASSERT(r->end != NULL); for (;;) { mbuf = STAILQ_LAST(&r->mhdr, mbuf, next); ASSERT(mbuf != NULL); /* * We cannot assert that end marker points to the last mbuf * Consider a scenario where end marker points to the * penultimate mbuf and the last mbuf only contains spaces * and CRLF: mhdr -> [...END] -> [\r\n] */ if (r->end >= mbuf->pos && r->end < mbuf->last) { /* end marker is within this mbuf */ r->mlen -= (uint32_t)(mbuf->last - r->end); mbuf->last = r->end; break; } /* end marker is not in this mbuf */ r->mlen -= mbuf_length(mbuf); mbuf_remove(&r->mhdr, mbuf); mbuf_put(mbuf); } break; default: /* * Valid responses for a fragmented requests are MSG_RSP_MC_VALUE or, * MSG_RSP_MC_END. For an invalid response, we send out SERVER_ERRROR * with EINVAL errno */ mbuf = STAILQ_FIRST(&r->mhdr); log_hexdump(LOG_ERR, mbuf->pos, mbuf_length(mbuf), "rsp fragment " "with unknown type %d", r->type); pr->error = 1; pr->err = EINVAL; break; } }
void dyn_parse_rsp(struct msg *r) { if (log_loggable(LOG_VVERB)) { log_debug(LOG_VVERB, ":::::::::::::::::::::: In dyn_parse_rsp, start to process response :::::::::::::::::::::::: "); msg_dump(r); } bool done_parsing = false; struct mbuf *b = STAILQ_LAST(&r->mhdr, mbuf, next); if (dyn_parse_core(r)) { struct dmsg *dmsg = r->dmsg; struct conn *conn = r->owner; conn->same_dc = dmsg->same_dc; if (dmsg->type != DMSG_UNKNOWN && dmsg->type != DMSG_RES) { log_debug(LOG_DEBUG, "Resp parser: I got a dnode msg of type %d", dmsg->type); r->state = 0; r->result = MSG_PARSE_OK; r->dyn_state = DYN_DONE; return; } if (r->dyn_state == DYN_DONE && dmsg->bit_field == 1) { dmsg->owner->owner->dnode_secured = 1; r->owner->dnode_crypto_state = 1; r->dyn_state = DYN_POST_DONE; r->result = MSG_PARSE_REPAIR; if (dmsg->mlen > 1) { //Decrypt AES key dyn_rsa_decrypt(dmsg->data, aes_decrypted_buf); strncpy(r->owner->aes_key, aes_decrypted_buf, strlen(aes_decrypted_buf)); } if (dmsg->plen + b->pos <= b->last) { struct mbuf *decrypted_buf = mbuf_get(); if (decrypted_buf == NULL) { loga("Unable to obtain an mbuf for dnode msg's header!"); r->result = MSG_OOM_ERROR; return; } dyn_aes_decrypt(b->pos, dmsg->plen, decrypted_buf, r->owner->aes_key); b->pos = b->pos + dmsg->plen; r->pos = decrypted_buf->start; mbuf_copy(decrypted_buf, b->pos, mbuf_length(b)); mbuf_insert(&r->mhdr, decrypted_buf); mbuf_remove(&r->mhdr, b); mbuf_put(b); r->mlen = mbuf_length(decrypted_buf); return data_store_parse_rsp(r); } //Subtract already received bytes dmsg->plen -= b->last - b->pos; return; } else if (r->dyn_state == DYN_POST_DONE) { struct mbuf *last_buf = STAILQ_LAST(&r->mhdr, mbuf, next); if (last_buf->read_flip == 1) { data_store_parse_rsp(r); } else { r->result = MSG_PARSE_AGAIN; } return; } if (done_parsing) return; return data_store_parse_rsp(r); } //bad case if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "Resp: bad message - cannot parse"); //fix me to do something msg_dump(r); } r->result = 
MSG_PARSE_AGAIN; }
/* * copy one response from src to dst * return bytes copied * */ static rstatus_t memcache_copy_bulk(struct msg *dst, struct msg *src) { struct mbuf *mbuf, *nbuf; uint8_t *p; uint32_t len = 0; uint32_t bytes = 0; uint32_t i = 0; for (mbuf = STAILQ_FIRST(&src->mhdr); mbuf && mbuf_empty(mbuf); mbuf = STAILQ_FIRST(&src->mhdr)) { mbuf_remove(&src->mhdr, mbuf); mbuf_put(mbuf); } mbuf = STAILQ_FIRST(&src->mhdr); if (mbuf == NULL) { return NC_OK; /* key not exists */ } p = mbuf->pos; /* get : VALUE key 0 len\r\nval\r\n */ /* gets: VALUE key 0 len cas\r\nval\r\n */ ASSERT(*p == 'V'); for (i = 0; i < 3; i++) { /* eat 'VALUE key 0 ' */ for (; *p != ' ';) { p++; } p++; } len = 0; for (; p < mbuf->last && isdigit(*p); p++) { len = len * 10 + (uint32_t)(*p - '0'); } for (; p < mbuf->last && ('\r' != *p); p++) { /* eat cas for gets */ ; } len += CRLF_LEN * 2; len += (p - mbuf->pos); bytes = len; /* copy len bytes to dst */ for (; mbuf;) { if (mbuf_length(mbuf) <= len) { /* steal this mbuf from src to dst */ nbuf = STAILQ_NEXT(mbuf, next); mbuf_remove(&src->mhdr, mbuf); mbuf_insert(&dst->mhdr, mbuf); len -= mbuf_length(mbuf); mbuf = nbuf; } else { /* split it */ nbuf = mbuf_get(); if (nbuf == NULL) { return NC_ENOMEM; } mbuf_copy(nbuf, mbuf->pos, len); mbuf_insert(&dst->mhdr, nbuf); mbuf->pos += len; break; } } dst->mlen += bytes; src->mlen -= bytes; log_debug(LOG_VVERB, "memcache_copy_bulk copy bytes: %d", bytes); return NC_OK; }
/* Forward a client request over to a peer */ void dnode_peer_req_forward(struct context *ctx, struct conn *c_conn, struct conn *p_conn, struct msg *msg, struct rack *rack, uint8_t *key, uint32_t keylen) { struct server *server = p_conn->owner; log_debug(LOG_DEBUG, "forwarding request from client conn '%s' to peer conn '%s' on rack '%.*s' dc '%.*s' ", dn_unresolve_peer_desc(c_conn->sd), dn_unresolve_peer_desc(p_conn->sd), rack->name->len, rack->name->data, server->dc.len, server->dc.data); struct string *dc = rack->dc; rstatus_t status; /* enqueue message (request) into client outq, if response is expected */ if (!msg->noreply && !msg->swallow) { conn_enqueue_outq(ctx, c_conn, msg); } ASSERT(p_conn->type == CONN_DNODE_PEER_SERVER); ASSERT((c_conn->type == CONN_CLIENT) || (c_conn->type == CONN_DNODE_PEER_CLIENT)); /* enqueue the message (request) into peer inq */ status = event_add_out(ctx->evb, p_conn); if (status != DN_OK) { dnode_req_forward_error(ctx, p_conn, msg); p_conn->err = errno; return; } struct mbuf *header_buf = mbuf_get(); if (header_buf == NULL) { loga("Unable to obtain an mbuf for dnode msg's header!"); req_put(msg); return; } struct server_pool *pool = c_conn->owner; dmsg_type_t msg_type = (string_compare(&pool->dc, dc) != 0)? 
DMSG_REQ_FORWARD : DMSG_REQ; if (p_conn->dnode_secured) { //Encrypting and adding header for a request if (log_loggable(LOG_VVERB)) { log_debug(LOG_VERB, "AES encryption key: %s\n", base64_encode(p_conn->aes_key, AES_KEYLEN)); } //write dnode header if (ENCRYPTION) { status = dyn_aes_encrypt_msg(msg, p_conn->aes_key); if (status == DN_ERROR) { loga("OOM to obtain an mbuf for encryption!"); mbuf_put(header_buf); req_put(msg); return; } if (log_loggable(LOG_VVERB)) { log_debug(LOG_VERB, "#encrypted bytes : %d", status); } dmsg_write(header_buf, msg->id, msg_type, p_conn, msg_length(msg)); } else { if (log_loggable(LOG_VVERB)) { log_debug(LOG_VERB, "no encryption on the msg payload"); } dmsg_write(header_buf, msg->id, msg_type, p_conn, msg_length(msg)); } } else { //write dnode header dmsg_write(header_buf, msg->id, msg_type, p_conn, msg_length(msg)); } mbuf_insert_head(&msg->mhdr, header_buf); if (log_loggable(LOG_VVERB)) { log_hexdump(LOG_VVERB, header_buf->pos, mbuf_length(header_buf), "dyn message header: "); msg_dump(msg); } conn_enqueue_inq(ctx, p_conn, msg); dnode_peer_req_forward_stats(ctx, p_conn->owner, msg); if (log_loggable(LOG_VVERB)) { log_debug(LOG_VVERB, "remote forward from c %d to s %d req %"PRIu64" len %"PRIu32 " type %d with key '%.*s'", c_conn->sd, p_conn->sd, msg->id, msg->mlen, msg->type, keylen, key); } }
void memcache_parse_req(struct msg *r) { struct mbuf *b; uint8_t *p, *m; uint8_t ch; enum { SW_START, SW_REQ_TYPE, SW_SPACES_BEFORE_KEY, SW_KEY, SW_SPACES_BEFORE_KEYS, SW_SPACES_BEFORE_FLAGS, SW_FLAGS, SW_SPACES_BEFORE_EXPIRY, SW_EXPIRY, SW_SPACES_BEFORE_VLEN, SW_VLEN, SW_SPACES_BEFORE_CAS, SW_CAS, SW_RUNTO_VAL, SW_VAL, SW_SPACES_BEFORE_NUM, SW_NUM, SW_RUNTO_CRLF, SW_CRLF, SW_NOREPLY, SW_AFTER_NOREPLY, SW_ALMOST_DONE, SW_SENTINEL } state; state = r->state; b = STAILQ_LAST(&r->mhdr, mbuf, next); ASSERT(r->request); ASSERT(state >= SW_START && state < SW_SENTINEL); ASSERT(b != NULL); ASSERT(b->pos <= b->last); /* validate the parsing maker */ ASSERT(r->pos != NULL); ASSERT(r->pos >= b->pos && r->pos <= b->last); for (p = r->pos; p < b->last; p++) { ch = *p; switch (state) { case SW_START: if (ch == ' ') { break; } if (!islower(ch)) { goto error; } /* req_start <- p; type_start <- p */ r->token = p; state = SW_REQ_TYPE; break; case SW_REQ_TYPE: if (ch == ' ' || ch == CR) { /* type_end = p - 1 */ m = r->token; r->token = NULL; r->type = MSG_UNKNOWN; switch (p - m) { case 3: if (str4cmp(m, 'g', 'e', 't', ' ')) { r->type = MSG_REQ_GET; break; } if (str4cmp(m, 's', 'e', 't', ' ')) { r->type = MSG_REQ_SET; break; } if (str4cmp(m, 'a', 'd', 'd', ' ')) { r->type = MSG_REQ_ADD; break; } if (str4cmp(m, 'c', 'a', 's', ' ')) { r->type = MSG_REQ_CAS; break; } break; case 4: if (str4cmp(m, 'g', 'e', 't', 's')) { r->type = MSG_REQ_GETS; break; } if (str4cmp(m, 'i', 'n', 'c', 'r')) { r->type = MSG_REQ_INCR; break; } if (str4cmp(m, 'd', 'e', 'c', 'r')) { r->type = MSG_REQ_DECR; break; } if (str4cmp(m, 'q', 'u', 'i', 't')) { r->type = MSG_REQ_QUIT; r->quit = 1; break; } break; case 6: if (str6cmp(m, 'a', 'p', 'p', 'e', 'n', 'd')) { r->type = MSG_REQ_APPEND; break; } if (str6cmp(m, 'd', 'e', 'l', 'e', 't', 'e')) { r->type = MSG_REQ_DELETE; break; } break; case 7: if (str7cmp(m, 'p', 'r', 'e', 'p', 'e', 'n', 'd')) { r->type = MSG_REQ_PREPEND; break; } if (str7cmp(m, 'r', 'e', 'p', 'l', 
'a', 'c', 'e')) { r->type = MSG_REQ_REPLACE; break; } if (str7cmp(m, 'v', 'e', 'r', 's', 'i', 'o', 'n')) { r->type = MSG_REQ_VERSION; break; } break; } if (memcache_key(r)) { if (ch == CR) { goto error; } state = SW_SPACES_BEFORE_KEY; } else if (memcache_quit(r) || memcache_version(r)) { p = p - 1; /* go back by 1 byte */ state = SW_CRLF; } else { goto error; } } else if (!islower(ch)) { goto error; } break; case SW_SPACES_BEFORE_KEY: if (ch != ' ') { p = p - 1; /* go back by 1 byte */ state = SW_KEY; } break; case SW_KEY: if (r->token == NULL) { r->token = p; r->key_start = p; } if (ch == ' ' || ch == CR) { if ((p - r->key_start) > MEMCACHE_MAX_KEY_LENGTH) { log_error("parsed bad req %"PRIu64" of type %d with key " "prefix '%.*s...' and length %d that exceeds " "maximum key length", r->id, r->type, 16, r->key_start, p - r->key_start); goto error; } r->key_end = p; r->token = NULL; /* get next state */ if (memcache_storage(r)) { state = SW_SPACES_BEFORE_FLAGS; } else if (memcache_arithmetic(r)) { state = SW_SPACES_BEFORE_NUM; } else if (memcache_delete(r)) { state = SW_RUNTO_CRLF; } else if (memcache_retrieval(r)) { state = SW_SPACES_BEFORE_KEYS; } else { state = SW_RUNTO_CRLF; } if (ch == CR) { if (memcache_storage(r) || memcache_arithmetic(r)) { goto error; } p = p - 1; /* go back by 1 byte */ } } break; case SW_SPACES_BEFORE_KEYS: ASSERT(memcache_retrieval(r)); switch (ch) { case ' ': break; case CR: state = SW_ALMOST_DONE; break; default: r->token = p; goto fragment; } break; case SW_SPACES_BEFORE_FLAGS: if (ch != ' ') { if (!isdigit(ch)) { goto error; } p = p - 1; /* go back by 1 byte */ state = SW_FLAGS; } break; case SW_FLAGS: if (r->token == NULL) { /* flags_start <- p */ r->token = p; r->flags = 0; } if (isdigit(ch)) { r->flags = r->flags * 10 + (uint32_t)(ch - '0'); } else if (ch == ' ') { /* flags_end <- p - 1 */ r->token = NULL; state = SW_SPACES_BEFORE_EXPIRY; } else { goto error; } break; case SW_SPACES_BEFORE_EXPIRY: if (ch != ' ') { if 
(!isdigit(ch)) { goto error; } p = p - 1; /* go back by 1 byte */ state = SW_EXPIRY; } break; case SW_EXPIRY: if (r->token == NULL) { /* expiry_start <- p */ r->token = p; r->expiry = 0; } if (isdigit(ch)) { r->expiry = r->expiry * 10 + (uint32_t)(ch - '0'); } else if (ch == ' ') { /* expiry_end <- p - 1 */ r->token = NULL; state = SW_SPACES_BEFORE_VLEN; } else { goto error; } break; case SW_SPACES_BEFORE_VLEN: if (ch != ' ') { if (!isdigit(ch)) { goto error; } p = p - 1; /* go back by 1 byte */ state = SW_VLEN; } break; case SW_VLEN: if (r->token == NULL) { /* vlen_start <- p */ r->token = p; r->vlen = 0; } if (isdigit(ch)) { r->vlen = r->vlen * 10 + (uint32_t)(ch - '0'); } else if (memcache_cas(r)) { if (ch != ' ') { goto error; } /* vlen_end <- p - 1 */ r->rvlen = r->vlen; p = p - 1; /* go back by 1 byte */ r->token = NULL; state = SW_SPACES_BEFORE_CAS; } else if (ch == ' ' || ch == CR) { /* vlen_end <- p - 1 */ r->rvlen = r->vlen; p = p - 1; /* go back by 1 byte */ r->token = NULL; state = SW_RUNTO_CRLF; } else { goto error; } break; case SW_SPACES_BEFORE_CAS: if (ch != ' ') { if (!isdigit(ch)) { goto error; } p = p - 1; /* go back by 1 byte */ state = SW_CAS; } break; case SW_CAS: if (r->token == NULL) { /* cas_start <- p */ r->token = p; r->cas = 0; } if (isdigit(ch)) { r->cas = r->cas * 10ULL + (uint64_t)(ch - '0'); } else if (ch == ' ' || ch == CR) { /* cas_end <- p - 1 */ p = p - 1; /* go back by 1 byte */ r->token = NULL; state = SW_RUNTO_CRLF; } else { goto error; } break; case SW_RUNTO_VAL: switch (ch) { case LF: /* val_start <- p + 1 */ state = SW_VAL; break; default: goto error; } break; case SW_VAL: if (r->value == NULL) { r->value = p; } m = p + r->rvlen; if (m >= b->last) { ASSERT(r->rvlen >= (uint32_t)(b->last - p)); r->rvlen -= (uint32_t)(b->last - p); m = b->last - 1; p = m; /* move forward by vlen bytes */ break; } switch (*m) { case CR: /* val_end <- p - 1 */ p = m; /* move forward by vlen bytes */ state = SW_ALMOST_DONE; break; default: goto 
error; } break; case SW_SPACES_BEFORE_NUM: if (ch != ' ') { if (!isdigit(ch)) { goto error; } p = p - 1; /* go back by 1 byte */ state = SW_NUM; } break; case SW_NUM: if (r->token == NULL) { /* num_start <- p */ r->token = p; r->num = 0; } if (isdigit(ch)) { r->num = r->num * 10ULL + (uint64_t)(ch - '0'); } else if (ch == ' ' || ch == CR) { r->token = NULL; /* num_end <- p - 1 */ p = p - 1; /* go back by 1 byte */ state = SW_RUNTO_CRLF; } else { goto error; } break; case SW_RUNTO_CRLF: switch (ch) { case ' ': break; case 'n': if (memcache_storage(r) || memcache_arithmetic(r) || memcache_delete(r)) { p = p - 1; /* go back by 1 byte */ state = SW_NOREPLY; } else { goto error; } break; case CR: if (memcache_storage(r)) { state = SW_RUNTO_VAL; } else { state = SW_ALMOST_DONE; } break; default: goto error; } break; case SW_NOREPLY: if (r->token == NULL) { /* noreply_start <- p */ r->token = p; } switch (ch) { case ' ': case CR: m = r->token; if (((p - m) == 7) && str7cmp(m, 'n', 'o', 'r', 'e', 'p', 'l', 'y')) { ASSERT(memcache_storage(r) || memcache_arithmetic(r) || memcache_delete(r)); r->token = NULL; /* noreply_end <- p - 1 */ r->noreply = 1; state = SW_AFTER_NOREPLY; p = p - 1; /* go back by 1 byte */ } else { goto error; } } break; case SW_AFTER_NOREPLY: switch (ch) { case ' ': break; case CR: if (memcache_storage(r)) { state = SW_RUNTO_VAL; } else { state = SW_ALMOST_DONE; } break; default: goto error; } break; case SW_CRLF: switch (ch) { case ' ': break; case CR: state = SW_ALMOST_DONE; break; default: goto error; } break; case SW_ALMOST_DONE: switch (ch) { case LF: /* req_end <- p */ goto done; default: goto error; } break; case SW_SENTINEL: default: NOT_REACHED(); break; } } /* * At this point, buffer from b->pos to b->last has been parsed completely * but we haven't been able to reach to any conclusion. Normally, this * means that we have to parse again starting from the state we are in * after more data has been read. 
The newly read data is either read into * a new mbuf, if existing mbuf is full (b->last == b->end) or into the * existing mbuf. * * The only exception to this is when the existing mbuf is full (b->last * is at b->end) and token marker is set, which means that we have to * copy the partial token into a new mbuf and parse again with more data * read into new mbuf. */ ASSERT(p == b->last); r->pos = p; r->state = state; if (b->last == b->end && r->token != NULL) { r->pos = r->token; r->token = NULL; r->result = MSG_PARSE_REPAIR; } else { r->result = MSG_PARSE_AGAIN; } log_hexdump(LOG_VERB, b->pos, mbuf_length(b), "parsed req %"PRIu64" res %d " "type %d state %d rpos %d of %d", r->id, r->result, r->type, r->state, r->pos - b->pos, b->last - b->pos); return; fragment: ASSERT(p != b->last); ASSERT(r->token != NULL); r->pos = r->token; r->token = NULL; r->state = state; r->result = MSG_PARSE_FRAGMENT; log_hexdump(LOG_VERB, b->pos, mbuf_length(b), "parsed req %"PRIu64" res %d " "type %d state %d rpos %d of %d", r->id, r->result, r->type, r->state, r->pos - b->pos, b->last - b->pos); return; done: ASSERT(r->type > MSG_UNKNOWN && r->type < MSG_SENTINEL); r->pos = p + 1; ASSERT(r->pos <= b->last); r->state = SW_START; r->result = MSG_PARSE_OK; log_hexdump(LOG_VERB, b->pos, mbuf_length(b), "parsed req %"PRIu64" res %d " "type %d state %d rpos %d of %d", r->id, r->result, r->type, r->state, r->pos - b->pos, b->last - b->pos); return; error: r->result = MSG_PARSE_ERROR; r->state = state; errno = EINVAL; log_hexdump(LOG_INFO, b->pos, mbuf_length(b), "parsed bad req %"PRIu64" " "res %d type %d state %d", r->id, r->result, r->type, r->state); }
/* dnode sends a response back to a peer */ static struct msg * dnode_rsp_send_next(struct context *ctx, struct conn *conn) { rstatus_t status; // SMB: There is some non trivial thing happening here. And I think it is very // important to read this before anything is changed in here. There is also a // bug that exists which I will mention briefly: // A message is a structure that has a list of mbufs which hold the actual data. // Each mbuf has start, pos, last as pointers (amongst others) which indicate start of the // buffer, current read position and end of the buffer respectively. // // Every time a message is sent to a peer within dynomite, a DNODE header is // prepended which is created using dmsg_write. A message remembers this case // in dnode_header_prepended, so that if the messsage is sent in parts, the // header is not prepended again for the subsequent parts. // // Like I said earlier there is a pos pointer in mbuf. If a message is sent // partially (or it is parsed partially too I think) the pos reflects that // case such that things can be resumed where it left off. // // dmsg_write has a parameter which reflects the payload length following the // dnode header calculated by msg_length. msg_length is a summation of all // mbuf sizes (last - start). Which I think is wrong. // // +------------+ +---------------+ // | DC1N1 +---------> | DC2N1 | // +------------+ +-------+-------+ // | // | // | // | // +-------v-------+ // | DC2N2 | // +---------------+ // // Consider the case where // a node DC1N1 in region DC1 sends a request to DC2N1 which forwards it to // to local token owner DC2N2. Now DC2N1 receives a response from DC2N2 which // has to be relayed back to DC1N1. This response from DC2N2 already has a // dnode header but for the link between DC2N1 and DC2N2. DC2N1 should strip // this header and prepend its own header for sending it back to DC1N1. 
This // gets handled in encryption case since we overwrite all mbufs in the response // However if the encryption is off, the message length sent to dmsg_write // consists of the header from DC2N2 also which is wrong. So this relaying // of responses will not work for the case where encryption is disabled. // // So msg_length should really be from mbuf->pos and not mbuf->start. This // is a problem only with remote region replication since that is the only // case where we CAN have 2 hops to send the request/response. This is also // not a problem if encryption is ON. ASSERT(conn->type == CONN_DNODE_PEER_CLIENT); struct msg *rsp = rsp_send_next(ctx, conn); if (rsp != NULL && conn->dyn_mode) { struct msg *pmsg = rsp->peer; //need to deal with multi-block later uint64_t msg_id = pmsg->dmsg->id; if (rsp->dnode_header_prepended) { return rsp; } struct mbuf *header_buf = mbuf_get(); if (header_buf == NULL) { loga("Unable to obtain an mbuf for header!"); return NULL; //need to address error here properly } dmsg_type_t msg_type = DMSG_RES; //TODOs: need to set the outcoming conn to be secured too if the incoming conn is secured if (pmsg->owner->dnode_secured || conn->dnode_secured) { if (log_loggable(LOG_VVERB)) { log_debug(LOG_VVERB, "Encrypting response ..."); loga("AES encryption key: %s\n", base64_encode(conn->aes_key, AES_KEYLEN)); } if (ENCRYPTION) { status = dyn_aes_encrypt_msg(rsp, conn->aes_key); if (status == DN_ERROR) { loga("OOM to obtain an mbuf for encryption!"); mbuf_put(header_buf); req_put(rsp); return NULL; } if (log_loggable(LOG_VVERB)) { log_debug(LOG_VERB, "#encrypted bytes : %d", status); } dmsg_write(header_buf, msg_id, msg_type, conn, msg_length(rsp)); } else { if (log_loggable(LOG_VVERB)) { log_debug(LOG_VERB, "no encryption on the rsp payload"); } dmsg_write(header_buf, msg_id, msg_type, conn, msg_length(rsp)); } } else { //write dnode header log_debug(LOG_VERB, "sending dnode response with msg_id %u", msg_id); dmsg_write(header_buf, msg_id, 
msg_type, conn, msg_length(rsp)); } rsp->dnode_header_prepended = 1; mbuf_insert_head(&rsp->mhdr, header_buf); if (log_loggable(LOG_VVERB)) { log_hexdump(LOG_VVERB, header_buf->pos, mbuf_length(header_buf), "resp dyn message - header: "); msg_dump(rsp); } } return rsp; }
void dyn_parse_req(struct msg *r) { if (log_loggable(LOG_VVERB)) { log_debug(LOG_VVERB, ":::::::::::::::::::::: In dyn_parse_req, start to process request :::::::::::::::::::::: "); msg_dump(r); } bool done_parsing = false; struct mbuf *b = STAILQ_LAST(&r->mhdr, mbuf, next); if (dyn_parse_core(r)) { struct dmsg *dmsg = r->dmsg; struct conn *conn = r->owner; conn->same_dc = dmsg->same_dc; if (dmsg->type != DMSG_UNKNOWN && dmsg->type != DMSG_REQ && dmsg->type != DMSG_REQ_FORWARD && dmsg->type != GOSSIP_SYN) { r->state = 0; r->result = MSG_PARSE_OK; r->dyn_state = DYN_DONE; return; } if (r->dyn_state == DYN_DONE && dmsg->bit_field == 1) { dmsg->owner->owner->dnode_secured = 1; r->owner->dnode_crypto_state = 1; r->dyn_state = DYN_POST_DONE; r->result = MSG_PARSE_REPAIR; if (dmsg->mlen > 1) { //Decrypt AES key dyn_rsa_decrypt(dmsg->data, aes_decrypted_buf); strncpy(r->owner->aes_key, aes_decrypted_buf, strlen(aes_decrypted_buf)); } if (dmsg->plen + b->pos <= b->last) { struct mbuf *decrypted_buf = mbuf_get(); if (decrypted_buf == NULL) { loga("Unable to obtain an mbuf for dnode msg's header!"); r->result = MSG_OOM_ERROR; return; } dyn_aes_decrypt(b->pos, dmsg->plen, decrypted_buf, r->owner->aes_key); b->pos = b->pos + dmsg->plen; r->pos = decrypted_buf->start; mbuf_copy(decrypted_buf, b->pos, mbuf_length(b)); mbuf_insert(&r->mhdr, decrypted_buf); mbuf_remove(&r->mhdr, b); mbuf_put(b); r->mlen = mbuf_length(decrypted_buf); data_store_parse_req(r); } //substract alraedy received bytes dmsg->plen -= b->last - b->pos; return; } else if (r->dyn_state == DYN_POST_DONE) { struct mbuf *last_buf = STAILQ_LAST(&r->mhdr, mbuf, next); if (last_buf->read_flip == 1) { data_store_parse_req(r); } else { r->result = MSG_PARSE_AGAIN; } return; } if (dmsg->type == GOSSIP_SYN) { //TODOs: need to address multi-buffer msg later dmsg->payload = b->pos; b->pos = b->pos + dmsg->plen; r->pos = b->pos; done_parsing = true; } if (done_parsing) return; return data_store_parse_req(r); } //bad case 
if (log_loggable(LOG_VVERB)) { log_debug(LOG_VVERB, "Bad or splitted message"); //fix me to do something msg_dump(r); } r->result = MSG_PARSE_AGAIN; }
/*
 * Send msg to the source node of cunit and a copy of it to the target
 * backend node that owns cunit->key.
 *
 * A copy of msg is built by appending the contents of every mbuf in
 * msg->data to a fresh message; ptr and resp_check are carried over. The
 * original is handed to prepare_send_msg() for cunit->srnode, the copy for
 * the node returned by trgroup->get_backend_node().
 *
 * Returns RMT_OK on success and RMT_ERROR otherwise. Apart from the
 * NULL-argument case, msg is released on every failure that happens before
 * it has been handed to prepare_send_msg() successfully.
 */
static int
send_msg_to_all(check_unit *cunit, struct msg *msg)
{
    int ret;
    thread_data *cdata;
    redis_group *trgroup;
    redis_node *trnode;
    struct msg *msg_same;
    listNode *lnode;
    struct mbuf *mbuf;

    /* validate the arguments before anything is read through them */
    if (cunit == NULL || msg == NULL) {
        return RMT_ERROR;
    }

    cdata = cunit->cdata;
    trgroup = cdata->trgroup;

    msg_same = msg_get(msg->mb, msg->request, msg->kind);
    if (msg_same == NULL) {
        log_error("ERROR: msg clone failed.");
        msg_put(msg);
        msg_free(msg);
        return RMT_ERROR;
    }

    /* copy the payload of every buffer into the clone */
    lnode = listFirst(msg->data);
    while (lnode) {
        mbuf = listNodeValue(lnode);
        lnode = lnode->next;

        ret = msg_append_full(msg_same, mbuf->pos, mbuf_length(mbuf));
        if (ret != RMT_OK) {
            log_error("ERROR: out of memory.");
            msg_put(msg_same);
            msg_free(msg_same);
            /* release the original too, as the other failure paths do */
            msg_put(msg);
            msg_free(msg);
            return RMT_ERROR;
        }
    }

    msg_same->ptr = msg->ptr;
    msg_same->resp_check = msg->resp_check;

    ret = prepare_send_msg(cunit->srnode, msg, cunit->srnode);
    if (ret != RMT_OK) {
        msg_put(msg);
        msg_free(msg);
        msg_put(msg_same);
        msg_free(msg_same);
        return RMT_ERROR;
    }

    /* from here on msg has been handed off and is not touched again */
    trnode = trgroup->get_backend_node(trgroup, (uint8_t *)cunit->key,
                                       (uint32_t)sdslen(cunit->key));
    if (prepare_send_msg(trnode, msg_same, trnode) != RMT_OK) {
        msg_put(msg_same);
        msg_free(msg_same);
        return RMT_ERROR;
    }

    return RMT_OK;
}
/* dnode sends a response back to a peer */ struct msg * dnode_rsp_send_next(struct context *ctx, struct conn *conn) { if (TRACING_LEVEL == LOG_VVERB) { log_debug(LOG_VVERB, "dnode_rsp_send_next entering"); } ASSERT(conn->dnode_client && !conn->dnode_server); struct msg *msg = rsp_send_next(ctx, conn); if (msg != NULL && conn->dyn_mode) { struct msg *pmsg = TAILQ_FIRST(&conn->omsg_q); //peer request's msg //need to deal with multi-block later uint64_t msg_id = pmsg->dmsg->id; struct mbuf *header_buf = mbuf_get(); if (header_buf == NULL) { loga("Unable to obtain an mbuf for header!"); return NULL; //need to address error here properly } //TODOs: need to set the outcoming conn to be secured too if the incoming conn is secured if (pmsg->owner->dnode_secured || conn->dnode_secured) { if (TRACING_LEVEL == LOG_VVERB) { log_debug(LOG_VVERB, "Encrypting response ..."); loga("AES encryption key: %s\n", base64_encode(conn->aes_key, AES_KEYLEN)); } struct mbuf *data_buf = STAILQ_LAST(&msg->mhdr, mbuf, next); //if (ENCRYPTION) { struct mbuf *encrypted_buf = mbuf_get(); if (encrypted_buf == NULL) { loga("Unable to obtain an mbuf for encryption!"); return NULL; //TODOs: need to clean up } rstatus_t status = dyn_aes_encrypt(data_buf->pos, mbuf_length(data_buf), encrypted_buf, conn->aes_key); if (TRACING_LEVEL == LOG_VVERB) { log_debug(LOG_VERB, "#encrypted bytes : %d", status); } dmsg_write(header_buf, msg_id, DMSG_RES, conn, mbuf_length(encrypted_buf)); if (TRACING_LEVEL == LOG_VVERB) { log_hexdump(LOG_VVERB, data_buf->pos, mbuf_length(data_buf), "resp dyn message - original payload: "); log_hexdump(LOG_VVERB, encrypted_buf->pos, mbuf_length(encrypted_buf), "dyn message encrypted payload: "); } mbuf_copy(header_buf, encrypted_buf->start, mbuf_length(encrypted_buf)); mbuf_insert(&msg->mhdr, header_buf); //remove the original dbuf out of the queue and insert encrypted mbuf to replace mbuf_remove(&msg->mhdr, data_buf); //mbuf_insert(&msg->mhdr, encrypted_buf); mbuf_put(data_buf); 
mbuf_put(encrypted_buf); //} else { // log_debug(LOG_VERB, "no encryption on the response's payload"); // dmsg_write(header_buf, msg_id, DMSG_RES, conn, mbuf_length(data_buf)); //} } else { dmsg_write(header_buf, msg_id, DMSG_RES, conn, 0);//Dont care about 0 or the real length as we don't use that value in unencryption mode mbuf_insert_head(&msg->mhdr, header_buf); } if (TRACING_LEVEL == LOG_VVERB) { log_hexdump(LOG_VVERB, header_buf->pos, mbuf_length(header_buf), "resp dyn message - header: "); msg_dump(msg); } } return msg; }
/* Forward a client request over to a peer */ void dnode_peer_req_forward(struct context *ctx, struct conn *c_conn, struct conn *p_conn, struct msg *msg, struct rack *rack, uint8_t *key, uint32_t keylen) { if (TRACING_LEVEL == LOG_VVERB) { log_debug(LOG_VVERB, "dnode_peer_req_forward entering"); } rstatus_t status; /* enqueue message (request) into client outq, if response is expected */ if (!msg->noreply) { c_conn->enqueue_outq(ctx, c_conn, msg); } ASSERT(!p_conn->dnode_client && !p_conn->dnode_server); ASSERT(c_conn->client); /* enqueue the message (request) into peer inq */ if (TAILQ_EMPTY(&p_conn->imsg_q)) { status = event_add_out(ctx->evb, p_conn); if (status != DN_OK) { dnode_req_forward_error(ctx, p_conn, msg); p_conn->err = errno; return; } } uint64_t msg_id = peer_msg_id++; struct mbuf *header_buf = mbuf_get(); if (header_buf == NULL) { loga("Unable to obtain an mbuf for dnode msg's header!"); return; } if (p_conn->dnode_secured) { //Encrypting and adding header for a request struct mbuf *data_buf = STAILQ_LAST(&msg->mhdr, mbuf, next); //TODOs: need to deal with multi-block later log_debug(LOG_VERB, "AES encryption key: %s\n", base64_encode(p_conn->aes_key, AES_KEYLEN)); struct mbuf *encrypted_buf = mbuf_get(); if (encrypted_buf == NULL) { loga("Unable to obtain an mbuf for encryption!"); return; //TODOs: need to clean up } status = dyn_aes_encrypt(data_buf->pos, mbuf_length(data_buf), encrypted_buf, p_conn->aes_key); log_debug(LOG_VERB, "#encrypted bytes : %d", status); //write dnode header dmsg_write(header_buf, msg_id, DMSG_REQ, p_conn, mbuf_length(encrypted_buf)); mbuf_insert_head(&msg->mhdr, header_buf); log_hexdump(LOG_VERB, data_buf->pos, mbuf_length(data_buf), "dyn message original payload: "); log_hexdump(LOG_VERB, encrypted_buf->pos, mbuf_length(encrypted_buf), "dyn message encrypted payload: "); //remove the original dbuf out of the queue and insert encrypted mbuf to replace mbuf_remove(&msg->mhdr, data_buf); mbuf_insert(&msg->mhdr, 
encrypted_buf); //free it as no one will need it again mbuf_put(data_buf); } else { //write dnode header dmsg_write(header_buf, msg_id, DMSG_REQ, p_conn, 0); mbuf_insert_head(&msg->mhdr, header_buf); } if (TRACING_LEVEL == LOG_VVERB) { log_hexdump(LOG_VVERB, header_buf->pos, mbuf_length(header_buf), "dyn message header: "); msg_dump(msg); } p_conn->enqueue_inq(ctx, p_conn, msg); dnode_peer_req_forward_stats(ctx, p_conn->owner, msg); if (TRACING_LEVEL == LOG_VERB) { log_debug(LOG_VERB, "remote forward from c %d to s %d req %"PRIu64" len %"PRIu32 " type %d with key '%.*s'", c_conn->sd, p_conn->sd, msg->id, msg->mlen, msg->type, keylen, key); } }