/*
 * Tear down the payload owned by a message.
 *
 * Every mbuf stored in msg->data is handed back through mbuf_put(), the
 * list itself is released, and the optional frag_seq / keys members are
 * freed. All released members are reset to NULL, as is msg->mb.
 */
void
msg_put(struct msg *msg)
{
    log_debug(LOG_VVERB, "put msg %p id %"PRIu64"", msg, msg->id);

    /* pop buffers off the front of the list until it is empty */
    for (; listLength(msg->data) > 0; ) {
        listNode *head = listFirst(msg->data);
        struct mbuf *buf = listNodeValue(head);

        listDelNode(msg->data, head);
        mbuf_put(buf);
    }

    listRelease(msg->data);
    msg->data = NULL;

    if (msg->frag_seq) {
        rmt_free(msg->frag_seq);
        msg->frag_seq = NULL;
    }

    if (msg->keys) {
        /*
         * a hack here: the element count is zeroed before destroying the
         * array. NOTE(review): presumably array_destroy() expects an empty
         * array -- confirm against its implementation.
         */
        msg->keys->nelem = 0;
        array_destroy(msg->keys);
        msg->keys = NULL;
    }

    msg->mb = NULL;
}
void dyn_parse_rsp(struct msg *r) { if (log_loggable(LOG_VVERB)) { log_debug(LOG_VVERB, ":::::::::::::::::::::: In dyn_parse_rsp, start to process response :::::::::::::::::::::::: "); msg_dump(r); } if (dyn_parse_core(r)) { struct dmsg *dmsg = r->dmsg; struct mbuf *b = STAILQ_LAST(&r->mhdr, mbuf, next); if (dmsg->type != DMSG_UNKNOWN && dmsg->type != DMSG_RES) { log_debug(LOG_DEBUG, "Resp parser: I got a dnode msg of type %d", dmsg->type); r->state = 0; r->result = MSG_PARSE_OK; r->dyn_state = DYN_DONE; return; } //check whether we need to decrypt the payload if (dmsg->bit_field == 1) { //dmsg->owner->owner->dnode_secured = 1; struct mbuf *decrypted_buf = mbuf_get(); if (decrypted_buf == NULL) { log_debug(LOG_INFO, "Unable to obtain an mbuf for dnode msg's header!"); r->result = MSG_OOM_ERROR; return; } //Dont need to decrypt AES key - pull it out from the conn dyn_aes_decrypt(dmsg->payload, dmsg->plen, decrypted_buf, r->owner->aes_key); b->pos = b->pos + dmsg->plen; r->pos = decrypted_buf->start; mbuf_copy(decrypted_buf, b->pos, mbuf_length(b)); mbuf_insert(&r->mhdr, decrypted_buf); mbuf_remove(&r->mhdr, b); mbuf_put(b); r->mlen = mbuf_length(decrypted_buf); } if (r->redis) { return redis_parse_rsp(r); } return memcache_parse_rsp(r); } //bad case if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "Resp: bad message - cannot parse"); //fix me to do something msg_dump(r); } r->result = MSG_PARSE_AGAIN; }
rstatus_t conn_close(struct conn *conn) { rstatus_t status; struct mbuf *mbuf, *nbuf; /* current and next mbuf */ if (conn->fd < 0) { conn_put(conn); return NC_OK; } if (!STAILQ_EMPTY(&conn->recv_queue)) { log_warn("close conn %d discard data in send_queue", conn->fd); for (mbuf = STAILQ_FIRST(&conn->recv_queue); mbuf != NULL; mbuf = nbuf) { nbuf = STAILQ_NEXT(mbuf, next); mbuf_remove(&conn->recv_queue, mbuf); mbuf_put(mbuf); } } if (!STAILQ_EMPTY(&conn->send_queue)) { log_warn("close conn %d discard data in send_queue", conn->fd); for (mbuf = STAILQ_FIRST(&conn->send_queue); mbuf != NULL; mbuf = nbuf) { nbuf = STAILQ_NEXT(mbuf, next); mbuf_remove(&conn->send_queue, mbuf); mbuf_put(mbuf); } } status = close(conn->fd); if (status < 0) { log_error("close c %d failed, ignored: %s", conn->fd, strerror(errno)); } conn->fd = -1; conn_put(conn); return NC_OK; }
static rstatus_t conn_send_queue(struct conn *conn) { struct mbuf *mbuf, *nbuf; /* current and next mbuf */ size_t mlen; /* current mbuf data length */ ssize_t n; for (mbuf = STAILQ_FIRST(&conn->send_queue); mbuf != NULL; mbuf = nbuf) { nbuf = STAILQ_NEXT(mbuf, next); if (mbuf_empty(mbuf)) { continue; } mlen = mbuf_length(mbuf); n = conn_send_buf(conn, mbuf->pos, mlen); if (n < 0) { if (n == NC_EAGAIN) { return NC_OK; } return NC_ERROR; } mbuf->pos += n; if (n < mlen) { ASSERT(mbuf->pos < mbuf->end); return NC_OK; } ASSERT(mbuf->pos == mbuf->last); mbuf_remove(&conn->send_queue, mbuf); mbuf_put(mbuf); } conn->send_ready = 0; return NC_OK; }
/* * Pre-coalesce handler is invoked when the message is a response to * the fragmented multi vector request - 'get' or 'gets' and all the * responses to the fragmented request vector hasn't been received */ void memcache_pre_coalesce(struct msg *r) { struct msg *pr = r->peer; /* peer request */ struct mbuf *mbuf; ASSERT(!r->request); ASSERT(pr->request); if (pr->frag_id == 0) { /* do nothing, if not a response to a fragmented request */ return; } pr->frag_owner->nfrag_done++; switch (r->type) { case MSG_RSP_MC_VALUE: case MSG_RSP_MC_END: /* * Readjust responses of the fragmented message vector by not * including the end marker for all */ ASSERT(r->end != NULL); for (;;) { mbuf = STAILQ_LAST(&r->mhdr, mbuf, next); ASSERT(mbuf != NULL); /* * We cannot assert that end marker points to the last mbuf * Consider a scenario where end marker points to the * penultimate mbuf and the last mbuf only contains spaces * and CRLF: mhdr -> [...END] -> [\r\n] */ if (r->end >= mbuf->pos && r->end < mbuf->last) { /* end marker is within this mbuf */ r->mlen -= (uint32_t)(mbuf->last - r->end); mbuf->last = r->end; break; } /* end marker is not in this mbuf */ r->mlen -= mbuf_length(mbuf); mbuf_remove(&r->mhdr, mbuf); mbuf_put(mbuf); } break; default: /* * Valid responses for a fragmented requests are MSG_RSP_MC_VALUE or, * MSG_RSP_MC_END. For an invalid response, we send out SERVER_ERRROR * with EINVAL errno */ mbuf = STAILQ_FIRST(&r->mhdr); log_hexdump(LOG_ERR, mbuf->pos, mbuf_length(mbuf), "rsp fragment " "with unknown type %d", r->type); pr->error = 1; pr->err = EINVAL; break; } }
static rstatus_t sentinel_proc_pub(struct context *ctx, struct msg *msg) { rstatus_t status; struct string pool_name, server_name, server_ip, tmp_string, pub_titile, pub_event; struct mbuf *line_buf; int server_port; string_init(&tmp_string); string_init(&pool_name); string_init(&server_name); string_init(&server_ip); string_set_text(&pub_titile, "pmessage"); string_set_text(&pub_event, "+switch-master"); line_buf = mbuf_get(); if (line_buf == NULL) { goto error; } /* get line in line num 3 for pub titile */ msg_read_line(msg, line_buf, 3); if (mbuf_length(line_buf) == 0) { log_error("read line failed from sentinel pmessage when skip line not used."); goto error; } status = mbuf_read_string(line_buf, CR, &tmp_string); if (status != NC_OK || string_compare(&pub_titile, &tmp_string)) { log_error("pub title error(lineinfo %.*s)", tmp_string.len, tmp_string.data); goto error; } /* get line in line num 7 for pub event */ msg_read_line(msg, line_buf, 4); if (mbuf_length(line_buf) == 0) { log_error("read line failed from sentinel pmessage when skip line not used."); goto error; } status = mbuf_read_string(line_buf, CR, &tmp_string); if (status != NC_OK || string_compare(&pub_event, &tmp_string)) { log_error("pub channel error(lineinfo %.*s)", tmp_string.len, tmp_string.data); goto error; } /* get line in line num 9 for pub info */ msg_read_line(msg, line_buf, 2); if (mbuf_length(line_buf) == 0) { log_error("read line failed from sentinel pmessage when skip line not used."); goto error; } /* parse switch master info */ /* get pool name */ status = mbuf_read_string(line_buf, SENTINEL_SERVERNAME_SPLIT, &pool_name); if (status != NC_OK) { log_error("get pool name string failed."); goto error; } /* get server name */ status = mbuf_read_string(line_buf, ' ', &server_name); if (status != NC_OK) { log_error("get server name string failed."); goto error; } /* skip old ip and port string */ status = mbuf_read_string(line_buf, ' ', NULL); if (status != NC_OK) { log_error("skip old 
ip string failed."); goto error; } status = mbuf_read_string(line_buf, ' ', NULL); if (status != NC_OK) { log_error("skip old port string failed."); goto error; } /* get new server ip string */ status = mbuf_read_string(line_buf, ' ', &server_ip); if (status != NC_OK) { log_error("get new server ip string failed."); goto error; } /* get new server port */ status = mbuf_read_string(line_buf, CR, &tmp_string); if (status != NC_OK) { log_error("get new server port string failed."); goto error; } server_port = nc_atoi(tmp_string.data, tmp_string.len); if (server_port < 0) { log_error("tanslate server port string to int failed."); goto error; } status = server_switch(ctx, &pool_name, &server_name, &server_ip, server_port); if (status == NC_OK) { conf_rewrite(ctx); } status = NC_OK; done: if (line_buf != NULL) { mbuf_put(line_buf); } string_deinit(&tmp_string); string_deinit(&server_ip); string_deinit(&server_name); string_deinit(&pool_name); return status; error: status = NC_ERROR; goto done; }
static rstatus_t sentinel_proc_acksub(struct context *ctx, struct msg *msg) { rstatus_t status; struct string sub_titile, sub_channel, sub_ok, tmp_string; struct mbuf *line_buf; string_init(&tmp_string); string_set_text(&sub_titile, "psubscribe"); string_set_text(&sub_channel, "+switch-master"); string_set_text(&sub_ok, ":1"); line_buf = mbuf_get(); if (line_buf == NULL) { goto error; } /* get line in line num 3 for sub titile */ msg_read_line(msg, line_buf, 3); if (mbuf_length(line_buf) == 0) { log_error("read line failed from sentinel ack sub when skip line not used."); goto error; } status = mbuf_read_string(line_buf, CR, &tmp_string); if (status != NC_OK || string_compare(&sub_titile, &tmp_string)) { goto error; } /* get line in line num 5 for sub channel */ msg_read_line(msg, line_buf, 2); if (mbuf_length(line_buf) == 0) { log_error("read line failed from sentinel ack sub when skip line not used."); goto error; } status = mbuf_read_string(line_buf, CR, &tmp_string); if (status != NC_OK || string_compare(&sub_channel, &tmp_string)) { goto error; } /* get sub status */ msg_read_line(msg, line_buf, 1); if (line_buf == 0) { log_error("read line failed from sentinel ack sub when skip line not used."); goto error; } status = mbuf_read_string(line_buf, CR, &tmp_string); if (status != NC_OK || string_compare(&sub_ok, &tmp_string)) { goto error; } log_debug(LOG_INFO, "success sub channel %.*s from sentinel", sub_channel.len, sub_channel.data); status = NC_OK; done: if (line_buf != NULL) { mbuf_put(line_buf); } string_deinit(&tmp_string); return status; error: status = NC_ERROR; goto done; }
static rstatus_t sentinel_proc_sentinel_info(struct context *ctx, struct msg *msg) { rstatus_t status; int i, master_num, switch_num; struct string pool_name, server_name, server_ip, tmp_string, sentinel_masters_prefix, master_ok; struct mbuf *line_buf; int server_port; string_init(&tmp_string); string_init(&pool_name); string_init(&server_name); string_init(&server_ip); string_set_text(&sentinel_masters_prefix, "sentinel_masters"); string_set_text(&master_ok, "status=ok"); line_buf = mbuf_get(); if (line_buf == NULL) { goto error; } /* get sentinel master num at line 3 */ msg_read_line(msg, line_buf, 3); if (mbuf_length(line_buf) == 0) { log_error("read line failed from sentinel ack info when skip line not used."); goto error; } status = mbuf_read_string(line_buf, ':', &tmp_string); if (status != NC_OK || string_compare(&sentinel_masters_prefix, &tmp_string)) { goto error; } status = mbuf_read_string(line_buf, CR, &tmp_string); if (status != NC_OK) { goto error; } master_num = nc_atoi(tmp_string.data, tmp_string.len); if (master_num < 0) { log_error("parse master number from sentinel ack info failed."); goto error; } /* skip 3 line in ack info which is not used. 
*/ msg_read_line(msg, line_buf, 3); if (mbuf_length(line_buf) == 0) { log_error("read line failed from sentinel ack info when skip line not used."); goto error; } /* parse master info from sentinel ack info */ switch_num = 0; for (i = 0; i < master_num; i++) { msg_read_line(msg, line_buf, 1); if (mbuf_length(line_buf) == 0) { log_error("read line failed from sentinel ack info when parse master item."); goto error; } log_debug(LOG_INFO, "master item line : %.*s", mbuf_length(line_buf), line_buf->pos); /* skip master item prefix */ status = mbuf_read_string(line_buf, ':', NULL); if (status != NC_OK) { log_error("skip master item prefix failed"); goto error; } /* skip master item server name prefix */ status = mbuf_read_string(line_buf, '=', NULL); if (status != NC_OK) { log_error("skip master item server name prefix failed."); goto error; } /* get server pool name */ status = mbuf_read_string(line_buf, SENTINEL_SERVERNAME_SPLIT, &pool_name); if (status != NC_OK) { log_error("get server pool name failed."); goto error; } /* get server name */ status = mbuf_read_string(line_buf, ',', &server_name); if (status != NC_OK) { log_error("get server name failed."); goto error; } /* get master status */ status = mbuf_read_string(line_buf, ',', &tmp_string); if (status != NC_OK) { log_error("get master status failed."); goto error; } if (string_compare(&master_ok, &tmp_string)) { log_error("master item status is not ok, use it anyway"); } /* skip ip string prefix name */ status = mbuf_read_string(line_buf, '=', NULL); if (status != NC_OK) { log_error("skip master item address prefix failed."); goto error; } /* get server ip string */ status = mbuf_read_string(line_buf, ':', &server_ip); if (status != NC_OK) { log_error("get server ip string failed."); goto error; } /* get server port */ status = mbuf_read_string(line_buf, ',', &tmp_string); if (status != NC_OK) { log_error("get server port string failed."); goto error; } server_port = nc_atoi(tmp_string.data, tmp_string.len); 
if (server_port < 0) { log_error("tanslate server port string to int failed."); goto error; } status = server_switch(ctx, &pool_name, &server_name, &server_ip, server_port); /* if server is switched, add switch number */ if (status == NC_OK) { switch_num++; } } if (switch_num > 0) { conf_rewrite(ctx); } status = NC_OK; done: if (line_buf != NULL) { mbuf_put(line_buf); } string_deinit(&tmp_string); string_deinit(&pool_name); string_deinit(&server_name); string_deinit(&server_ip); return status; error: status = NC_ERROR; goto done; }
/* Forward a client request over to a peer */ void dnode_peer_req_forward(struct context *ctx, struct conn *c_conn, struct conn *p_conn, struct msg *msg, struct rack *rack, uint8_t *key, uint32_t keylen) { struct server *server = p_conn->owner; log_debug(LOG_DEBUG, "forwarding request from client conn '%s' to peer conn '%s' on rack '%.*s' dc '%.*s' ", dn_unresolve_peer_desc(c_conn->sd), dn_unresolve_peer_desc(p_conn->sd), rack->name->len, rack->name->data, server->dc.len, server->dc.data); struct string *dc = rack->dc; rstatus_t status; /* enqueue message (request) into client outq, if response is expected */ if (!msg->noreply && !msg->swallow) { conn_enqueue_outq(ctx, c_conn, msg); } ASSERT(p_conn->type == CONN_DNODE_PEER_SERVER); ASSERT((c_conn->type == CONN_CLIENT) || (c_conn->type == CONN_DNODE_PEER_CLIENT)); /* enqueue the message (request) into peer inq */ status = event_add_out(ctx->evb, p_conn); if (status != DN_OK) { dnode_req_forward_error(ctx, p_conn, msg); p_conn->err = errno; return; } struct mbuf *header_buf = mbuf_get(); if (header_buf == NULL) { loga("Unable to obtain an mbuf for dnode msg's header!"); req_put(msg); return; } struct server_pool *pool = c_conn->owner; dmsg_type_t msg_type = (string_compare(&pool->dc, dc) != 0)? 
DMSG_REQ_FORWARD : DMSG_REQ; if (p_conn->dnode_secured) { //Encrypting and adding header for a request if (log_loggable(LOG_VVERB)) { log_debug(LOG_VERB, "AES encryption key: %s\n", base64_encode(p_conn->aes_key, AES_KEYLEN)); } //write dnode header if (ENCRYPTION) { status = dyn_aes_encrypt_msg(msg, p_conn->aes_key); if (status == DN_ERROR) { loga("OOM to obtain an mbuf for encryption!"); mbuf_put(header_buf); req_put(msg); return; } if (log_loggable(LOG_VVERB)) { log_debug(LOG_VERB, "#encrypted bytes : %d", status); } dmsg_write(header_buf, msg->id, msg_type, p_conn, msg_length(msg)); } else { if (log_loggable(LOG_VVERB)) { log_debug(LOG_VERB, "no encryption on the msg payload"); } dmsg_write(header_buf, msg->id, msg_type, p_conn, msg_length(msg)); } } else { //write dnode header dmsg_write(header_buf, msg->id, msg_type, p_conn, msg_length(msg)); } mbuf_insert_head(&msg->mhdr, header_buf); if (log_loggable(LOG_VVERB)) { log_hexdump(LOG_VVERB, header_buf->pos, mbuf_length(header_buf), "dyn message header: "); msg_dump(msg); } conn_enqueue_inq(ctx, p_conn, msg); dnode_peer_req_forward_stats(ctx, p_conn->owner, msg); if (log_loggable(LOG_VVERB)) { log_debug(LOG_VVERB, "remote forward from c %d to s %d req %"PRIu64" len %"PRIu32 " type %d with key '%.*s'", c_conn->sd, p_conn->sd, msg->id, msg->mlen, msg->type, keylen, key); } }
rstatus_t dnode_peer_handshake_announcing(void *rmsg) { rstatus_t status; struct ring_msg *msg = rmsg; struct server_pool *sp = msg->sp; log_debug(LOG_VVERB, "dyn: handshaking peers"); struct array *peers = &sp->peers; uint32_t i,nelem; nelem = array_n(peers); //we assume one mbuf is enough for now - will enhance with multiple mbufs later struct mbuf *mbuf = mbuf_get(); if (mbuf == NULL) { log_debug(LOG_VVERB, "Too bad, not enough memory!"); return DN_ENOMEM; } //annoucing myself by sending msg: 'dc$rack$token,started_ts,node_state,node_dns' mbuf_write_string(mbuf, &sp->dc); mbuf_write_char(mbuf, '$'); mbuf_write_string(mbuf, &sp->rack); mbuf_write_char(mbuf, '$'); struct dyn_token *token = (struct dyn_token *) array_get(&sp->tokens, 0); if (token == NULL) { log_debug(LOG_VVERB, "Why? This should not be null!"); mbuf_put(mbuf); return DN_ERROR; } mbuf_write_uint32(mbuf, token->mag[0]); mbuf_write_char(mbuf, ','); int64_t cur_ts = (int64_t)time(NULL); mbuf_write_uint64(mbuf, cur_ts); mbuf_write_char(mbuf, ','); mbuf_write_uint8(mbuf, sp->ctx->dyn_state); mbuf_write_char(mbuf, ','); char *broadcast_addr = get_broadcast_address(sp); mbuf_write_bytes(mbuf, broadcast_addr, dn_strlen(broadcast_addr)); //for each peer, send a registered msg for (i = 0; i < nelem; i++) { struct server *peer = (struct server *) array_get(peers, i); if (peer->is_local) continue; log_debug(LOG_VVERB, "Gossiping to node '%.*s'", peer->name.len, peer->name.data); struct conn * conn = dnode_peer_conn(peer); if (conn == NULL) { //running out of connection due to memory exhaust log_debug(LOG_DEBUG, "Unable to obtain a connection object"); return DN_ERROR; } status = dnode_peer_connect(sp->ctx, peer, conn); if (status != DN_OK ) { dnode_peer_close(sp->ctx, conn); log_debug(LOG_DEBUG, "Error happened in connecting on conn %d", conn->sd); return DN_ERROR; } //conn-> dnode_peer_gossip_forward(sp->ctx, conn, sp->redis, mbuf); //peer_gossip_forward1(sp->ctx, conn, sp->redis, &data); } //free this as 
nobody else will do //mbuf_put(mbuf); return DN_OK; }
void dyn_parse_req(struct msg *r) { if (log_loggable(LOG_VVERB)) { log_debug(LOG_VVERB, ":::::::::::::::::::::: In dyn_parse_req, start to process request :::::::::::::::::::::: "); msg_dump(r); } bool done_parsing = false; struct mbuf *b = STAILQ_LAST(&r->mhdr, mbuf, next); if (dyn_parse_core(r)) { struct dmsg *dmsg = r->dmsg; struct conn *conn = r->owner; conn->same_dc = dmsg->same_dc; if (dmsg->type != DMSG_UNKNOWN && dmsg->type != DMSG_REQ && dmsg->type != DMSG_REQ_FORWARD && dmsg->type != GOSSIP_SYN) { r->state = 0; r->result = MSG_PARSE_OK; r->dyn_state = DYN_DONE; return; } if (r->dyn_state == DYN_DONE && dmsg->bit_field == 1) { dmsg->owner->owner->dnode_secured = 1; r->owner->dnode_crypto_state = 1; r->dyn_state = DYN_POST_DONE; r->result = MSG_PARSE_REPAIR; if (dmsg->mlen > 1) { //Decrypt AES key dyn_rsa_decrypt(dmsg->data, aes_decrypted_buf); strncpy(r->owner->aes_key, aes_decrypted_buf, strlen(aes_decrypted_buf)); } if (dmsg->plen + b->pos <= b->last) { struct mbuf *decrypted_buf = mbuf_get(); if (decrypted_buf == NULL) { loga("Unable to obtain an mbuf for dnode msg's header!"); r->result = MSG_OOM_ERROR; return; } dyn_aes_decrypt(b->pos, dmsg->plen, decrypted_buf, r->owner->aes_key); b->pos = b->pos + dmsg->plen; r->pos = decrypted_buf->start; mbuf_copy(decrypted_buf, b->pos, mbuf_length(b)); mbuf_insert(&r->mhdr, decrypted_buf); mbuf_remove(&r->mhdr, b); mbuf_put(b); r->mlen = mbuf_length(decrypted_buf); data_store_parse_req(r); } //substract alraedy received bytes dmsg->plen -= b->last - b->pos; return; } else if (r->dyn_state == DYN_POST_DONE) { struct mbuf *last_buf = STAILQ_LAST(&r->mhdr, mbuf, next); if (last_buf->read_flip == 1) { data_store_parse_req(r); } else { r->result = MSG_PARSE_AGAIN; } return; } if (dmsg->type == GOSSIP_SYN) { //TODOs: need to address multi-buffer msg later dmsg->payload = b->pos; b->pos = b->pos + dmsg->plen; r->pos = b->pos; done_parsing = true; } if (done_parsing) return; return data_store_parse_req(r); } //bad case 
if (log_loggable(LOG_VVERB)) { log_debug(LOG_VVERB, "Bad or splitted message"); //fix me to do something msg_dump(r); } r->result = MSG_PARSE_AGAIN; }
/* dnode sends a response back to a peer */ struct msg * dnode_rsp_send_next(struct context *ctx, struct conn *conn) { rstatus_t status; ASSERT(conn->dnode_client && !conn->dnode_server); struct msg *rsp = rsp_send_next(ctx, conn); if (rsp != NULL && conn->dyn_mode) { struct msg *pmsg = rsp->peer; //need to deal with multi-block later uint64_t msg_id = pmsg->dmsg->id; struct mbuf *header_buf = mbuf_get(); if (header_buf == NULL) { loga("Unable to obtain an mbuf for header!"); return NULL; //need to address error here properly } dmsg_type_t msg_type = DMSG_RES; //TODOs: need to set the outcoming conn to be secured too if the incoming conn is secured if (pmsg->owner->dnode_secured || conn->dnode_secured) { if (log_loggable(LOG_VVERB)) { log_debug(LOG_VVERB, "Encrypting response ..."); loga("AES encryption key: %s\n", base64_encode(conn->aes_key, AES_KEYLEN)); } if (ENCRYPTION) { status = dyn_aes_encrypt_msg(rsp, conn->aes_key); if (status == DN_ERROR) { loga("OOM to obtain an mbuf for encryption!"); mbuf_put(header_buf); req_put(rsp); return NULL; } if (log_loggable(LOG_VVERB)) { log_debug(LOG_VERB, "#encrypted bytes : %d", status); } dmsg_write(header_buf, msg_id, msg_type, conn, msg_length(rsp)); } else { if (log_loggable(LOG_VVERB)) { log_debug(LOG_VERB, "no encryption on the rsp payload"); } dmsg_write(header_buf, msg_id, msg_type, conn, msg_length(rsp)); } } else { //write dnode header log_info("sending dnode response with msg_id %u", msg_id); dmsg_write(header_buf, msg_id, msg_type, conn, msg_length(rsp)); } mbuf_insert_head(&rsp->mhdr, header_buf); if (log_loggable(LOG_VVERB)) { log_hexdump(LOG_VVERB, header_buf->pos, mbuf_length(header_buf), "resp dyn message - header: "); msg_dump(rsp); } } return rsp; }
/* dnode sends a response back to a peer */ static struct msg * dnode_rsp_send_next(struct context *ctx, struct conn *conn) { rstatus_t status; // SMB: There is some non trivial thing happening here. And I think it is very // important to read this before anything is changed in here. There is also a // bug that exists which I will mention briefly: // A message is a structure that has a list of mbufs which hold the actual data. // Each mbuf has start, pos, last as pointers (amongst others) which indicate start of the // buffer, current read position and end of the buffer respectively. // // Every time a message is sent to a peer within dynomite, a DNODE header is // prepended which is created using dmsg_write. A message remembers this case // in dnode_header_prepended, so that if the messsage is sent in parts, the // header is not prepended again for the subsequent parts. // // Like I said earlier there is a pos pointer in mbuf. If a message is sent // partially (or it is parsed partially too I think) the pos reflects that // case such that things can be resumed where it left off. // // dmsg_write has a parameter which reflects the payload length following the // dnode header calculated by msg_length. msg_length is a summation of all // mbuf sizes (last - start). Which I think is wrong. // // +------------+ +---------------+ // | DC1N1 +---------> | DC2N1 | // +------------+ +-------+-------+ // | // | // | // | // +-------v-------+ // | DC2N2 | // +---------------+ // // Consider the case where // a node DC1N1 in region DC1 sends a request to DC2N1 which forwards it to // to local token owner DC2N2. Now DC2N1 receives a response from DC2N2 which // has to be relayed back to DC1N1. This response from DC2N2 already has a // dnode header but for the link between DC2N1 and DC2N2. DC2N1 should strip // this header and prepend its own header for sending it back to DC1N1. 
This // gets handled in encryption case since we overwrite all mbufs in the response // However if the encryption is off, the message length sent to dmsg_write // consists of the header from DC2N2 also which is wrong. So this relaying // of responses will not work for the case where encryption is disabled. // // So msg_length should really be from mbuf->pos and not mbuf->start. This // is a problem only with remote region replication since that is the only // case where we CAN have 2 hops to send the request/response. This is also // not a problem if encryption is ON. ASSERT(conn->type == CONN_DNODE_PEER_CLIENT); struct msg *rsp = rsp_send_next(ctx, conn); if (rsp != NULL && conn->dyn_mode) { struct msg *pmsg = rsp->peer; //need to deal with multi-block later uint64_t msg_id = pmsg->dmsg->id; if (rsp->dnode_header_prepended) { return rsp; } struct mbuf *header_buf = mbuf_get(); if (header_buf == NULL) { loga("Unable to obtain an mbuf for header!"); return NULL; //need to address error here properly } dmsg_type_t msg_type = DMSG_RES; //TODOs: need to set the outcoming conn to be secured too if the incoming conn is secured if (pmsg->owner->dnode_secured || conn->dnode_secured) { if (log_loggable(LOG_VVERB)) { log_debug(LOG_VVERB, "Encrypting response ..."); loga("AES encryption key: %s\n", base64_encode(conn->aes_key, AES_KEYLEN)); } if (ENCRYPTION) { status = dyn_aes_encrypt_msg(rsp, conn->aes_key); if (status == DN_ERROR) { loga("OOM to obtain an mbuf for encryption!"); mbuf_put(header_buf); req_put(rsp); return NULL; } if (log_loggable(LOG_VVERB)) { log_debug(LOG_VERB, "#encrypted bytes : %d", status); } dmsg_write(header_buf, msg_id, msg_type, conn, msg_length(rsp)); } else { if (log_loggable(LOG_VVERB)) { log_debug(LOG_VERB, "no encryption on the rsp payload"); } dmsg_write(header_buf, msg_id, msg_type, conn, msg_length(rsp)); } } else { //write dnode header log_debug(LOG_VERB, "sending dnode response with msg_id %u", msg_id); dmsg_write(header_buf, msg_id, 
msg_type, conn, msg_length(rsp)); } rsp->dnode_header_prepended = 1; mbuf_insert_head(&rsp->mhdr, header_buf); if (log_loggable(LOG_VVERB)) { log_hexdump(LOG_VVERB, header_buf->pos, mbuf_length(header_buf), "resp dyn message - header: "); msg_dump(rsp); } } return rsp; }
/* * copy one response from src to dst * return bytes copied * */ static rstatus_t memcache_copy_bulk(struct msg *dst, struct msg *src) { struct mbuf *mbuf, *nbuf; uint8_t *p; uint32_t len = 0; uint32_t bytes = 0; uint32_t i = 0; for (mbuf = STAILQ_FIRST(&src->mhdr); mbuf && mbuf_empty(mbuf); mbuf = STAILQ_FIRST(&src->mhdr)) { mbuf_remove(&src->mhdr, mbuf); mbuf_put(mbuf); } mbuf = STAILQ_FIRST(&src->mhdr); if (mbuf == NULL) { return NC_OK; /* key not exists */ } p = mbuf->pos; /* get : VALUE key 0 len\r\nval\r\n */ /* gets: VALUE key 0 len cas\r\nval\r\n */ ASSERT(*p == 'V'); for (i = 0; i < 3; i++) { /* eat 'VALUE key 0 ' */ for (; *p != ' ';) { p++; } p++; } len = 0; for (; p < mbuf->last && isdigit(*p); p++) { len = len * 10 + (uint32_t)(*p - '0'); } for (; p < mbuf->last && ('\r' != *p); p++) { /* eat cas for gets */ ; } len += CRLF_LEN * 2; len += (p - mbuf->pos); bytes = len; /* copy len bytes to dst */ for (; mbuf;) { if (mbuf_length(mbuf) <= len) { /* steal this mbuf from src to dst */ nbuf = STAILQ_NEXT(mbuf, next); mbuf_remove(&src->mhdr, mbuf); mbuf_insert(&dst->mhdr, mbuf); len -= mbuf_length(mbuf); mbuf = nbuf; } else { /* split it */ nbuf = mbuf_get(); if (nbuf == NULL) { return NC_ENOMEM; } mbuf_copy(nbuf, mbuf->pos, len); mbuf_insert(&dst->mhdr, nbuf); mbuf->pos += len; break; } } dst->mlen += bytes; src->mlen -= bytes; log_debug(LOG_VVERB, "memcache_copy_bulk copy bytes: %d", bytes); return NC_OK; }
/* Forward a client request over to a peer */ void dnode_peer_req_forward(struct context *ctx, struct conn *c_conn, struct conn *p_conn, struct msg *msg, struct rack *rack, uint8_t *key, uint32_t keylen) { if (TRACING_LEVEL == LOG_VVERB) { log_debug(LOG_VVERB, "dnode_peer_req_forward entering"); } rstatus_t status; /* enqueue message (request) into client outq, if response is expected */ if (!msg->noreply) { c_conn->enqueue_outq(ctx, c_conn, msg); } ASSERT(!p_conn->dnode_client && !p_conn->dnode_server); ASSERT(c_conn->client); /* enqueue the message (request) into peer inq */ if (TAILQ_EMPTY(&p_conn->imsg_q)) { status = event_add_out(ctx->evb, p_conn); if (status != DN_OK) { dnode_req_forward_error(ctx, p_conn, msg); p_conn->err = errno; return; } } uint64_t msg_id = peer_msg_id++; struct mbuf *header_buf = mbuf_get(); if (header_buf == NULL) { loga("Unable to obtain an mbuf for dnode msg's header!"); return; } if (p_conn->dnode_secured) { //Encrypting and adding header for a request struct mbuf *data_buf = STAILQ_LAST(&msg->mhdr, mbuf, next); //TODOs: need to deal with multi-block later log_debug(LOG_VERB, "AES encryption key: %s\n", base64_encode(p_conn->aes_key, AES_KEYLEN)); struct mbuf *encrypted_buf = mbuf_get(); if (encrypted_buf == NULL) { loga("Unable to obtain an mbuf for encryption!"); return; //TODOs: need to clean up } status = dyn_aes_encrypt(data_buf->pos, mbuf_length(data_buf), encrypted_buf, p_conn->aes_key); log_debug(LOG_VERB, "#encrypted bytes : %d", status); //write dnode header dmsg_write(header_buf, msg_id, DMSG_REQ, p_conn, mbuf_length(encrypted_buf)); mbuf_insert_head(&msg->mhdr, header_buf); log_hexdump(LOG_VERB, data_buf->pos, mbuf_length(data_buf), "dyn message original payload: "); log_hexdump(LOG_VERB, encrypted_buf->pos, mbuf_length(encrypted_buf), "dyn message encrypted payload: "); //remove the original dbuf out of the queue and insert encrypted mbuf to replace mbuf_remove(&msg->mhdr, data_buf); mbuf_insert(&msg->mhdr, 
encrypted_buf); //free it as no one will need it again mbuf_put(data_buf); } else { //write dnode header dmsg_write(header_buf, msg_id, DMSG_REQ, p_conn, 0); mbuf_insert_head(&msg->mhdr, header_buf); } if (TRACING_LEVEL == LOG_VVERB) { log_hexdump(LOG_VVERB, header_buf->pos, mbuf_length(header_buf), "dyn message header: "); msg_dump(msg); } p_conn->enqueue_inq(ctx, p_conn, msg); dnode_peer_req_forward_stats(ctx, p_conn->owner, msg); if (TRACING_LEVEL == LOG_VERB) { log_debug(LOG_VERB, "remote forward from c %d to s %d req %"PRIu64" len %"PRIu32 " type %d with key '%.*s'", c_conn->sd, p_conn->sd, msg->id, msg->mlen, msg->type, keylen, key); } }
static bool dyn_parse_core(struct msg *r) { struct dmsg *dmsg; struct mbuf *b; uint8_t *p, *token; uint8_t ch = ' '; uint64_t num = 0; dyn_state = r->dyn_state; if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "dyn_state: %d", r->dyn_state); } if (r->dyn_state == DYN_DONE || r->dyn_state == DYN_POST_DONE) return true; b = STAILQ_LAST(&r->mhdr, mbuf, next); dmsg = r->dmsg; if (dmsg == NULL) { r->dmsg = dmsg_get(); dmsg = r->dmsg; dmsg->owner = r; if (dmsg == NULL) {//should track this as a dropped message loga("unable to create a new dmsg"); goto error; //should count as OOM error } } token = NULL; for (p = r->pos; p < b->last; p++) { ch = *p; switch (dyn_state) { case DYN_START: if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "DYN_START"); } if (ch != ' ' && ch != '$') { break; } if (ch == ' ') { if (token == NULL) token = p; break; } if (ch == '$') { if (p + 5 < b->last) { if ((*(p+1) == '2') && (*(p+2) == '0') && (*(p+3) == '1') && (*(p+4) == '4') && (*(p+5) == '$')) { dyn_state = DYN_MAGIC_STRING; p += 5; } else { //goto skip; token = NULL; //reset } } else { goto split; } } else { loga("Facing a weird char %c", p); //goto skip; token = NULL; //reset } break; case DYN_MAGIC_STRING: if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "DYN_MAGIC_STRING"); } if (ch == ' ') { dyn_state = DYN_MSG_ID; num = 0; break; } else { //loga("char is '%c %c %c %c'", *(p-2), *(p-1), ch, *(p+1)); token = NULL; loga("Facing a weird char %c", p); //goto skip; dyn_state = DYN_START; } break; case DYN_MSG_ID: if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "DYN_MSG_ID"); log_debug(LOG_DEBUG, "num = %d", num); } if (isdigit(ch)) { num = num*10 + (ch - '0'); } else if (ch == ' ' && isdigit(*(p-1))) { if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "MSG ID : %d", num); } dmsg->id = num; dyn_state = DYN_TYPE_ID; num = 0; } else { //loga("char is '%c %c %c %c'", *(p-2), *(p-1), ch, *(p+1)); //goto skip; token = NULL; //reset dyn_state = DYN_START; if (ch == '$') p -= 1; } 
break; case DYN_TYPE_ID: if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "DYN_TYPE_ID: num = %d", num); } if (isdigit(ch)) { num = num*10 + (ch - '0'); } else if (ch == ' ' && isdigit(*(p-1))) { if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "Type Id: %d", num); } dmsg->type = num; dyn_state = DYN_BIT_FIELD; num = 0; } else { //loga("char is '%c %c %c %c'", *(p-2), *(p-1), ch, *(p+1)); token = NULL; dyn_state = DYN_START; if (ch == '$') p -= 1; } break; case DYN_BIT_FIELD: if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "DYN_BIT_FIELD, num = %d", num); } if (isdigit(ch)) { num = num*10 + (ch - '0'); } else if (ch == ' ' && isdigit(*(p-1))) { if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "DYN_BIT_FIELD : %d", num); } dmsg->bit_field = num & 0xF; dyn_state = DYN_VERSION; num = 0; } else { token = NULL; //loga("char is '%c %c %c %c'", *(p-2), *(p-1), ch, *(p+1)); dyn_state = DYN_START; if (ch == '$') p -= 1; } break; case DYN_VERSION: if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "DYN_VERSION: num = %d", num); } if (isdigit(ch)) { num = num*10 + (ch - '0'); } else if (ch == ' ' && isdigit(*(p-1))) { if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "VERSION : %d", num); } dmsg->version = num; dyn_state = DYN_SAME_DC; num = 0; } else { token = NULL; //loga("char is '%c %c %c %c'", *(p-2), *(p-1), ch, *(p+1)); dyn_state = DYN_START; if (ch == '$') p -= 1; } break; case DYN_SAME_DC: if (isdigit(ch)) { dmsg->same_dc = ch - '0'; if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "DYN_SAME_DC %d", dmsg->same_dc); } } else if (ch == ' ' && isdigit(*(p-1))) { dyn_state = DYN_DATA_LEN; num = 0; } else { token = NULL; //loga("char is '%c %c %c %c'", *(p-2), *(p-1), ch, *(p+1)); dyn_state = DYN_START; if (ch == '$') p -= 1; } break; case DYN_DATA_LEN: if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "DYN_DATA_LEN: num = %d", num); } if (ch == '*') { break; } else if (isdigit(ch)) { num = num*10 + (ch - '0'); } else if (ch == ' ' && isdigit(*(p-1))) 
{ if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "Data len: %d", num); } dmsg->mlen = num; dyn_state = DYN_DATA; num = 0; } else { token = NULL; //loga("char is '%c %c %c %c'", *(p-2), *(p-1), ch, *(p+1)); dyn_state = DYN_START; if (ch == '$') p -= 1; } break; case DYN_DATA: if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "DYN_DATA"); } if (p + dmsg->mlen < b->last) { dmsg->data = p; p += dmsg->mlen - 1; dyn_state = DYN_SPACES_BEFORE_PAYLOAD_LEN; } else { //loga("char is '%c %c %c %c'", *(p-2), *(p-1), ch, *(p+1)); goto split; } break; case DYN_SPACES_BEFORE_PAYLOAD_LEN: if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "DYN_SPACES_BEFORE_PAYLOAD_LEN"); } if (ch == ' ') { break; } else if (ch == '*') { dyn_state = DYN_PAYLOAD_LEN; num = 0; } break; case DYN_PAYLOAD_LEN: if (isdigit(ch)) { num = num*10 + (ch - '0'); } else if (ch == CR) { if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "Payload len: %d", num); } dmsg->plen = num; num = 0; dyn_state = DYN_CRLF_BEFORE_DONE; } else { token = NULL; dyn_state = DYN_START; if (ch == '$') p -= 1; } break; case DYN_CRLF_BEFORE_DONE: if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "DYN_CRLF_BEFORE_DONE"); } if (*p == LF) { dyn_state = DYN_DONE; } else { token = NULL; dyn_state = DYN_START; if (ch == '$') p -= 1; } break; case DYN_DONE: if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "DYN_DONE"); } r->pos = p; dmsg->payload = p; r->dyn_state = DYN_DONE; b->pos = p; goto done; break; default: NOT_REACHED(); break; } } if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "Not fully parsed yet!!!!!!"); } split: //this is an attempt recovery when we got a bad message //we try to look for the start the next good one and throw away the bad part if (r->dyn_state == DYN_START) { r->result = MSG_PARSE_AGAIN; if (b->last == b->end) { struct mbuf *nbuf = mbuf_get(); if (nbuf == NULL) { loga("Unable to obtain a new mbuf for replacement!"); mbuf_put(b); nbuf = mbuf_get(); mbuf_insert_head(&r->mhdr, nbuf); r->pos = 
nbuf->pos; return false; } //replacing the bad mbuf with a new and empty mbuf mbuf_insert(&r->mhdr, nbuf); mbuf_remove(&r->mhdr, b); mbuf_put(b); r->pos = nbuf->pos; return false; } else { //split it and throw away the bad portion struct mbuf *nbuf; nbuf = mbuf_split(&r->mhdr, r->pos, NULL, NULL); if (nbuf == NULL) { return DN_ENOMEM; } mbuf_insert(&r->mhdr, nbuf); mbuf_remove(&r->mhdr, b); r->pos = nbuf->pos; return false; } } if (mbuf_length(b) == 0 || b->last == b->end) { if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "Would this case ever happen?"); } r->result = MSG_PARSE_AGAIN; return false; } if (r->pos == b->last) { if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "Forward to reading the new block of data"); } r->dyn_state = DYN_START; r->result = MSG_PARSE_AGAIN; token = NULL; return false; } if (log_loggable(LOG_VVERB)) { log_debug(LOG_VVERB, "in split"); } r->dyn_state = DYN_START; r->pos = token; r->result = MSG_PARSE_REPAIR; if (log_loggable(LOG_VVERB)) { log_hexdump(LOG_VVERB, b->pos, mbuf_length(b), "split and inspecting req %"PRIu64" " "res %d type %d state %d", r->id, r->result, r->type, r->dyn_state); log_hexdump(LOG_VVERB, b->start, b->last - b->start, "split and inspecting full req %"PRIu64" " "res %d type %d state %d", r->id, r->result, r->type, r->dyn_state); } return false; done: r->pos = p; dmsg->source_address = r->owner->addr; if (log_loggable(LOG_VVERB)) { log_debug(LOG_VVERB, "at done with p at %d", p); log_hexdump(LOG_VVERB, r->pos, b->last - r->pos, "done and inspecting req %"PRIu64" " "res %d type %d state %d", r->id, r->result, r->type, r->dyn_state); log_hexdump(LOG_VVERB, b->start, b->last - b->start, "inspecting req %"PRIu64" " "res %d type %d state %d", r->id, r->result, r->type, r->dyn_state); } return true; error: log_debug(LOG_ERR, "at error for state %d and c %c", dyn_state, *p); r->result = MSG_PARSE_ERROR; r->pos = p; errno = EINVAL; if (log_loggable(LOG_ERR)) { log_hexdump(LOG_ERR, b->pos, mbuf_length(b), "parsed 
bad req %"PRIu64" " "res %d type %d state %d", r->id, r->result, r->type, dyn_state); log_hexdump(LOG_ERR, p, b->last - p, "inspecting req %"PRIu64" " "res %d type %d state %d", r->id, r->result, r->type, dyn_state); } r->dyn_state = dyn_state; return false; }
/* * Sending a mbuf of gossip data over the wire to a peer */ void dnode_peer_gossip_forward(struct context *ctx, struct conn *conn, bool redis, struct mbuf *data_buf) { rstatus_t status; struct msg *msg = msg_get(conn, 1, redis); if (msg == NULL) { log_debug(LOG_DEBUG, "Unable to obtain a msg"); return; } struct mbuf *header_buf = mbuf_get(); if (header_buf == NULL) { log_debug(LOG_DEBUG, "Unable to obtain a data_buf"); msg_put(msg); return; } uint64_t msg_id = peer_msg_id++; if (conn->dnode_secured) { log_debug(LOG_VERB, "Assemble a secured msg to send"); log_debug(LOG_VERB, "AES encryption key: %s\n", base64_encode(conn->aes_key, AES_KEYLEN)); struct mbuf *encrypted_buf = mbuf_get(); if (encrypted_buf == NULL) { loga("Unable to obtain an data_buf for encryption!"); return; //TODOs: need to clean up } status = dyn_aes_encrypt(data_buf->pos, mbuf_length(data_buf), encrypted_buf, conn->aes_key); log_debug(LOG_VERB, "#encrypted bytes : %d", status); //write dnode header dmsg_write(header_buf, msg_id, GOSSIP_SYN, conn, mbuf_length(encrypted_buf)); mbuf_insert_head(&msg->mhdr, header_buf); if (TRACING_LEVEL == LOG_VVERB) { log_hexdump(LOG_VVERB, data_buf->pos, mbuf_length(data_buf), "dyn message original payload: "); log_hexdump(LOG_VVERB, encrypted_buf->pos, mbuf_length(encrypted_buf), "dyn message encrypted payload: "); } mbuf_insert(&msg->mhdr, encrypted_buf); //free data_buf as no one will need it again mbuf_put(data_buf); } else { log_debug(LOG_VERB, "Assemble a non-secured msg to send"); dmsg_write_mbuf(header_buf, msg_id, GOSSIP_SYN, conn, mbuf_length(data_buf)); mbuf_insert_head(&msg->mhdr, header_buf); mbuf_insert(&msg->mhdr, data_buf); } if (TRACING_LEVEL == LOG_VVERB) { log_hexdump(LOG_VVERB, header_buf->pos, mbuf_length(header_buf), "dyn gossip message header: "); msg_dump(msg); } /* enqueue the message (request) into peer inq */ if (TAILQ_EMPTY(&conn->imsg_q)) { status = event_add_out(ctx->evb, conn); if (status != DN_OK) { dnode_req_forward_error(ctx, 
conn, msg); conn->err = errno; return; } } //need to handle a reply //conn->enqueue_outq(ctx, conn, msg); msg->noreply = 1; conn->enqueue_inq(ctx, conn, msg); }
void dyn_parse_rsp(struct msg *r) { if (log_loggable(LOG_VVERB)) { log_debug(LOG_VVERB, ":::::::::::::::::::::: In dyn_parse_rsp, start to process response :::::::::::::::::::::::: "); msg_dump(r); } bool done_parsing = false; struct mbuf *b = STAILQ_LAST(&r->mhdr, mbuf, next); if (dyn_parse_core(r)) { struct dmsg *dmsg = r->dmsg; struct conn *conn = r->owner; conn->same_dc = dmsg->same_dc; if (dmsg->type != DMSG_UNKNOWN && dmsg->type != DMSG_RES) { log_debug(LOG_DEBUG, "Resp parser: I got a dnode msg of type %d", dmsg->type); r->state = 0; r->result = MSG_PARSE_OK; r->dyn_state = DYN_DONE; return; } if (r->dyn_state == DYN_DONE && dmsg->bit_field == 1) { dmsg->owner->owner->dnode_secured = 1; r->owner->dnode_crypto_state = 1; r->dyn_state = DYN_POST_DONE; r->result = MSG_PARSE_REPAIR; if (dmsg->mlen > 1) { //Decrypt AES key dyn_rsa_decrypt(dmsg->data, aes_decrypted_buf); strncpy(r->owner->aes_key, aes_decrypted_buf, strlen(aes_decrypted_buf)); } if (dmsg->plen + b->pos <= b->last) { struct mbuf *decrypted_buf = mbuf_get(); if (decrypted_buf == NULL) { loga("Unable to obtain an mbuf for dnode msg's header!"); r->result = MSG_OOM_ERROR; return; } dyn_aes_decrypt(b->pos, dmsg->plen, decrypted_buf, r->owner->aes_key); b->pos = b->pos + dmsg->plen; r->pos = decrypted_buf->start; mbuf_copy(decrypted_buf, b->pos, mbuf_length(b)); mbuf_insert(&r->mhdr, decrypted_buf); mbuf_remove(&r->mhdr, b); mbuf_put(b); r->mlen = mbuf_length(decrypted_buf); return data_store_parse_rsp(r); } //Subtract already received bytes dmsg->plen -= b->last - b->pos; return; } else if (r->dyn_state == DYN_POST_DONE) { struct mbuf *last_buf = STAILQ_LAST(&r->mhdr, mbuf, next); if (last_buf->read_flip == 1) { data_store_parse_rsp(r); } else { r->result = MSG_PARSE_AGAIN; } return; } if (done_parsing) return; return data_store_parse_rsp(r); } //bad case if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "Resp: bad message - cannot parse"); //fix me to do something msg_dump(r); } r->result = 
MSG_PARSE_AGAIN; }
static void rsp_forward(struct context *ctx, struct conn *s_conn, struct msg *msg) { rstatus_t status; struct msg *pmsg; struct conn *c_conn; ASSERT(!s_conn->client && !s_conn->proxy); /* response from server implies that server is ok and heartbeating */ server_ok(ctx, s_conn); /* dequeue peer message (request) from server */ pmsg = TAILQ_FIRST(&s_conn->omsg_q); ASSERT(pmsg != NULL && pmsg->peer == NULL); ASSERT(pmsg->request && !pmsg->done); s_conn->dequeue_outq(ctx, s_conn, pmsg); pmsg->done = 1; /* establish msg <-> pmsg (response <-> request) link */ pmsg->peer = msg; msg->peer = pmsg; /* * Readjust responses of fragmented messages by not including the end * marker for all but the last response * * Valid responses for a fragmented requests are MSG_RSP_VALUE or, * MSG_RSP_END. For an invalid response, we send out SERVER_ERRROR with * EINVAL errno */ if (pmsg->frag_id != 0) { if (msg->type != MSG_RSP_VALUE && msg->type != MSG_RSP_END) { pmsg->error = 1; pmsg->err = EINVAL; } else if (!pmsg->last_fragment) { ASSERT(msg->end != NULL); for (;;) { struct mbuf *mbuf; mbuf = STAILQ_LAST(&msg->mhdr, mbuf, next); ASSERT(mbuf != NULL); /* * We cannot assert that end marker points to the last mbuf * Consider a scenario where end marker points to the * penultimate mbuf and the last mbuf only contains spaces * and CRLF: mhdr -> [...END] -> [\r\n] */ if (msg->end >= mbuf->pos && msg->end < mbuf->last) { /* end marker is within this mbuf */ msg->mlen -= (uint32_t)(mbuf->last - msg->end); mbuf->last = msg->end; break; } /* end marker is not in this mbuf */ msg->mlen -= mbuf_length(mbuf); mbuf_remove(&msg->mhdr, mbuf); mbuf_put(mbuf); } } } c_conn = pmsg->owner; ASSERT(c_conn->client && !c_conn->proxy); if (req_done(c_conn, TAILQ_FIRST(&c_conn->omsg_q))) { status = event_add_out(ctx->ep, c_conn); if (status != NC_OK) { c_conn->err = errno; } } rsp_forward_stats(ctx, s_conn->owner, msg); }
/* dnode sends a response back to a peer */ struct msg * dnode_rsp_send_next(struct context *ctx, struct conn *conn) { if (TRACING_LEVEL == LOG_VVERB) { log_debug(LOG_VVERB, "dnode_rsp_send_next entering"); } ASSERT(conn->dnode_client && !conn->dnode_server); struct msg *msg = rsp_send_next(ctx, conn); if (msg != NULL && conn->dyn_mode) { struct msg *pmsg = TAILQ_FIRST(&conn->omsg_q); //peer request's msg //need to deal with multi-block later uint64_t msg_id = pmsg->dmsg->id; struct mbuf *header_buf = mbuf_get(); if (header_buf == NULL) { loga("Unable to obtain an mbuf for header!"); return NULL; //need to address error here properly } //TODOs: need to set the outcoming conn to be secured too if the incoming conn is secured if (pmsg->owner->dnode_secured || conn->dnode_secured) { if (TRACING_LEVEL == LOG_VVERB) { log_debug(LOG_VVERB, "Encrypting response ..."); loga("AES encryption key: %s\n", base64_encode(conn->aes_key, AES_KEYLEN)); } struct mbuf *data_buf = STAILQ_LAST(&msg->mhdr, mbuf, next); //if (ENCRYPTION) { struct mbuf *encrypted_buf = mbuf_get(); if (encrypted_buf == NULL) { loga("Unable to obtain an mbuf for encryption!"); return NULL; //TODOs: need to clean up } rstatus_t status = dyn_aes_encrypt(data_buf->pos, mbuf_length(data_buf), encrypted_buf, conn->aes_key); if (TRACING_LEVEL == LOG_VVERB) { log_debug(LOG_VERB, "#encrypted bytes : %d", status); } dmsg_write(header_buf, msg_id, DMSG_RES, conn, mbuf_length(encrypted_buf)); if (TRACING_LEVEL == LOG_VVERB) { log_hexdump(LOG_VVERB, data_buf->pos, mbuf_length(data_buf), "resp dyn message - original payload: "); log_hexdump(LOG_VVERB, encrypted_buf->pos, mbuf_length(encrypted_buf), "dyn message encrypted payload: "); } mbuf_copy(header_buf, encrypted_buf->start, mbuf_length(encrypted_buf)); mbuf_insert(&msg->mhdr, header_buf); //remove the original dbuf out of the queue and insert encrypted mbuf to replace mbuf_remove(&msg->mhdr, data_buf); //mbuf_insert(&msg->mhdr, encrypted_buf); mbuf_put(data_buf); 
mbuf_put(encrypted_buf); //} else { // log_debug(LOG_VERB, "no encryption on the response's payload"); // dmsg_write(header_buf, msg_id, DMSG_RES, conn, mbuf_length(data_buf)); //} } else { dmsg_write(header_buf, msg_id, DMSG_RES, conn, 0);//Dont care about 0 or the real length as we don't use that value in unencryption mode mbuf_insert_head(&msg->mhdr, header_buf); } if (TRACING_LEVEL == LOG_VVERB) { log_hexdump(LOG_VVERB, header_buf->pos, mbuf_length(header_buf), "resp dyn message - header: "); msg_dump(msg); } } return msg; }
static void *check_thread_run(void *args) { int ret; thread_data *cdata = args; redis_group *srgroup = cdata->srgroup; redis_group *trgroup = cdata->trgroup; dict *nodes; dictEntry *de; dictIterator *di; redis_node *rnode; struct mbuf *mbuf; nodes = srgroup->nodes; di = dictGetIterator(nodes); while ((de = dictNext(di)) != NULL) { rnode = dictGetVal(de); rnode->write_data = cdata; /* remove the not used part for source redis node */ if (rnode->rdb != NULL) { redis_rdb_deinit(rnode->rdb); rmt_free(rnode->rdb); rnode->rdb = NULL; } if (rnode->cmd_data != NULL) { while (!mttlist_empty(rnode->cmd_data)) { mbuf = mttlist_pop(rnode->cmd_data); mbuf_put(mbuf); } mttlist_destroy(rnode->cmd_data); rnode->cmd_data = NULL; } if (rnode->sockpairfds[0] > 0) { close(rnode->sockpairfds[0]); rnode->sockpairfds[0] = -1; } if (rnode->sockpairfds[1] > 0) { close(rnode->sockpairfds[1]); rnode->sockpairfds[1] = -1; } if (rnode->rr != NULL) { redis_replication_deinit(rnode->rr); rmt_free(rnode->rr); rnode->rr = NULL; } if (rnode->piece_data != NULL) { while ((mbuf = listPop(rnode->piece_data)) != NULL) { mbuf_put(mbuf); } listRelease(rnode->piece_data); rnode->piece_data = NULL; } /* add the used part for source redis node */ if (rnode->send_data == NULL) { rnode->send_data = listCreate(); if (rnode->send_data == NULL) { log_error("ERROR: Create msg list failed: out of memory"); return 0; } } if (rnode->sent_data == NULL) { rnode->sent_data = listCreate(); if (rnode->sent_data == NULL) { log_error("ERROR: Create msg list failed: out of memory"); return 0; } } if (rnode->sk_event < 0) { rnode->sk_event = socket(AF_INET, SOCK_STREAM, 0); if(rnode->sk_event < 0){ log_error("ERROR: Create sk_event for node[%s] failed: %s", rnode->addr, strerror(errno)); return 0; } } ret = aeCreateFileEvent(cdata->loop, rnode->sk_event, AE_WRITABLE, check_begin, rnode); if (ret != AE_OK) { log_error("ERROR: send_data event create %ld failed: %s", cdata->thread_id, strerror(errno)); return 0; } } 
dictReleaseIterator(di); nodes = trgroup->nodes; di = dictGetIterator(nodes); while ((de = dictNext(di)) != NULL) { rnode = dictGetVal(de); rnode->write_data = cdata; } dictReleaseIterator(di); aeMain(cdata->loop); return 0; }