/* dnode sends a response back to a peer */ struct msg * dnode_rsp_send_next(struct context *ctx, struct conn *conn) { if (TRACING_LEVEL == LOG_VVERB) { log_debug(LOG_VVERB, "dnode_rsp_send_next entering"); } ASSERT(conn->dnode_client && !conn->dnode_server); struct msg *msg = rsp_send_next(ctx, conn); if (msg != NULL && conn->dyn_mode) { struct msg *pmsg = TAILQ_FIRST(&conn->omsg_q); //peer request's msg //need to deal with multi-block later uint64_t msg_id = pmsg->dmsg->id; struct mbuf *header_buf = mbuf_get(); if (header_buf == NULL) { loga("Unable to obtain an mbuf for header!"); return NULL; //need to address error here properly } //TODOs: need to set the outcoming conn to be secured too if the incoming conn is secured if (pmsg->owner->dnode_secured || conn->dnode_secured) { if (TRACING_LEVEL == LOG_VVERB) { log_debug(LOG_VVERB, "Encrypting response ..."); loga("AES encryption key: %s\n", base64_encode(conn->aes_key, AES_KEYLEN)); } struct mbuf *data_buf = STAILQ_LAST(&msg->mhdr, mbuf, next); //if (ENCRYPTION) { struct mbuf *encrypted_buf = mbuf_get(); if (encrypted_buf == NULL) { loga("Unable to obtain an mbuf for encryption!"); return NULL; //TODOs: need to clean up } rstatus_t status = dyn_aes_encrypt(data_buf->pos, mbuf_length(data_buf), encrypted_buf, conn->aes_key); if (TRACING_LEVEL == LOG_VVERB) { log_debug(LOG_VERB, "#encrypted bytes : %d", status); } dmsg_write(header_buf, msg_id, DMSG_RES, conn, mbuf_length(encrypted_buf)); if (TRACING_LEVEL == LOG_VVERB) { log_hexdump(LOG_VVERB, data_buf->pos, mbuf_length(data_buf), "resp dyn message - original payload: "); log_hexdump(LOG_VVERB, encrypted_buf->pos, mbuf_length(encrypted_buf), "dyn message encrypted payload: "); } mbuf_copy(header_buf, encrypted_buf->start, mbuf_length(encrypted_buf)); mbuf_insert(&msg->mhdr, header_buf); //remove the original dbuf out of the queue and insert encrypted mbuf to replace mbuf_remove(&msg->mhdr, data_buf); //mbuf_insert(&msg->mhdr, encrypted_buf); mbuf_put(data_buf); mbuf_put(encrypted_buf); //} else { // log_debug(LOG_VERB, "no encryption on the response's payload"); // dmsg_write(header_buf, msg_id, DMSG_RES, conn, mbuf_length(data_buf)); //} } else { dmsg_write(header_buf, msg_id, DMSG_RES, conn, 0);//Dont care about 0 or the real length as we don't use that value in unencryption mode mbuf_insert_head(&msg->mhdr, header_buf); } if (TRACING_LEVEL == LOG_VVERB) { log_hexdump(LOG_VVERB, header_buf->pos, mbuf_length(header_buf), "resp dyn message - header: "); msg_dump(msg); } } return msg; }
/* * Sending a mbuf of gossip data over the wire to a peer */ void dnode_peer_gossip_forward(struct context *ctx, struct conn *conn, bool redis, struct mbuf *data_buf) { rstatus_t status; struct msg *msg = msg_get(conn, 1, redis); if (msg == NULL) { log_debug(LOG_DEBUG, "Unable to obtain a msg"); return; } struct mbuf *header_buf = mbuf_get(); if (header_buf == NULL) { log_debug(LOG_DEBUG, "Unable to obtain a data_buf"); msg_put(msg); return; } uint64_t msg_id = peer_msg_id++; if (conn->dnode_secured) { log_debug(LOG_VERB, "Assemble a secured msg to send"); log_debug(LOG_VERB, "AES encryption key: %s\n", base64_encode(conn->aes_key, AES_KEYLEN)); struct mbuf *encrypted_buf = mbuf_get(); if (encrypted_buf == NULL) { loga("Unable to obtain an data_buf for encryption!"); return; //TODOs: need to clean up } status = dyn_aes_encrypt(data_buf->pos, mbuf_length(data_buf), encrypted_buf, conn->aes_key); log_debug(LOG_VERB, "#encrypted bytes : %d", status); //write dnode header dmsg_write(header_buf, msg_id, GOSSIP_SYN, conn, mbuf_length(encrypted_buf)); mbuf_insert_head(&msg->mhdr, header_buf); if (TRACING_LEVEL == LOG_VVERB) { log_hexdump(LOG_VVERB, data_buf->pos, mbuf_length(data_buf), "dyn message original payload: "); log_hexdump(LOG_VVERB, encrypted_buf->pos, mbuf_length(encrypted_buf), "dyn message encrypted payload: "); } mbuf_insert(&msg->mhdr, encrypted_buf); //free data_buf as no one will need it again mbuf_put(data_buf); } else { log_debug(LOG_VERB, "Assemble a non-secured msg to send"); dmsg_write_mbuf(header_buf, msg_id, GOSSIP_SYN, conn, mbuf_length(data_buf)); mbuf_insert_head(&msg->mhdr, header_buf); mbuf_insert(&msg->mhdr, data_buf); } if (TRACING_LEVEL == LOG_VVERB) { log_hexdump(LOG_VVERB, header_buf->pos, mbuf_length(header_buf), "dyn gossip message header: "); msg_dump(msg); } /* enqueue the message (request) into peer inq */ if (TAILQ_EMPTY(&conn->imsg_q)) { status = event_add_out(ctx->evb, conn); if (status != DN_OK) { dnode_req_forward_error(ctx, conn, msg); conn->err = errno; return; } } //need to handle a reply //conn->enqueue_outq(ctx, conn, msg); msg->noreply = 1; conn->enqueue_inq(ctx, conn, msg); }
/* Forward a client request over to a peer */ void dnode_peer_req_forward(struct context *ctx, struct conn *c_conn, struct conn *p_conn, struct msg *msg, struct rack *rack, uint8_t *key, uint32_t keylen) { if (TRACING_LEVEL == LOG_VVERB) { log_debug(LOG_VVERB, "dnode_peer_req_forward entering"); } rstatus_t status; /* enqueue message (request) into client outq, if response is expected */ if (!msg->noreply) { c_conn->enqueue_outq(ctx, c_conn, msg); } ASSERT(!p_conn->dnode_client && !p_conn->dnode_server); ASSERT(c_conn->client); /* enqueue the message (request) into peer inq */ if (TAILQ_EMPTY(&p_conn->imsg_q)) { status = event_add_out(ctx->evb, p_conn); if (status != DN_OK) { dnode_req_forward_error(ctx, p_conn, msg); p_conn->err = errno; return; } } uint64_t msg_id = peer_msg_id++; struct mbuf *header_buf = mbuf_get(); if (header_buf == NULL) { loga("Unable to obtain an mbuf for dnode msg's header!"); return; } if (p_conn->dnode_secured) { //Encrypting and adding header for a request struct mbuf *data_buf = STAILQ_LAST(&msg->mhdr, mbuf, next); //TODOs: need to deal with multi-block later log_debug(LOG_VERB, "AES encryption key: %s\n", base64_encode(p_conn->aes_key, AES_KEYLEN)); struct mbuf *encrypted_buf = mbuf_get(); if (encrypted_buf == NULL) { loga("Unable to obtain an mbuf for encryption!"); return; //TODOs: need to clean up } status = dyn_aes_encrypt(data_buf->pos, mbuf_length(data_buf), encrypted_buf, p_conn->aes_key); log_debug(LOG_VERB, "#encrypted bytes : %d", status); //write dnode header dmsg_write(header_buf, msg_id, DMSG_REQ, p_conn, mbuf_length(encrypted_buf)); mbuf_insert_head(&msg->mhdr, header_buf); log_hexdump(LOG_VERB, data_buf->pos, mbuf_length(data_buf), "dyn message original payload: "); log_hexdump(LOG_VERB, encrypted_buf->pos, mbuf_length(encrypted_buf), "dyn message encrypted payload: "); //remove the original dbuf out of the queue and insert encrypted mbuf to replace mbuf_remove(&msg->mhdr, data_buf); mbuf_insert(&msg->mhdr, encrypted_buf); //free it as no one will need it again mbuf_put(data_buf); } else { //write dnode header dmsg_write(header_buf, msg_id, DMSG_REQ, p_conn, 0); mbuf_insert_head(&msg->mhdr, header_buf); } if (TRACING_LEVEL == LOG_VVERB) { log_hexdump(LOG_VVERB, header_buf->pos, mbuf_length(header_buf), "dyn message header: "); msg_dump(msg); } p_conn->enqueue_inq(ctx, p_conn, msg); dnode_peer_req_forward_stats(ctx, p_conn->owner, msg); if (TRACING_LEVEL == LOG_VERB) { log_debug(LOG_VERB, "remote forward from c %d to s %d req %"PRIu64" len %"PRIu32 " type %d with key '%.*s'", c_conn->sd, p_conn->sd, msg->id, msg->mlen, msg->type, keylen, key); } }
/* dnode sends a response back to a peer */ struct msg * dnode_rsp_send_next(struct context *ctx, struct conn *conn) { rstatus_t status; ASSERT(conn->dnode_client && !conn->dnode_server); struct msg *rsp = rsp_send_next(ctx, conn); if (rsp != NULL && conn->dyn_mode) { struct msg *pmsg = rsp->peer; //need to deal with multi-block later uint64_t msg_id = pmsg->dmsg->id; struct mbuf *header_buf = mbuf_get(); if (header_buf == NULL) { loga("Unable to obtain an mbuf for header!"); return NULL; //need to address error here properly } dmsg_type_t msg_type = DMSG_RES; //TODOs: need to set the outcoming conn to be secured too if the incoming conn is secured if (pmsg->owner->dnode_secured || conn->dnode_secured) { if (log_loggable(LOG_VVERB)) { log_debug(LOG_VVERB, "Encrypting response ..."); loga("AES encryption key: %s\n", base64_encode(conn->aes_key, AES_KEYLEN)); } if (ENCRYPTION) { status = dyn_aes_encrypt_msg(rsp, conn->aes_key); if (status == DN_ERROR) { loga("OOM to obtain an mbuf for encryption!"); mbuf_put(header_buf); req_put(rsp); return NULL; } if (log_loggable(LOG_VVERB)) { log_debug(LOG_VERB, "#encrypted bytes : %d", status); } dmsg_write(header_buf, msg_id, msg_type, conn, msg_length(rsp)); } else { if (log_loggable(LOG_VVERB)) { log_debug(LOG_VERB, "no encryption on the rsp payload"); } dmsg_write(header_buf, msg_id, msg_type, conn, msg_length(rsp)); } } else { //write dnode header log_info("sending dnode response with msg_id %u", msg_id); dmsg_write(header_buf, msg_id, msg_type, conn, msg_length(rsp)); } mbuf_insert_head(&rsp->mhdr, header_buf); if (log_loggable(LOG_VVERB)) { log_hexdump(LOG_VVERB, header_buf->pos, mbuf_length(header_buf), "resp dyn message - header: "); msg_dump(rsp); } } return rsp; }
/* dnode sends a response back to a peer */ static struct msg * dnode_rsp_send_next(struct context *ctx, struct conn *conn) { rstatus_t status; // SMB: There is some non trivial thing happening here. And I think it is very // important to read this before anything is changed in here. There is also a // bug that exists which I will mention briefly: // A message is a structure that has a list of mbufs which hold the actual data. // Each mbuf has start, pos, last as pointers (amongst others) which indicate start of the // buffer, current read position and end of the buffer respectively. // // Every time a message is sent to a peer within dynomite, a DNODE header is // prepended which is created using dmsg_write. A message remembers this case // in dnode_header_prepended, so that if the messsage is sent in parts, the // header is not prepended again for the subsequent parts. // // Like I said earlier there is a pos pointer in mbuf. If a message is sent // partially (or it is parsed partially too I think) the pos reflects that // case such that things can be resumed where it left off. // // dmsg_write has a parameter which reflects the payload length following the // dnode header calculated by msg_length. msg_length is a summation of all // mbuf sizes (last - start). Which I think is wrong. // // +------------+ +---------------+ // | DC1N1 +---------> | DC2N1 | // +------------+ +-------+-------+ // | // | // | // | // +-------v-------+ // | DC2N2 | // +---------------+ // // Consider the case where // a node DC1N1 in region DC1 sends a request to DC2N1 which forwards it to // to local token owner DC2N2. Now DC2N1 receives a response from DC2N2 which // has to be relayed back to DC1N1. This response from DC2N2 already has a // dnode header but for the link between DC2N1 and DC2N2. DC2N1 should strip // this header and prepend its own header for sending it back to DC1N1. This // gets handled in encryption case since we overwrite all mbufs in the response // However if the encryption is off, the message length sent to dmsg_write // consists of the header from DC2N2 also which is wrong. So this relaying // of responses will not work for the case where encryption is disabled. // // So msg_length should really be from mbuf->pos and not mbuf->start. This // is a problem only with remote region replication since that is the only // case where we CAN have 2 hops to send the request/response. This is also // not a problem if encryption is ON. ASSERT(conn->type == CONN_DNODE_PEER_CLIENT); struct msg *rsp = rsp_send_next(ctx, conn); if (rsp != NULL && conn->dyn_mode) { struct msg *pmsg = rsp->peer; //need to deal with multi-block later uint64_t msg_id = pmsg->dmsg->id; if (rsp->dnode_header_prepended) { return rsp; } struct mbuf *header_buf = mbuf_get(); if (header_buf == NULL) { loga("Unable to obtain an mbuf for header!"); return NULL; //need to address error here properly } dmsg_type_t msg_type = DMSG_RES; //TODOs: need to set the outcoming conn to be secured too if the incoming conn is secured if (pmsg->owner->dnode_secured || conn->dnode_secured) { if (log_loggable(LOG_VVERB)) { log_debug(LOG_VVERB, "Encrypting response ..."); loga("AES encryption key: %s\n", base64_encode(conn->aes_key, AES_KEYLEN)); } if (ENCRYPTION) { status = dyn_aes_encrypt_msg(rsp, conn->aes_key); if (status == DN_ERROR) { loga("OOM to obtain an mbuf for encryption!"); mbuf_put(header_buf); req_put(rsp); return NULL; } if (log_loggable(LOG_VVERB)) { log_debug(LOG_VERB, "#encrypted bytes : %d", status); } dmsg_write(header_buf, msg_id, msg_type, conn, msg_length(rsp)); } else { if (log_loggable(LOG_VVERB)) { log_debug(LOG_VERB, "no encryption on the rsp payload"); } dmsg_write(header_buf, msg_id, msg_type, conn, msg_length(rsp)); } } else { //write dnode header log_debug(LOG_VERB, "sending dnode response with msg_id %u", msg_id); dmsg_write(header_buf, msg_id, msg_type, conn, msg_length(rsp)); } rsp->dnode_header_prepended = 1; mbuf_insert_head(&rsp->mhdr, header_buf); if (log_loggable(LOG_VVERB)) { log_hexdump(LOG_VVERB, header_buf->pos, mbuf_length(header_buf), "resp dyn message - header: "); msg_dump(rsp); } } return rsp; }
/* Forward a client request over to a peer */ void dnode_peer_req_forward(struct context *ctx, struct conn *c_conn, struct conn *p_conn, struct msg *msg, struct rack *rack, uint8_t *key, uint32_t keylen) { struct server *server = p_conn->owner; log_debug(LOG_DEBUG, "forwarding request from client conn '%s' to peer conn '%s' on rack '%.*s' dc '%.*s' ", dn_unresolve_peer_desc(c_conn->sd), dn_unresolve_peer_desc(p_conn->sd), rack->name->len, rack->name->data, server->dc.len, server->dc.data); struct string *dc = rack->dc; rstatus_t status; /* enqueue message (request) into client outq, if response is expected */ if (!msg->noreply && !msg->swallow) { conn_enqueue_outq(ctx, c_conn, msg); } ASSERT(p_conn->type == CONN_DNODE_PEER_SERVER); ASSERT((c_conn->type == CONN_CLIENT) || (c_conn->type == CONN_DNODE_PEER_CLIENT)); /* enqueue the message (request) into peer inq */ status = event_add_out(ctx->evb, p_conn); if (status != DN_OK) { dnode_req_forward_error(ctx, p_conn, msg); p_conn->err = errno; return; } struct mbuf *header_buf = mbuf_get(); if (header_buf == NULL) { loga("Unable to obtain an mbuf for dnode msg's header!"); req_put(msg); return; } struct server_pool *pool = c_conn->owner; dmsg_type_t msg_type = (string_compare(&pool->dc, dc) != 0)? DMSG_REQ_FORWARD : DMSG_REQ; if (p_conn->dnode_secured) { //Encrypting and adding header for a request if (log_loggable(LOG_VVERB)) { log_debug(LOG_VERB, "AES encryption key: %s\n", base64_encode(p_conn->aes_key, AES_KEYLEN)); } //write dnode header if (ENCRYPTION) { status = dyn_aes_encrypt_msg(msg, p_conn->aes_key); if (status == DN_ERROR) { loga("OOM to obtain an mbuf for encryption!"); mbuf_put(header_buf); req_put(msg); return; } if (log_loggable(LOG_VVERB)) { log_debug(LOG_VERB, "#encrypted bytes : %d", status); } dmsg_write(header_buf, msg->id, msg_type, p_conn, msg_length(msg)); } else { if (log_loggable(LOG_VVERB)) { log_debug(LOG_VERB, "no encryption on the msg payload"); } dmsg_write(header_buf, msg->id, msg_type, p_conn, msg_length(msg)); } } else { //write dnode header dmsg_write(header_buf, msg->id, msg_type, p_conn, msg_length(msg)); } mbuf_insert_head(&msg->mhdr, header_buf); if (log_loggable(LOG_VVERB)) { log_hexdump(LOG_VVERB, header_buf->pos, mbuf_length(header_buf), "dyn message header: "); msg_dump(msg); } conn_enqueue_inq(ctx, p_conn, msg); dnode_peer_req_forward_stats(ctx, p_conn->owner, msg); if (log_loggable(LOG_VVERB)) { log_debug(LOG_VVERB, "remote forward from c %d to s %d req %"PRIu64" len %"PRIu32 " type %d with key '%.*s'", c_conn->sd, p_conn->sd, msg->id, msg->mlen, msg->type, keylen, key); } }