/*
 * Split mbuf at pos: the bytes in [pos, mbuf->last) are copied into a newly
 * obtained mbuf and the original is truncated so that it ends at pos.
 *
 * When cb is non-NULL it is invoked on the new mbuf (with cbarg) before the
 * tail bytes are copied in.
 *
 * Returns the new mbuf, or NULL if mbuf_get() fails; in that case the
 * original mbuf is left untouched.
 */
struct mbuf *
mbuf_split(struct context *ctx, struct mbuf *mbuf, uint8_t *pos,
           mbuf_copy_t cb, void *cbarg)
{
    struct mbuf *tail = mbuf_get(ctx);

    if (tail != NULL) {
        size_t tail_len;

        /* let the caller seed the new mbuf first */
        if (cb != NULL) {
            cb(tail, cbarg);
        }

        /* move everything from pos onward into the new mbuf */
        tail_len = (size_t)(mbuf->last - pos);
        mbuf_copy(tail, pos, tail_len);

        /* the original now ends where the split happened */
        mbuf->last = pos;
    }

    return tail;
}
/* * fill the mbuf in the msg with the content */ int msg_append_full(struct msg *msg, const uint8_t *pos, uint32_t n) { struct mbuf *mbuf; uint32_t left, len; mbuf_base *mb = msg->mb; const uint8_t *start; start = pos; left = n; while (left > 0) { mbuf = listLastValue(msg->data); if (mbuf == NULL || mbuf_size(mbuf) == 0) { mbuf = mbuf_get(mb); if (mbuf == NULL) { log_error("ERROR: Mbuf get failed: out of memory"); return RMT_ENOMEM; } listAddNodeTail(msg->data, mbuf); } len = MIN(left, mbuf_size(mbuf)); mbuf_copy(mbuf, start, len); left -= len; start += len; msg->mlen += len; } return RMT_OK; }
/*
 * Attach external storage to *mbuf via m_mclget(), first obtaining an mbuf
 * with mbuf_get() when the caller passed *mbuf == NULL.
 *
 * Returns EINVAL if mbuf itself is NULL, the mbuf_get() error if the initial
 * allocation fails, ENOMEM if the resulting mbuf is NULL or lacks M_EXT, and
 * 0 otherwise. An mbuf that was allocated here is freed again (and *mbuf set
 * to NULL) when M_EXT is missing; a caller-supplied mbuf is not freed.
 */
errno_t
mbuf_mclget(mbuf_how_t how, mbuf_type_t type, mbuf_t *mbuf)
{
	errno_t error = 0;
	int allocated_here = 0;

	if (mbuf == NULL) {
		return (EINVAL);
	}

	if (*mbuf == NULL) {
		error = mbuf_get(how, type, mbuf);
		if (error != 0) {
			return (error);
		}
		allocated_here = 1;
	}

	/*
	 * At the time this code was written, m_mclget would always
	 * return the same value that was passed in to it.
	 */
	*mbuf = m_mclget(*mbuf, how);

	/* Only an mbuf we created ourselves is released on failure. */
	if (allocated_here && ((*mbuf)->m_flags & M_EXT) == 0) {
		mbuf_free(*mbuf);
		*mbuf = NULL;
	}

	if (*mbuf == NULL) {
		error = ENOMEM;
	} else if (((*mbuf)->m_flags & M_EXT) == 0) {
		error = ENOMEM;
	}

	return (error);
}
static rstatus_t conn_recv_queue(struct conn *conn) { struct mbuf *mbuf; size_t msize; /* current mbuf size */ ssize_t n; mbuf = STAILQ_LAST(&conn->recv_queue, mbuf, next); if (mbuf == NULL || mbuf_full(mbuf)) { mbuf = mbuf_get(); if (mbuf == NULL) { return NC_ENOMEM; } mbuf_insert(&conn->recv_queue, mbuf); } msize = mbuf_size(mbuf); ASSERT(msize > 0); n = conn_recv_buf(conn, mbuf->last, msize); if (n < 0) { if (n == NC_EAGAIN) { return NC_OK; } return NC_ERROR; } ASSERT((mbuf->last + n) <= mbuf->end); mbuf->last += n; return NC_OK; }
/*
 * Split the last mbuf of queue h at pos. The bytes in [pos, last) are copied
 * into a newly obtained mbuf t and the original is truncated to end at pos.
 * If cb is non-NULL it runs on t (with cbarg) before the copy, so it can
 * place its own bytes at the head of t.
 *
 * Returns t on success, or NULL if no mbuf could be obtained. The new mbuf is
 * not inserted into h by this function.
 */
struct mbuf *
mbuf_split(struct mhdr *h, uint8_t *pos, mbuf_copy_t cb, void *cbarg)
{
    struct mbuf *head_part, *tail_part;
    size_t moved;

    ASSERT(!STAILQ_EMPTY(h));

    head_part = STAILQ_LAST(h, mbuf, next);
    ASSERT(pos >= head_part->pos && pos <= head_part->last);

    tail_part = mbuf_get();
    if (tail_part == NULL) {
        return NULL;
    }

    /* optional precopy into the new mbuf */
    if (cb != NULL) {
        cb(tail_part, cbarg);
    }

    /* carry the bytes after pos over to the new mbuf */
    moved = (size_t)(head_part->last - pos);
    mbuf_copy(tail_part, pos, moved);

    /* truncate the original at the split point */
    head_part->last = pos;

    log_debug(LOG_VVERB, "split into mbuf %p len %"PRIu32" and nbuf %p len "
              "%"PRIu32" copied %zu bytes", head_part, mbuf_length(head_part),
              tail_part, mbuf_length(tail_part), moved);

    return tail_part;
}
void dyn_parse_rsp(struct msg *r) { if (log_loggable(LOG_VVERB)) { log_debug(LOG_VVERB, ":::::::::::::::::::::: In dyn_parse_rsp, start to process response :::::::::::::::::::::::: "); msg_dump(r); } if (dyn_parse_core(r)) { struct dmsg *dmsg = r->dmsg; struct mbuf *b = STAILQ_LAST(&r->mhdr, mbuf, next); if (dmsg->type != DMSG_UNKNOWN && dmsg->type != DMSG_RES) { log_debug(LOG_DEBUG, "Resp parser: I got a dnode msg of type %d", dmsg->type); r->state = 0; r->result = MSG_PARSE_OK; r->dyn_state = DYN_DONE; return; } //check whether we need to decrypt the payload if (dmsg->bit_field == 1) { //dmsg->owner->owner->dnode_secured = 1; struct mbuf *decrypted_buf = mbuf_get(); if (decrypted_buf == NULL) { log_debug(LOG_INFO, "Unable to obtain an mbuf for dnode msg's header!"); r->result = MSG_OOM_ERROR; return; } //Dont need to decrypt AES key - pull it out from the conn dyn_aes_decrypt(dmsg->payload, dmsg->plen, decrypted_buf, r->owner->aes_key); b->pos = b->pos + dmsg->plen; r->pos = decrypted_buf->start; mbuf_copy(decrypted_buf, b->pos, mbuf_length(b)); mbuf_insert(&r->mhdr, decrypted_buf); mbuf_remove(&r->mhdr, b); mbuf_put(b); r->mlen = mbuf_length(decrypted_buf); } if (r->redis) { return redis_parse_rsp(r); } return memcache_parse_rsp(r); } //bad case if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "Resp: bad message - cannot parse"); //fix me to do something msg_dump(r); } r->result = MSG_PARSE_AGAIN; }
/*
 * Return the last mbuf of mhdr. If the queue is empty, a new mbuf is obtained
 * from ctx, inserted into the queue and returned.
 *
 * Returns NULL when the queue is empty and mbuf_get() fails; the queue is
 * left unchanged in that case (previously the NULL result was handed to
 * mbuf_queue_insert()).
 */
struct mbuf *mbuf_queue_top(struct context *ctx, struct mhdr *mhdr)
{
    struct mbuf *buf;

    if (!STAILQ_EMPTY(mhdr)) {
        return STAILQ_LAST(mhdr, mbuf, next);
    }

    buf = mbuf_get(ctx);
    if (buf == NULL) {
        return NULL;
    }

    mbuf_queue_insert(mhdr, buf);
    return buf;
}
/*
 * Return an mbuf of q that can take more data: the last mbuf if the queue is
 * non-empty and that mbuf is not full, otherwise a new mbuf obtained from ctx
 * and appended to the tail of q.
 *
 * Returns NULL when a new mbuf is needed and mbuf_get() fails; the queue is
 * left unchanged in that case (previously the NULL result was passed to
 * STAILQ_INSERT_TAIL).
 */
struct mbuf *mbuf_queue_get(struct context *ctx, struct mhdr *q)
{
    struct mbuf *buf = NULL;

    if (!STAILQ_EMPTY(q)) {
        buf = STAILQ_LAST(q, mbuf, next);
    }

    if (buf == NULL || mbuf_full(buf)) {
        buf = mbuf_get(ctx);
        if (buf == NULL) {
            return NULL;
        }
        STAILQ_INSERT_TAIL(q, buf, next);
    }

    return buf;
}
rstatus_t dnode_peer_forward_state(void *rmsg) { rstatus_t status; struct ring_msg *msg = rmsg; struct server_pool *sp = msg->sp; log_debug(LOG_VVERB, "dnode_peer_forward_state: forwarding"); //we assume one mbuf is enough for now - will enhance with multiple mbufs later struct mbuf *mbuf = mbuf_get(); if (mbuf == NULL) { log_debug(LOG_VVERB, "Too bad, not enough memory!"); return DN_ENOMEM; } mbuf_copy(mbuf, msg->data, msg->len); struct array *peers = &sp->peers; uint32_t i,nelem; nelem = array_n(peers); //pick a random peer int ran_index = rand() % nelem; if (ran_index == 0) ran_index += 1; struct server *peer = (struct server *) array_get(peers, ran_index); //log_debug(LOG_VVERB, "Gossiping to node '%.*s'", peer->name.len, peer->name.data); struct conn * conn = dnode_peer_conn(peer); if (conn == NULL) { //running out of connection due to memory exhaust log_debug(LOG_ERR, "Unable to obtain a connection object"); return DN_ERROR; } status = dnode_peer_connect(sp->ctx, peer, conn); if (status != DN_OK ) { dnode_peer_close(sp->ctx, conn); log_debug(LOG_ERR, "Error happened in connecting on conn %d", conn->sd); return DN_ERROR; } dnode_peer_gossip_forward(sp->ctx, conn, sp->redis, mbuf); //free this as nobody else will do //mbuf_put(mbuf); return status; }
// 'unprocessed buf': buf is full and has data unprocessed. // // 1. If last buf is nut full, it is returned. // 2. If `unprocessed` is true and the last buf is the unprocessed buf, // the last buf is returned. // 3. Otherwise a new buf is returned. struct mbuf *conn_get_buf(struct connection *conn, bool unprocessed) { struct mbuf *buf = NULL; struct conn_info *info = conn->info; if (!TAILQ_EMPTY(&info->data)) { buf = TAILQ_LAST(&info->data, mhdr); } if (buf == NULL || (unprocessed ? buf->pos : buf->last) >= buf->end) { buf = mbuf_get(conn->ctx); buf->queue = &info->data; TAILQ_INSERT_TAIL(&info->data, buf, next); } return buf; }
/* * 'unprocessed buf': buf is full and has data unprocessed. * * 1. If last buf is nut full, it is returned. * 2. If `unprocessed` is true and the last buf is the unprocessed buf, * the last buf is returned. * 3. Otherwise a new buf is returned. * * `local` means whether to get buf from `info->local_data` or `info->data`. */ struct mbuf *conn_get_buf(struct connection *conn, bool unprocessed, bool local) { struct mbuf *buf = NULL; struct mhdr *queue = local ? &conn->info->local_data : &conn->info->data; if (!TAILQ_EMPTY(queue)) { buf = TAILQ_LAST(queue, mhdr); } if (buf == NULL || (unprocessed ? buf->pos : buf->last) >= buf->end) { buf = mbuf_get(conn->ctx); buf->queue = queue; TAILQ_INSERT_TAIL(queue, buf, next); } return buf; }
/* * prepend small(small than a mbuf) content into msg */ int msg_prepend(struct msg *msg, uint8_t *pos, size_t n) { mbuf_base *mb = msg->mb; struct mbuf *mbuf; mbuf = mbuf_get(mb); if (mbuf == NULL) { return RMT_ENOMEM; } ASSERT(n <= mbuf_size(mbuf)); mbuf_copy(mbuf, pos, n); msg->mlen += (uint32_t)n; listAddNodeHead(msg->data, mbuf); return RMT_OK; }
/*
 * Turn r into a memcache "SERVER_ERROR <reason>" response.
 *
 * The reason is strerror(err), or "unknown" when err is 0. The text is
 * formatted into a new mbuf appended to r->mhdr, and r->mlen is set to its
 * length. r->type is set to MSG_RSP_MC_SERVER_ERROR even when the mbuf
 * allocation fails.
 *
 * Returns r, or NULL if no mbuf could be obtained.
 */
struct msg *
memcache_generate_error(struct msg *r, err_t err)
{
    char *prefix = "SERVER_ERROR";
    char *reason = err ? strerror(err) : "unknown";
    struct mbuf *buf;
    int written;

    r->type = MSG_RSP_MC_SERVER_ERROR;

    buf = mbuf_get();
    if (buf == NULL) {
        return NULL;
    }
    mbuf_insert(&r->mhdr, buf);

    written = nc_scnprintf(buf->last, mbuf_size(buf), "%s %s"CRLF, prefix, reason);
    buf->last += written;
    r->mlen = (uint32_t)written;

    return r;
}
static void direct_reply(struct context *ctx, struct conn *conn, struct msg *smsg, char *_msg) { struct mbuf *mbuf; int n; struct msg *msg = msg_get(conn, true, conn->redis); if (msg == NULL) { conn->err = errno; conn->done = 1; return; } mbuf = STAILQ_LAST(&msg->mhdr, mbuf, next); if (mbuf == NULL || mbuf_full(mbuf)) { mbuf = mbuf_get(); if (mbuf != NULL) { mbuf_insert(&msg->mhdr, mbuf); msg->pos = mbuf->pos; } } if (mbuf == NULL) { conn->err = errno; conn->done = 1; msg_put(msg); return; } smsg->peer = msg; msg->peer = smsg; msg->request = 0; n = (int)strlen(_msg); memcpy(mbuf->last, _msg, (size_t)n); mbuf->last += n; msg->mlen += (uint32_t)n; smsg->done = 1; event_add_out(ctx->evb, conn); conn->enqueue_outq(ctx, conn, smsg); }
/*
 * Return an mbuf of msg with at least len bytes of free space.
 *
 * The last mbuf of msg->data is reused when it has enough room; otherwise a
 * new mbuf is obtained and appended to the tail of msg->data.
 *
 * Returns NULL if a new mbuf is needed and cannot be obtained.
 */
struct mbuf *
msg_ensure_mbuf(struct msg *msg, size_t len)
{
    listNode *last = listLast(msg->data);
    struct mbuf *fresh;

    /* fast path: the current tail already has enough room */
    if (last != NULL && mbuf_size(listNodeValue(last)) >= len) {
        return listNodeValue(last);
    }

    fresh = mbuf_get(msg->mb);
    if (fresh == NULL) {
        return NULL;
    }

    listAddNodeTail(msg->data, fresh);
    return fresh;
}
/* * note: we should not call conn_add_out here. * because we may call append many times. */ rstatus_t conn_sendq_append(struct conn *conn, char *pos, size_t n) { struct mbuf *mbuf; size_t bytes = 0; size_t len; while (bytes < n) { mbuf = STAILQ_LAST(&conn->send_queue, mbuf, next); if ((mbuf == NULL) || mbuf_full(mbuf)) { mbuf = mbuf_get(); if (mbuf == NULL) { return NC_ENOMEM; } mbuf_insert(&conn->send_queue, mbuf); } len = MIN(mbuf_size(mbuf), n - bytes); mbuf_copy(mbuf, (uint8_t *)pos + bytes, len); bytes += len; } return NC_OK; }
/* * prepend small(small than a mbuf) content into msg */ int msg_prepend_format(struct msg *msg, const char *fmt, ...) { mbuf_base *mb = msg->mb; struct mbuf *mbuf; int32_t n; va_list args; mbuf = mbuf_get(mb); if (mbuf == NULL) { return RMT_ENOMEM; } va_start(args, fmt); n = rmt_vscnprintf(mbuf->last, mbuf_size(mbuf), fmt, args); va_end(args); mbuf->last += n; msg->mlen += (uint32_t)n; listAddNodeHead(msg->data, mbuf); return RMT_OK; }
/* * Sending a mbuf of gossip data over the wire to a peer */ void dnode_peer_gossip_forward(struct context *ctx, struct conn *conn, bool redis, struct mbuf *data_buf) { rstatus_t status; struct msg *msg = msg_get(conn, 1, redis); if (msg == NULL) { log_debug(LOG_DEBUG, "Unable to obtain a msg"); return; } struct mbuf *header_buf = mbuf_get(); if (header_buf == NULL) { log_debug(LOG_DEBUG, "Unable to obtain a data_buf"); msg_put(msg); return; } uint64_t msg_id = peer_msg_id++; if (conn->dnode_secured) { log_debug(LOG_VERB, "Assemble a secured msg to send"); log_debug(LOG_VERB, "AES encryption key: %s\n", base64_encode(conn->aes_key, AES_KEYLEN)); struct mbuf *encrypted_buf = mbuf_get(); if (encrypted_buf == NULL) { loga("Unable to obtain an data_buf for encryption!"); return; //TODOs: need to clean up } status = dyn_aes_encrypt(data_buf->pos, mbuf_length(data_buf), encrypted_buf, conn->aes_key); log_debug(LOG_VERB, "#encrypted bytes : %d", status); //write dnode header dmsg_write(header_buf, msg_id, GOSSIP_SYN, conn, mbuf_length(encrypted_buf)); mbuf_insert_head(&msg->mhdr, header_buf); if (TRACING_LEVEL == LOG_VVERB) { log_hexdump(LOG_VVERB, data_buf->pos, mbuf_length(data_buf), "dyn message original payload: "); log_hexdump(LOG_VVERB, encrypted_buf->pos, mbuf_length(encrypted_buf), "dyn message encrypted payload: "); } mbuf_insert(&msg->mhdr, encrypted_buf); //free data_buf as no one will need it again mbuf_put(data_buf); } else { log_debug(LOG_VERB, "Assemble a non-secured msg to send"); dmsg_write_mbuf(header_buf, msg_id, GOSSIP_SYN, conn, mbuf_length(data_buf)); mbuf_insert_head(&msg->mhdr, header_buf); mbuf_insert(&msg->mhdr, data_buf); } if (TRACING_LEVEL == LOG_VVERB) { log_hexdump(LOG_VVERB, header_buf->pos, mbuf_length(header_buf), "dyn gossip message header: "); msg_dump(msg); } /* enqueue the message (request) into peer inq */ if (TAILQ_EMPTY(&conn->imsg_q)) { status = event_add_out(ctx->evb, conn); if (status != DN_OK) { dnode_req_forward_error(ctx, 
conn, msg); conn->err = errno; return; } } //need to handle a reply //conn->enqueue_outq(ctx, conn, msg); msg->noreply = 1; conn->enqueue_inq(ctx, conn, msg); }
/* dnode sends a response back to a peer */ struct msg * dnode_rsp_send_next(struct context *ctx, struct conn *conn) { if (TRACING_LEVEL == LOG_VVERB) { log_debug(LOG_VVERB, "dnode_rsp_send_next entering"); } ASSERT(conn->dnode_client && !conn->dnode_server); struct msg *msg = rsp_send_next(ctx, conn); if (msg != NULL && conn->dyn_mode) { struct msg *pmsg = TAILQ_FIRST(&conn->omsg_q); //peer request's msg //need to deal with multi-block later uint64_t msg_id = pmsg->dmsg->id; struct mbuf *header_buf = mbuf_get(); if (header_buf == NULL) { loga("Unable to obtain an mbuf for header!"); return NULL; //need to address error here properly } //TODOs: need to set the outcoming conn to be secured too if the incoming conn is secured if (pmsg->owner->dnode_secured || conn->dnode_secured) { if (TRACING_LEVEL == LOG_VVERB) { log_debug(LOG_VVERB, "Encrypting response ..."); loga("AES encryption key: %s\n", base64_encode(conn->aes_key, AES_KEYLEN)); } struct mbuf *data_buf = STAILQ_LAST(&msg->mhdr, mbuf, next); //if (ENCRYPTION) { struct mbuf *encrypted_buf = mbuf_get(); if (encrypted_buf == NULL) { loga("Unable to obtain an mbuf for encryption!"); return NULL; //TODOs: need to clean up } rstatus_t status = dyn_aes_encrypt(data_buf->pos, mbuf_length(data_buf), encrypted_buf, conn->aes_key); if (TRACING_LEVEL == LOG_VVERB) { log_debug(LOG_VERB, "#encrypted bytes : %d", status); } dmsg_write(header_buf, msg_id, DMSG_RES, conn, mbuf_length(encrypted_buf)); if (TRACING_LEVEL == LOG_VVERB) { log_hexdump(LOG_VVERB, data_buf->pos, mbuf_length(data_buf), "resp dyn message - original payload: "); log_hexdump(LOG_VVERB, encrypted_buf->pos, mbuf_length(encrypted_buf), "dyn message encrypted payload: "); } mbuf_copy(header_buf, encrypted_buf->start, mbuf_length(encrypted_buf)); mbuf_insert(&msg->mhdr, header_buf); //remove the original dbuf out of the queue and insert encrypted mbuf to replace mbuf_remove(&msg->mhdr, data_buf); //mbuf_insert(&msg->mhdr, encrypted_buf); mbuf_put(data_buf); 
mbuf_put(encrypted_buf); //} else { // log_debug(LOG_VERB, "no encryption on the response's payload"); // dmsg_write(header_buf, msg_id, DMSG_RES, conn, mbuf_length(data_buf)); //} } else { dmsg_write(header_buf, msg_id, DMSG_RES, conn, 0);//Dont care about 0 or the real length as we don't use that value in unencryption mode mbuf_insert_head(&msg->mhdr, header_buf); } if (TRACING_LEVEL == LOG_VVERB) { log_hexdump(LOG_VVERB, header_buf->pos, mbuf_length(header_buf), "resp dyn message - header: "); msg_dump(msg); } } return msg; }
/*
 * Do a remote procedure call (RPC) and wait for its reply.
 * If from_p is non-null, then we are doing broadcast, and
 * the address from whence the response came is saved there.
 *
 * Parameters:
 *   sa      server address; must be AF_INET, otherwise EAFNOSUPPORT.
 *   sotype  socket type; SOCK_STREAM adds a 4-byte RPC record marker and
 *           connects first, any other type sends to the server address.
 *   prog, vers, func
 *           RPC program, version and procedure written into the header.
 *   data    in: the request body. Once the header is about to be prepended
 *           the chain is taken over (*data is set to NULL) and is released
 *           through mhead on exit. out: on success *data is the reply with
 *           the RPC reply header stripped.
 *   from_p  optional; receives the sender address of a datagram reply.
 *
 * Returns 0 on success, otherwise an errno value (RPC-level failures are
 * mapped to errno values where a mapping exists below).
 *
 * NOTE(review): on the "goto out" exits taken after a reply has been
 * received into m, m does not appear to be freed (only nam and mhead are
 * released at out1) - confirm whether that is a leak.
 */
int
krpc_call(
	struct sockaddr_in *sa,
	u_int sotype, u_int prog, u_int vers, u_int func,
	mbuf_t *data,			/* input/output */
	struct sockaddr_in *from_p)	/* output */
{
	socket_t so;
	struct sockaddr_in *sin;
	mbuf_t m, nam, mhead;
	struct rpc_call *call;
	struct rpc_reply *reply;
	int error, timo, secs;
	size_t len;
	static u_int32_t xid = ~0xFF;
	u_int16_t tport;
	size_t maxpacket = 1<<16;

	/*
	 * Validate address family.
	 * Sorry, this is INET specific...
	 */
	if (sa->sin_family != AF_INET)
		return (EAFNOSUPPORT);

	/* Free at end if not null. */
	nam = mhead = NULL;

	/*
	 * Create socket and set its receive timeout.
	 */
	if ((error = sock_socket(AF_INET, sotype, 0, 0, 0, &so)))
		goto out1;

	{
		struct timeval tv;

		tv.tv_sec = 1;
		tv.tv_usec = 0;

		if ((error = sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv))))
			goto out;
	}

	/*
	 * Enable broadcast if necessary.
	 */
	if (from_p && (sotype == SOCK_DGRAM)) {
		int on = 1;
		if ((error = sock_setsockopt(so, SOL_SOCKET, SO_BROADCAST, &on, sizeof(on))))
			goto out;
	}

	/*
	 * Bind the local endpoint to a reserved port,
	 * because some NFS servers refuse requests from
	 * non-reserved (non-privileged) ports.
	 */
	if ((error = mbuf_get(MBUF_WAITOK, MBUF_TYPE_SONAME, &m)))
		goto out;
	sin = mbuf_data(m);
	bzero(sin, sizeof(*sin));
	mbuf_setlen(m, sizeof(*sin));
	sin->sin_len = sizeof(*sin);
	sin->sin_family = AF_INET;
	sin->sin_addr.s_addr = INADDR_ANY;
	/* Walk down from just below IPPORT_RESERVED until a free port binds. */
	tport = IPPORT_RESERVED;
	do {
		tport--;
		sin->sin_port = htons(tport);
		error = sock_bind(so, (struct sockaddr*)sin);
	} while (error == EADDRINUSE &&
			 tport > IPPORT_RESERVED / 2);
	mbuf_freem(m);
	m = NULL;
	if (error) {
		printf("bind failed\n");
		goto out;
	}

	/*
	 * Setup socket address for the server.
	 * From here on sin points into nam, which stays allocated until exit.
	 */
	if ((error = mbuf_get(MBUF_WAITOK, MBUF_TYPE_SONAME, &nam)))
		goto out;
	sin = mbuf_data(nam);
	mbuf_setlen(nam, sa->sin_len);
	bcopy((caddr_t)sa, (caddr_t)sin, sa->sin_len);

	if (sotype == SOCK_STREAM) {
		struct timeval tv;
		tv.tv_sec = 60;
		tv.tv_usec = 0;
		error = sock_connect(so, mbuf_data(nam), MSG_DONTWAIT);
		if (error && (error != EINPROGRESS))
			goto out;
		error = sock_connectwait(so, &tv);
		if (error) {
			if (error == EINPROGRESS)
				error = ETIMEDOUT;
			printf("krpc_call: error waiting for TCP socket connect: %d\n", error);
			goto out;
		}
	}

	/*
	 * Prepend RPC message header.
	 * The caller's chain is taken over here; it is tracked via mhead.
	 */
	m = *data;
	*data = NULL;
#if	DIAGNOSTIC
	if ((mbuf_flags(m) & MBUF_PKTHDR) == 0)
		panic("krpc_call: send data w/o pkthdr");
	if (mbuf_pkthdr_len(m) < mbuf_len(m))
		panic("krpc_call: pkthdr.len not set");
#endif
	len = sizeof(*call);
	if (sotype == SOCK_STREAM)
		len += 4;  /* account for RPC record marker */
	mhead = m;
	if ((error = mbuf_prepend(&mhead, len, MBUF_WAITOK)))
		goto out;
	if ((error = mbuf_pkthdr_setrcvif(mhead, NULL)))
		goto out;

	/*
	 * Fill in the RPC header
	 */
	if (sotype == SOCK_STREAM) {
		/* first, fill in RPC record marker */
		u_int32_t *recmark = mbuf_data(mhead);
		*recmark = htonl(0x80000000 | (mbuf_pkthdr_len(mhead) - 4));
		call = (struct rpc_call *)(recmark + 1);
	} else {
		call = mbuf_data(mhead);
	}
	bzero((caddr_t)call, sizeof(*call));
	xid++;
	call->rp_xid = htonl(xid);
	/* call->rp_direction = 0; */
	call->rp_rpcvers = htonl(2);
	call->rp_prog = htonl(prog);
	call->rp_vers = htonl(vers);
	call->rp_proc = htonl(func);
	/* call->rp_auth = 0; */
	/* call->rp_verf = 0; */

	/*
	 * Send it, repeatedly, until a reply is received,
	 * but delay each re-send by an increasing amount.
	 * If the delay hits the maximum, start complaining.
	 */
	timo = 0;
	for (;;) {
		struct msghdr msg;

		/* Send RPC request (or re-send). A copy is sent; mhead is kept. */
		if ((error = mbuf_copym(mhead, 0, MBUF_COPYALL, MBUF_WAITOK, &m)))
			goto out;
		bzero(&msg, sizeof(msg));
		if (sotype == SOCK_STREAM) {
			msg.msg_name = NULL;
			msg.msg_namelen = 0;
		} else {
			msg.msg_name = mbuf_data(nam);
			msg.msg_namelen = mbuf_len(nam);
		}
		error = sock_sendmbuf(so, &msg, m, 0, 0);
		if (error) {
			printf("krpc_call: sosend: %d\n", error);
			goto out;
		}
		m = NULL;

		/* Determine new timeout. */
		if (timo < MAX_RESEND_DELAY)
			timo++;
		else
			printf("RPC timeout for server " IP_FORMAT "\n",
				IP_LIST(&(sin->sin_addr.s_addr)));

		/*
		 * Wait for up to timo seconds for a reply.
		 * The socket receive timeout was set to 1 second.
		 */
		secs = timo;
		while (secs > 0) {
			size_t readlen;

			/* Drop a previously received packet that was not our reply. */
			if (m) {
				mbuf_freem(m);
				m = NULL;
			}
			if (sotype == SOCK_STREAM) {
				int maxretries = 60;
				struct iovec aio;
				/* Read the 4-byte record marker into len first. */
				aio.iov_base = &len;
				aio.iov_len = sizeof(u_int32_t);
				bzero(&msg, sizeof(msg));
				msg.msg_iov = &aio;
				msg.msg_iovlen = 1;
				do {
					error = sock_receive(so, &msg, MSG_WAITALL, &readlen);
					if ((error == EWOULDBLOCK) && (--maxretries <= 0))
						error = ETIMEDOUT;
				} while (error == EWOULDBLOCK);
				if (!error && readlen < aio.iov_len) {
					/* only log a message if we got a partial word */
					if (readlen != 0)
						printf("short receive (%ld/%ld) from server " IP_FORMAT "\n",
							readlen, sizeof(u_int32_t), IP_LIST(&(sin->sin_addr.s_addr)));
					error = EPIPE;
				}
				if (error)
					goto out;
				/* Mask off the top bit of the record marker to get the length. */
				len = ntohl(len) & ~0x80000000;
				/*
				 * This is SERIOUS! We are out of sync with the sender
				 * and forcing a disconnect/reconnect is all I can do.
				 */
				if (len > maxpacket) {
					printf("impossible packet length (%ld) from server " IP_FORMAT "\n",
						len, IP_LIST(&(sin->sin_addr.s_addr)));
					error = EFBIG;
					goto out;
				}

				do {
					readlen = len;
					error = sock_receivembuf(so, NULL, &m, MSG_WAITALL, &readlen);
				} while (error == EWOULDBLOCK);

				if (!error && (len > readlen)) {
					printf("short receive (%ld/%ld) from server " IP_FORMAT "\n",
						readlen, len, IP_LIST(&(sin->sin_addr.s_addr)));
					error = EPIPE;
				}
			} else {
				len = maxpacket;
				readlen = len;
				bzero(&msg, sizeof(msg));
				msg.msg_name = from_p;
				msg.msg_namelen = (from_p == NULL) ? 0 : sizeof(*from_p);
				error = sock_receivembuf(so, &msg, &m, 0, &readlen);
			}

			/* A receive timeout uses up one second of the wait budget. */
			if (error == EWOULDBLOCK) {
				secs--;
				continue;
			}
			if (error)
				goto out;
			len = readlen;

			/* Does the reply contain at least a header? */
			if (len < MIN_REPLY_HDR)
				continue;
			if (mbuf_len(m) < MIN_REPLY_HDR)
				continue;
			reply = mbuf_data(m);

			/* Is it the right reply? */
			if (reply->rp_direction != htonl(RPC_REPLY))
				continue;

			if (reply->rp_xid != htonl(xid))
				continue;

			/* Was RPC accepted? (authorization OK) */
			if (reply->rp_astatus != 0) {
				error = ntohl(reply->rp_u.rpu_errno);
				printf("rpc denied, error=%d\n", error);
				/* convert rpc error to errno */
				switch (error) {
				case RPC_MISMATCH:
					error = ERPCMISMATCH;
					break;
				case RPC_AUTHERR:
					error = EAUTH;
					break;
				}
				goto out;
			}

			if (mbuf_len(m) < REPLY_SIZE) {
				error = RPC_SYSTEM_ERR;
			}
			else {
				error = ntohl(reply->rp_u.rpu_ok.rp_rstatus);
			}

			/* Did the call succeed? */
			if (error != 0) {
				printf("rpc status=%d\n", error);
				/* convert rpc error to errno */
				switch (error) {
				case RPC_PROGUNAVAIL:
					error = EPROGUNAVAIL;
					break;
				case RPC_PROGMISMATCH:
					error = EPROGMISMATCH;
					break;
				case RPC_PROCUNAVAIL:
					error = EPROCUNAVAIL;
					break;
				case RPC_GARBAGE:
					error = EINVAL;
					break;
				case RPC_SYSTEM_ERR:
					error = EIO;
					break;
				}
				goto out;
			}

			goto gotreply;	/* break two levels */

		} /* while secs */
	} /* forever send/receive */

	error = ETIMEDOUT;
	goto out;

 gotreply:

	/*
	 * Pull as much as we can into first mbuf, to make
	 * result buffer contiguous.  Note that if the entire
	 * result won't fit into one mbuf, you're out of luck.
	 * XXX - Should not rely on making the entire reply
	 * contiguous (fix callers instead). -gwr
	 */
#if	DIAGNOSTIC
	if ((mbuf_flags(m) & MBUF_PKTHDR) == 0)
		panic("krpc_call: received pkt w/o header?");
#endif
	len = mbuf_pkthdr_len(m);
	if (sotype == SOCK_STREAM)
		len -= 4;  /* the RPC record marker was read separately */
	if (mbuf_len(m) < len) {
		if ((error = mbuf_pullup(&m, len)))
			goto out;
		/* the data pointer is re-read after the pullup */
		reply = mbuf_data(m);
	}

	/*
	 * Strip RPC header
	 */
	len = sizeof(*reply);
	if (reply->rp_u.rpu_ok.rp_auth.rp_atype != 0) {
		len += ntohl(reply->rp_u.rpu_ok.rp_auth.rp_alen);
		len = (len + 3) & ~3; /* XXX? */
	}
	mbuf_adj(m, len);

	/* result */
	*data = m;
 out:
	sock_close(so);
out1:
	/* nam and mhead are NULL unless they were allocated above */
	if (nam) mbuf_freem(nam);
	if (mhead) mbuf_freem(mhead);
	return error;
}
rstatus_t dnode_peer_handshake_announcing(void *rmsg) { rstatus_t status; struct ring_msg *msg = rmsg; struct server_pool *sp = msg->sp; log_debug(LOG_VVERB, "dyn: handshaking peers"); struct array *peers = &sp->peers; uint32_t i,nelem; nelem = array_n(peers); //we assume one mbuf is enough for now - will enhance with multiple mbufs later struct mbuf *mbuf = mbuf_get(); if (mbuf == NULL) { log_debug(LOG_VVERB, "Too bad, not enough memory!"); return DN_ENOMEM; } //annoucing myself by sending msg: 'dc$rack$token,started_ts,node_state,node_dns' mbuf_write_string(mbuf, &sp->dc); mbuf_write_char(mbuf, '$'); mbuf_write_string(mbuf, &sp->rack); mbuf_write_char(mbuf, '$'); struct dyn_token *token = (struct dyn_token *) array_get(&sp->tokens, 0); if (token == NULL) { log_debug(LOG_VVERB, "Why? This should not be null!"); mbuf_put(mbuf); return DN_ERROR; } mbuf_write_uint32(mbuf, token->mag[0]); mbuf_write_char(mbuf, ','); int64_t cur_ts = (int64_t)time(NULL); mbuf_write_uint64(mbuf, cur_ts); mbuf_write_char(mbuf, ','); mbuf_write_uint8(mbuf, sp->ctx->dyn_state); mbuf_write_char(mbuf, ','); char *broadcast_addr = get_broadcast_address(sp); mbuf_write_bytes(mbuf, broadcast_addr, dn_strlen(broadcast_addr)); //for each peer, send a registered msg for (i = 0; i < nelem; i++) { struct server *peer = (struct server *) array_get(peers, i); if (peer->is_local) continue; log_debug(LOG_VVERB, "Gossiping to node '%.*s'", peer->name.len, peer->name.data); struct conn * conn = dnode_peer_conn(peer); if (conn == NULL) { //running out of connection due to memory exhaust log_debug(LOG_DEBUG, "Unable to obtain a connection object"); return DN_ERROR; } status = dnode_peer_connect(sp->ctx, peer, conn); if (status != DN_OK ) { dnode_peer_close(sp->ctx, conn); log_debug(LOG_DEBUG, "Error happened in connecting on conn %d", conn->sd); return DN_ERROR; } //conn-> dnode_peer_gossip_forward(sp->ctx, conn, sp->redis, mbuf); //peer_gossip_forward1(sp->ctx, conn, sp->redis, &data); } //free this as 
nobody else will do //mbuf_put(mbuf); return DN_OK; }
void dyn_parse_rsp(struct msg *r) { #ifdef DN_DEBUG_LOG log_debug(LOG_VERB, "In dyn_parse_rsp, start to process response :::::::::::::::::::::::: "); msg_dump(r); #endif if (dyn_parse_core(r)) { struct dmsg *dmsg = r->dmsg; if (dmsg->type != DMSG_UNKNOWN && dmsg->type != DMSG_RES) { log_debug(LOG_DEBUG, "Resp parser: I got a dnode msg of type %d", dmsg->type); r->state = 0; r->result = MSG_PARSE_OK; r->dyn_state = DYN_DONE; return; } //check whether we need to decrypt the payload if (dmsg->bit_field == 1) { //dmsg->owner->owner->dnode_secured = 1; struct mbuf *decrypted_buf = mbuf_get(); if (decrypted_buf == NULL) { log_debug(LOG_INFO, "Unable to obtain an mbuf for dnode msg's header!"); return; } #ifdef DN_DEBUG_LOG log_debug(LOG_VERB, "encrypted aes key length : %d", dmsg->mlen); loga("AES encryption key from conn: %s\n", base64_encode(r->owner->aes_key, AES_KEYLEN)); #endif //Dont need to decrypt AES key - pull it out from the conn dyn_aes_decrypt(dmsg->payload, dmsg->plen, decrypted_buf, r->owner->aes_key); #ifdef DN_DEBUG_LOG log_hexdump(LOG_VERB, decrypted_buf->pos, mbuf_length(decrypted_buf), "dyn message decrypted payload: "); #endif struct mbuf *b = STAILQ_LAST(&r->mhdr, mbuf, next); b->last = b->pos; r->pos = decrypted_buf->start; mbuf_insert(&r->mhdr, decrypted_buf); } if (r->redis) return redis_parse_rsp(r); return memcache_parse_rsp(r); } #ifdef DN_DEBUG_LOG //bad case log_debug(LOG_DEBUG, "Bad message - cannot parse"); //fix me to do something msg_dump(r); #endif //r->state = 0; //r->result = MSG_PARSE_OK; }
void dyn_parse_rsp(struct msg *r) { if (log_loggable(LOG_VVERB)) { log_debug(LOG_VVERB, ":::::::::::::::::::::: In dyn_parse_rsp, start to process response :::::::::::::::::::::::: "); msg_dump(r); } bool done_parsing = false; struct mbuf *b = STAILQ_LAST(&r->mhdr, mbuf, next); if (dyn_parse_core(r)) { struct dmsg *dmsg = r->dmsg; struct conn *conn = r->owner; conn->same_dc = dmsg->same_dc; if (dmsg->type != DMSG_UNKNOWN && dmsg->type != DMSG_RES) { log_debug(LOG_DEBUG, "Resp parser: I got a dnode msg of type %d", dmsg->type); r->state = 0; r->result = MSG_PARSE_OK; r->dyn_state = DYN_DONE; return; } if (r->dyn_state == DYN_DONE && dmsg->bit_field == 1) { dmsg->owner->owner->dnode_secured = 1; r->owner->dnode_crypto_state = 1; r->dyn_state = DYN_POST_DONE; r->result = MSG_PARSE_REPAIR; if (dmsg->mlen > 1) { //Decrypt AES key dyn_rsa_decrypt(dmsg->data, aes_decrypted_buf); strncpy(r->owner->aes_key, aes_decrypted_buf, strlen(aes_decrypted_buf)); } if (dmsg->plen + b->pos <= b->last) { struct mbuf *decrypted_buf = mbuf_get(); if (decrypted_buf == NULL) { loga("Unable to obtain an mbuf for dnode msg's header!"); r->result = MSG_OOM_ERROR; return; } dyn_aes_decrypt(b->pos, dmsg->plen, decrypted_buf, r->owner->aes_key); b->pos = b->pos + dmsg->plen; r->pos = decrypted_buf->start; mbuf_copy(decrypted_buf, b->pos, mbuf_length(b)); mbuf_insert(&r->mhdr, decrypted_buf); mbuf_remove(&r->mhdr, b); mbuf_put(b); r->mlen = mbuf_length(decrypted_buf); return data_store_parse_rsp(r); } //Subtract already received bytes dmsg->plen -= b->last - b->pos; return; } else if (r->dyn_state == DYN_POST_DONE) { struct mbuf *last_buf = STAILQ_LAST(&r->mhdr, mbuf, next); if (last_buf->read_flip == 1) { data_store_parse_rsp(r); } else { r->result = MSG_PARSE_AGAIN; } return; } if (done_parsing) return; return data_store_parse_rsp(r); } //bad case if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "Resp: bad message - cannot parse"); //fix me to do something msg_dump(r); } r->result = 
MSG_PARSE_AGAIN; }
void dyn_parse_req(struct msg *r) { if (log_loggable(LOG_VVERB)) { log_debug(LOG_VVERB, ":::::::::::::::::::::: In dyn_parse_req, start to process request :::::::::::::::::::::: "); msg_dump(r); } bool done_parsing = false; struct mbuf *b = STAILQ_LAST(&r->mhdr, mbuf, next); if (dyn_parse_core(r)) { struct dmsg *dmsg = r->dmsg; struct conn *conn = r->owner; conn->same_dc = dmsg->same_dc; if (dmsg->type != DMSG_UNKNOWN && dmsg->type != DMSG_REQ && dmsg->type != DMSG_REQ_FORWARD && dmsg->type != GOSSIP_SYN) { r->state = 0; r->result = MSG_PARSE_OK; r->dyn_state = DYN_DONE; return; } if (r->dyn_state == DYN_DONE && dmsg->bit_field == 1) { dmsg->owner->owner->dnode_secured = 1; r->owner->dnode_crypto_state = 1; r->dyn_state = DYN_POST_DONE; r->result = MSG_PARSE_REPAIR; if (dmsg->mlen > 1) { //Decrypt AES key dyn_rsa_decrypt(dmsg->data, aes_decrypted_buf); strncpy(r->owner->aes_key, aes_decrypted_buf, strlen(aes_decrypted_buf)); } if (dmsg->plen + b->pos <= b->last) { struct mbuf *decrypted_buf = mbuf_get(); if (decrypted_buf == NULL) { loga("Unable to obtain an mbuf for dnode msg's header!"); r->result = MSG_OOM_ERROR; return; } dyn_aes_decrypt(b->pos, dmsg->plen, decrypted_buf, r->owner->aes_key); b->pos = b->pos + dmsg->plen; r->pos = decrypted_buf->start; mbuf_copy(decrypted_buf, b->pos, mbuf_length(b)); mbuf_insert(&r->mhdr, decrypted_buf); mbuf_remove(&r->mhdr, b); mbuf_put(b); r->mlen = mbuf_length(decrypted_buf); data_store_parse_req(r); } //substract alraedy received bytes dmsg->plen -= b->last - b->pos; return; } else if (r->dyn_state == DYN_POST_DONE) { struct mbuf *last_buf = STAILQ_LAST(&r->mhdr, mbuf, next); if (last_buf->read_flip == 1) { data_store_parse_req(r); } else { r->result = MSG_PARSE_AGAIN; } return; } if (dmsg->type == GOSSIP_SYN) { //TODOs: need to address multi-buffer msg later dmsg->payload = b->pos; b->pos = b->pos + dmsg->plen; r->pos = b->pos; done_parsing = true; } if (done_parsing) return; return data_store_parse_req(r); } //bad case 
if (log_loggable(LOG_VVERB)) { log_debug(LOG_VVERB, "Bad or splitted message"); //fix me to do something msg_dump(r); } r->result = MSG_PARSE_AGAIN; }
static bool dyn_parse_core(struct msg *r) { struct dmsg *dmsg; struct mbuf *b; uint8_t *p, *token; uint8_t ch = ' '; uint64_t num = 0; dyn_state = r->dyn_state; if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "dyn_state: %d", r->dyn_state); } if (r->dyn_state == DYN_DONE || r->dyn_state == DYN_POST_DONE) return true; b = STAILQ_LAST(&r->mhdr, mbuf, next); dmsg = r->dmsg; if (dmsg == NULL) { r->dmsg = dmsg_get(); dmsg = r->dmsg; dmsg->owner = r; if (dmsg == NULL) {//should track this as a dropped message loga("unable to create a new dmsg"); goto error; //should count as OOM error } } token = NULL; for (p = r->pos; p < b->last; p++) { ch = *p; switch (dyn_state) { case DYN_START: if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "DYN_START"); } if (ch != ' ' && ch != '$') { break; } if (ch == ' ') { if (token == NULL) token = p; break; } if (ch == '$') { if (p + 5 < b->last) { if ((*(p+1) == '2') && (*(p+2) == '0') && (*(p+3) == '1') && (*(p+4) == '4') && (*(p+5) == '$')) { dyn_state = DYN_MAGIC_STRING; p += 5; } else { //goto skip; token = NULL; //reset } } else { goto split; } } else { loga("Facing a weird char %c", p); //goto skip; token = NULL; //reset } break; case DYN_MAGIC_STRING: if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "DYN_MAGIC_STRING"); } if (ch == ' ') { dyn_state = DYN_MSG_ID; num = 0; break; } else { //loga("char is '%c %c %c %c'", *(p-2), *(p-1), ch, *(p+1)); token = NULL; loga("Facing a weird char %c", p); //goto skip; dyn_state = DYN_START; } break; case DYN_MSG_ID: if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "DYN_MSG_ID"); log_debug(LOG_DEBUG, "num = %d", num); } if (isdigit(ch)) { num = num*10 + (ch - '0'); } else if (ch == ' ' && isdigit(*(p-1))) { if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "MSG ID : %d", num); } dmsg->id = num; dyn_state = DYN_TYPE_ID; num = 0; } else { //loga("char is '%c %c %c %c'", *(p-2), *(p-1), ch, *(p+1)); //goto skip; token = NULL; //reset dyn_state = DYN_START; if (ch == '$') p -= 1; } 
break; case DYN_TYPE_ID: if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "DYN_TYPE_ID: num = %d", num); } if (isdigit(ch)) { num = num*10 + (ch - '0'); } else if (ch == ' ' && isdigit(*(p-1))) { if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "Type Id: %d", num); } dmsg->type = num; dyn_state = DYN_BIT_FIELD; num = 0; } else { //loga("char is '%c %c %c %c'", *(p-2), *(p-1), ch, *(p+1)); token = NULL; dyn_state = DYN_START; if (ch == '$') p -= 1; } break; case DYN_BIT_FIELD: if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "DYN_BIT_FIELD, num = %d", num); } if (isdigit(ch)) { num = num*10 + (ch - '0'); } else if (ch == ' ' && isdigit(*(p-1))) { if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "DYN_BIT_FIELD : %d", num); } dmsg->bit_field = num & 0xF; dyn_state = DYN_VERSION; num = 0; } else { token = NULL; //loga("char is '%c %c %c %c'", *(p-2), *(p-1), ch, *(p+1)); dyn_state = DYN_START; if (ch == '$') p -= 1; } break; case DYN_VERSION: if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "DYN_VERSION: num = %d", num); } if (isdigit(ch)) { num = num*10 + (ch - '0'); } else if (ch == ' ' && isdigit(*(p-1))) { if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "VERSION : %d", num); } dmsg->version = num; dyn_state = DYN_SAME_DC; num = 0; } else { token = NULL; //loga("char is '%c %c %c %c'", *(p-2), *(p-1), ch, *(p+1)); dyn_state = DYN_START; if (ch == '$') p -= 1; } break; case DYN_SAME_DC: if (isdigit(ch)) { dmsg->same_dc = ch - '0'; if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "DYN_SAME_DC %d", dmsg->same_dc); } } else if (ch == ' ' && isdigit(*(p-1))) { dyn_state = DYN_DATA_LEN; num = 0; } else { token = NULL; //loga("char is '%c %c %c %c'", *(p-2), *(p-1), ch, *(p+1)); dyn_state = DYN_START; if (ch == '$') p -= 1; } break; case DYN_DATA_LEN: if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "DYN_DATA_LEN: num = %d", num); } if (ch == '*') { break; } else if (isdigit(ch)) { num = num*10 + (ch - '0'); } else if (ch == ' ' && isdigit(*(p-1))) 
{ if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "Data len: %d", num); } dmsg->mlen = num; dyn_state = DYN_DATA; num = 0; } else { token = NULL; //loga("char is '%c %c %c %c'", *(p-2), *(p-1), ch, *(p+1)); dyn_state = DYN_START; if (ch == '$') p -= 1; } break; case DYN_DATA: if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "DYN_DATA"); } if (p + dmsg->mlen < b->last) { dmsg->data = p; p += dmsg->mlen - 1; dyn_state = DYN_SPACES_BEFORE_PAYLOAD_LEN; } else { //loga("char is '%c %c %c %c'", *(p-2), *(p-1), ch, *(p+1)); goto split; } break; case DYN_SPACES_BEFORE_PAYLOAD_LEN: if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "DYN_SPACES_BEFORE_PAYLOAD_LEN"); } if (ch == ' ') { break; } else if (ch == '*') { dyn_state = DYN_PAYLOAD_LEN; num = 0; } break; case DYN_PAYLOAD_LEN: if (isdigit(ch)) { num = num*10 + (ch - '0'); } else if (ch == CR) { if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "Payload len: %d", num); } dmsg->plen = num; num = 0; dyn_state = DYN_CRLF_BEFORE_DONE; } else { token = NULL; dyn_state = DYN_START; if (ch == '$') p -= 1; } break; case DYN_CRLF_BEFORE_DONE: if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "DYN_CRLF_BEFORE_DONE"); } if (*p == LF) { dyn_state = DYN_DONE; } else { token = NULL; dyn_state = DYN_START; if (ch == '$') p -= 1; } break; case DYN_DONE: if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "DYN_DONE"); } r->pos = p; dmsg->payload = p; r->dyn_state = DYN_DONE; b->pos = p; goto done; break; default: NOT_REACHED(); break; } } if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "Not fully parsed yet!!!!!!"); } split: //this is an attempt recovery when we got a bad message //we try to look for the start the next good one and throw away the bad part if (r->dyn_state == DYN_START) { r->result = MSG_PARSE_AGAIN; if (b->last == b->end) { struct mbuf *nbuf = mbuf_get(); if (nbuf == NULL) { loga("Unable to obtain a new mbuf for replacement!"); mbuf_put(b); nbuf = mbuf_get(); mbuf_insert_head(&r->mhdr, nbuf); r->pos = 
nbuf->pos; return false; } //replacing the bad mbuf with a new and empty mbuf mbuf_insert(&r->mhdr, nbuf); mbuf_remove(&r->mhdr, b); mbuf_put(b); r->pos = nbuf->pos; return false; } else { //split it and throw away the bad portion struct mbuf *nbuf; nbuf = mbuf_split(&r->mhdr, r->pos, NULL, NULL); if (nbuf == NULL) { return DN_ENOMEM; } mbuf_insert(&r->mhdr, nbuf); mbuf_remove(&r->mhdr, b); r->pos = nbuf->pos; return false; } } if (mbuf_length(b) == 0 || b->last == b->end) { if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "Would this case ever happen?"); } r->result = MSG_PARSE_AGAIN; return false; } if (r->pos == b->last) { if (log_loggable(LOG_DEBUG)) { log_debug(LOG_DEBUG, "Forward to reading the new block of data"); } r->dyn_state = DYN_START; r->result = MSG_PARSE_AGAIN; token = NULL; return false; } if (log_loggable(LOG_VVERB)) { log_debug(LOG_VVERB, "in split"); } r->dyn_state = DYN_START; r->pos = token; r->result = MSG_PARSE_REPAIR; if (log_loggable(LOG_VVERB)) { log_hexdump(LOG_VVERB, b->pos, mbuf_length(b), "split and inspecting req %"PRIu64" " "res %d type %d state %d", r->id, r->result, r->type, r->dyn_state); log_hexdump(LOG_VVERB, b->start, b->last - b->start, "split and inspecting full req %"PRIu64" " "res %d type %d state %d", r->id, r->result, r->type, r->dyn_state); } return false; done: r->pos = p; dmsg->source_address = r->owner->addr; if (log_loggable(LOG_VVERB)) { log_debug(LOG_VVERB, "at done with p at %d", p); log_hexdump(LOG_VVERB, r->pos, b->last - r->pos, "done and inspecting req %"PRIu64" " "res %d type %d state %d", r->id, r->result, r->type, r->dyn_state); log_hexdump(LOG_VVERB, b->start, b->last - b->start, "inspecting req %"PRIu64" " "res %d type %d state %d", r->id, r->result, r->type, r->dyn_state); } return true; error: log_debug(LOG_ERR, "at error for state %d and c %c", dyn_state, *p); r->result = MSG_PARSE_ERROR; r->pos = p; errno = EINVAL; if (log_loggable(LOG_ERR)) { log_hexdump(LOG_ERR, b->pos, mbuf_length(b), "parsed 
bad req %"PRIu64" " "res %d type %d state %d", r->id, r->result, r->type, dyn_state); log_hexdump(LOG_ERR, p, b->last - p, "inspecting req %"PRIu64" " "res %d type %d state %d", r->id, r->result, r->type, dyn_state); } r->dyn_state = dyn_state; return false; }
/* * copy one response from src to dst * return bytes copied * */ static rstatus_t memcache_copy_bulk(struct msg *dst, struct msg *src) { struct mbuf *mbuf, *nbuf; uint8_t *p; uint32_t len = 0; uint32_t bytes = 0; uint32_t i = 0; for (mbuf = STAILQ_FIRST(&src->mhdr); mbuf && mbuf_empty(mbuf); mbuf = STAILQ_FIRST(&src->mhdr)) { mbuf_remove(&src->mhdr, mbuf); mbuf_put(mbuf); } mbuf = STAILQ_FIRST(&src->mhdr); if (mbuf == NULL) { return NC_OK; /* key not exists */ } p = mbuf->pos; /* get : VALUE key 0 len\r\nval\r\n */ /* gets: VALUE key 0 len cas\r\nval\r\n */ ASSERT(*p == 'V'); for (i = 0; i < 3; i++) { /* eat 'VALUE key 0 ' */ for (; *p != ' ';) { p++; } p++; } len = 0; for (; p < mbuf->last && isdigit(*p); p++) { len = len * 10 + (uint32_t)(*p - '0'); } for (; p < mbuf->last && ('\r' != *p); p++) { /* eat cas for gets */ ; } len += CRLF_LEN * 2; len += (p - mbuf->pos); bytes = len; /* copy len bytes to dst */ for (; mbuf;) { if (mbuf_length(mbuf) <= len) { /* steal this mbuf from src to dst */ nbuf = STAILQ_NEXT(mbuf, next); mbuf_remove(&src->mhdr, mbuf); mbuf_insert(&dst->mhdr, mbuf); len -= mbuf_length(mbuf); mbuf = nbuf; } else { /* split it */ nbuf = mbuf_get(); if (nbuf == NULL) { return NC_ENOMEM; } mbuf_copy(nbuf, mbuf->pos, len); mbuf_insert(&dst->mhdr, nbuf); mbuf->pos += len; break; } } dst->mlen += bytes; src->mlen -= bytes; log_debug(LOG_VVERB, "memcache_copy_bulk copy bytes: %d", bytes); return NC_OK; }
static rstatus_t sentinel_proc_pub(struct context *ctx, struct msg *msg) { rstatus_t status; struct string pool_name, server_name, server_ip, tmp_string, pub_titile, pub_event; struct mbuf *line_buf; int server_port; string_init(&tmp_string); string_init(&pool_name); string_init(&server_name); string_init(&server_ip); string_set_text(&pub_titile, "pmessage"); string_set_text(&pub_event, "+switch-master"); line_buf = mbuf_get(); if (line_buf == NULL) { goto error; } /* get line in line num 3 for pub titile */ msg_read_line(msg, line_buf, 3); if (mbuf_length(line_buf) == 0) { log_error("read line failed from sentinel pmessage when skip line not used."); goto error; } status = mbuf_read_string(line_buf, CR, &tmp_string); if (status != NC_OK || string_compare(&pub_titile, &tmp_string)) { log_error("pub title error(lineinfo %.*s)", tmp_string.len, tmp_string.data); goto error; } /* get line in line num 7 for pub event */ msg_read_line(msg, line_buf, 4); if (mbuf_length(line_buf) == 0) { log_error("read line failed from sentinel pmessage when skip line not used."); goto error; } status = mbuf_read_string(line_buf, CR, &tmp_string); if (status != NC_OK || string_compare(&pub_event, &tmp_string)) { log_error("pub channel error(lineinfo %.*s)", tmp_string.len, tmp_string.data); goto error; } /* get line in line num 9 for pub info */ msg_read_line(msg, line_buf, 2); if (mbuf_length(line_buf) == 0) { log_error("read line failed from sentinel pmessage when skip line not used."); goto error; } /* parse switch master info */ /* get pool name */ status = mbuf_read_string(line_buf, SENTINEL_SERVERNAME_SPLIT, &pool_name); if (status != NC_OK) { log_error("get pool name string failed."); goto error; } /* get server name */ status = mbuf_read_string(line_buf, ' ', &server_name); if (status != NC_OK) { log_error("get server name string failed."); goto error; } /* skip old ip and port string */ status = mbuf_read_string(line_buf, ' ', NULL); if (status != NC_OK) { log_error("skip old 
ip string failed."); goto error; } status = mbuf_read_string(line_buf, ' ', NULL); if (status != NC_OK) { log_error("skip old port string failed."); goto error; } /* get new server ip string */ status = mbuf_read_string(line_buf, ' ', &server_ip); if (status != NC_OK) { log_error("get new server ip string failed."); goto error; } /* get new server port */ status = mbuf_read_string(line_buf, CR, &tmp_string); if (status != NC_OK) { log_error("get new server port string failed."); goto error; } server_port = nc_atoi(tmp_string.data, tmp_string.len); if (server_port < 0) { log_error("tanslate server port string to int failed."); goto error; } status = server_switch(ctx, &pool_name, &server_name, &server_ip, server_port); if (status == NC_OK) { conf_rewrite(ctx); } status = NC_OK; done: if (line_buf != NULL) { mbuf_put(line_buf); } string_deinit(&tmp_string); string_deinit(&server_ip); string_deinit(&server_name); string_deinit(&pool_name); return status; error: status = NC_ERROR; goto done; }
static rstatus_t sentinel_proc_acksub(struct context *ctx, struct msg *msg) { rstatus_t status; struct string sub_titile, sub_channel, sub_ok, tmp_string; struct mbuf *line_buf; string_init(&tmp_string); string_set_text(&sub_titile, "psubscribe"); string_set_text(&sub_channel, "+switch-master"); string_set_text(&sub_ok, ":1"); line_buf = mbuf_get(); if (line_buf == NULL) { goto error; } /* get line in line num 3 for sub titile */ msg_read_line(msg, line_buf, 3); if (mbuf_length(line_buf) == 0) { log_error("read line failed from sentinel ack sub when skip line not used."); goto error; } status = mbuf_read_string(line_buf, CR, &tmp_string); if (status != NC_OK || string_compare(&sub_titile, &tmp_string)) { goto error; } /* get line in line num 5 for sub channel */ msg_read_line(msg, line_buf, 2); if (mbuf_length(line_buf) == 0) { log_error("read line failed from sentinel ack sub when skip line not used."); goto error; } status = mbuf_read_string(line_buf, CR, &tmp_string); if (status != NC_OK || string_compare(&sub_channel, &tmp_string)) { goto error; } /* get sub status */ msg_read_line(msg, line_buf, 1); if (line_buf == 0) { log_error("read line failed from sentinel ack sub when skip line not used."); goto error; } status = mbuf_read_string(line_buf, CR, &tmp_string); if (status != NC_OK || string_compare(&sub_ok, &tmp_string)) { goto error; } log_debug(LOG_INFO, "success sub channel %.*s from sentinel", sub_channel.len, sub_channel.data); status = NC_OK; done: if (line_buf != NULL) { mbuf_put(line_buf); } string_deinit(&tmp_string); return status; error: status = NC_ERROR; goto done; }
static rstatus_t sentinel_proc_sentinel_info(struct context *ctx, struct msg *msg) { rstatus_t status; int i, master_num, switch_num; struct string pool_name, server_name, server_ip, tmp_string, sentinel_masters_prefix, master_ok; struct mbuf *line_buf; int server_port; string_init(&tmp_string); string_init(&pool_name); string_init(&server_name); string_init(&server_ip); string_set_text(&sentinel_masters_prefix, "sentinel_masters"); string_set_text(&master_ok, "status=ok"); line_buf = mbuf_get(); if (line_buf == NULL) { goto error; } /* get sentinel master num at line 3 */ msg_read_line(msg, line_buf, 3); if (mbuf_length(line_buf) == 0) { log_error("read line failed from sentinel ack info when skip line not used."); goto error; } status = mbuf_read_string(line_buf, ':', &tmp_string); if (status != NC_OK || string_compare(&sentinel_masters_prefix, &tmp_string)) { goto error; } status = mbuf_read_string(line_buf, CR, &tmp_string); if (status != NC_OK) { goto error; } master_num = nc_atoi(tmp_string.data, tmp_string.len); if (master_num < 0) { log_error("parse master number from sentinel ack info failed."); goto error; } /* skip 3 line in ack info which is not used. 
*/ msg_read_line(msg, line_buf, 3); if (mbuf_length(line_buf) == 0) { log_error("read line failed from sentinel ack info when skip line not used."); goto error; } /* parse master info from sentinel ack info */ switch_num = 0; for (i = 0; i < master_num; i++) { msg_read_line(msg, line_buf, 1); if (mbuf_length(line_buf) == 0) { log_error("read line failed from sentinel ack info when parse master item."); goto error; } log_debug(LOG_INFO, "master item line : %.*s", mbuf_length(line_buf), line_buf->pos); /* skip master item prefix */ status = mbuf_read_string(line_buf, ':', NULL); if (status != NC_OK) { log_error("skip master item prefix failed"); goto error; } /* skip master item server name prefix */ status = mbuf_read_string(line_buf, '=', NULL); if (status != NC_OK) { log_error("skip master item server name prefix failed."); goto error; } /* get server pool name */ status = mbuf_read_string(line_buf, SENTINEL_SERVERNAME_SPLIT, &pool_name); if (status != NC_OK) { log_error("get server pool name failed."); goto error; } /* get server name */ status = mbuf_read_string(line_buf, ',', &server_name); if (status != NC_OK) { log_error("get server name failed."); goto error; } /* get master status */ status = mbuf_read_string(line_buf, ',', &tmp_string); if (status != NC_OK) { log_error("get master status failed."); goto error; } if (string_compare(&master_ok, &tmp_string)) { log_error("master item status is not ok, use it anyway"); } /* skip ip string prefix name */ status = mbuf_read_string(line_buf, '=', NULL); if (status != NC_OK) { log_error("skip master item address prefix failed."); goto error; } /* get server ip string */ status = mbuf_read_string(line_buf, ':', &server_ip); if (status != NC_OK) { log_error("get server ip string failed."); goto error; } /* get server port */ status = mbuf_read_string(line_buf, ',', &tmp_string); if (status != NC_OK) { log_error("get server port string failed."); goto error; } server_port = nc_atoi(tmp_string.data, tmp_string.len); 
if (server_port < 0) { log_error("tanslate server port string to int failed."); goto error; } status = server_switch(ctx, &pool_name, &server_name, &server_ip, server_port); /* if server is switched, add switch number */ if (status == NC_OK) { switch_num++; } } if (switch_num > 0) { conf_rewrite(ctx); } status = NC_OK; done: if (line_buf != NULL) { mbuf_put(line_buf); } string_deinit(&tmp_string); string_deinit(&pool_name); string_deinit(&server_name); string_deinit(&server_ip); return status; error: status = NC_ERROR; goto done; }
/* Forward a client request over to a peer */ void dnode_peer_req_forward(struct context *ctx, struct conn *c_conn, struct conn *p_conn, struct msg *msg, struct rack *rack, uint8_t *key, uint32_t keylen) { if (TRACING_LEVEL == LOG_VVERB) { log_debug(LOG_VVERB, "dnode_peer_req_forward entering"); } rstatus_t status; /* enqueue message (request) into client outq, if response is expected */ if (!msg->noreply) { c_conn->enqueue_outq(ctx, c_conn, msg); } ASSERT(!p_conn->dnode_client && !p_conn->dnode_server); ASSERT(c_conn->client); /* enqueue the message (request) into peer inq */ if (TAILQ_EMPTY(&p_conn->imsg_q)) { status = event_add_out(ctx->evb, p_conn); if (status != DN_OK) { dnode_req_forward_error(ctx, p_conn, msg); p_conn->err = errno; return; } } uint64_t msg_id = peer_msg_id++; struct mbuf *header_buf = mbuf_get(); if (header_buf == NULL) { loga("Unable to obtain an mbuf for dnode msg's header!"); return; } if (p_conn->dnode_secured) { //Encrypting and adding header for a request struct mbuf *data_buf = STAILQ_LAST(&msg->mhdr, mbuf, next); //TODOs: need to deal with multi-block later log_debug(LOG_VERB, "AES encryption key: %s\n", base64_encode(p_conn->aes_key, AES_KEYLEN)); struct mbuf *encrypted_buf = mbuf_get(); if (encrypted_buf == NULL) { loga("Unable to obtain an mbuf for encryption!"); return; //TODOs: need to clean up } status = dyn_aes_encrypt(data_buf->pos, mbuf_length(data_buf), encrypted_buf, p_conn->aes_key); log_debug(LOG_VERB, "#encrypted bytes : %d", status); //write dnode header dmsg_write(header_buf, msg_id, DMSG_REQ, p_conn, mbuf_length(encrypted_buf)); mbuf_insert_head(&msg->mhdr, header_buf); log_hexdump(LOG_VERB, data_buf->pos, mbuf_length(data_buf), "dyn message original payload: "); log_hexdump(LOG_VERB, encrypted_buf->pos, mbuf_length(encrypted_buf), "dyn message encrypted payload: "); //remove the original dbuf out of the queue and insert encrypted mbuf to replace mbuf_remove(&msg->mhdr, data_buf); mbuf_insert(&msg->mhdr, 
encrypted_buf); //free it as no one will need it again mbuf_put(data_buf); } else { //write dnode header dmsg_write(header_buf, msg_id, DMSG_REQ, p_conn, 0); mbuf_insert_head(&msg->mhdr, header_buf); } if (TRACING_LEVEL == LOG_VVERB) { log_hexdump(LOG_VVERB, header_buf->pos, mbuf_length(header_buf), "dyn message header: "); msg_dump(msg); } p_conn->enqueue_inq(ctx, p_conn, msg); dnode_peer_req_forward_stats(ctx, p_conn->owner, msg); if (TRACING_LEVEL == LOG_VERB) { log_debug(LOG_VERB, "remote forward from c %d to s %d req %"PRIu64" len %"PRIu32 " type %d with key '%.*s'", c_conn->sd, p_conn->sd, msg->id, msg->mlen, msg->type, keylen, key); } }