// Queues a binary request item from an upstream conn onto one
// downstream conn and arms that downstream conn for writing.
//
// uc      - upstream conn the request came from (only used for logging).
// d       - downstream that owns c; its err_oom stat is bumped on failure.
// it      - item whose data starts with a binary request header.
// c       - downstream conn to write to; closed on failure.
// self    - unused.
// vbucket - when >= 0, written (network order) into the request
//           header's reserved field before forwarding.
//
// Returns true when the item was queued and the write event was set.
//
bool b2b_forward_item_vbucket(conn *uc, downstream *d, item *it,
                              conn *c, bool self, int vbucket) {
    (void)self;

    assert(d != NULL);
    assert(d->ptd != NULL);
    assert(uc != NULL);
    assert(uc->next == NULL);
    assert(uc->noreply == false);
    assert(c != NULL);

    // Assuming we're already connected to downstream.
    //
    // TODO: Optimize to self codepath.
    //
    if (settings.verbose > 2) {
        moxi_log_write("%d: b2b_forward_item_vbucket %x to %d, vbucket %d\n",
                       uc->sfd, uc->cmd, c->sfd, vbucket);
    }

    protocol_binary_request_header *request_hdr =
        (protocol_binary_request_header *) ITEM_data(it);

    if (vbucket >= 0) {
        request_hdr->request.reserved = htons(vbucket);
    }

    if (add_conn_item(c, it) != true) {
        goto forward_failed;
    }

    // The caller keeps its refcount, and we need our own.
    //
    it->refcount++;

    if (add_iov(c, ITEM_data(it), it->nbytes) != 0) {
        goto forward_failed;
    }

    conn_set_state(c, conn_mwrite);
    c->write_and_go = conn_new_cmd;

    if (!update_event(c, EV_WRITE | EV_PERSIST)) {
        goto forward_failed;
    }

    if (settings.verbose > 2) {
        moxi_log_write("%d: b2b_forward %x to %d success\n",
                       uc->sfd, uc->cmd, c->sfd);
    }

    return true;

forward_failed:
    // Every failure above is accounted as an out-of-memory error
    // and costs us the downstream conn.
    //
    d->ptd->stats.stats.err_oom++;
    cproxy_close_conn(c);

    return false;
}
// Handles one ASCII response line read from a downstream conn
// (c->extra is the owning downstream) for an ASCII upstream.
//
// c    - downstream conn; asserted to be an ASCII proxy conn with
//        no item and no command in progress.
// line - the response line; asserted to equal c->rcurr.
//
// Dispatch, by line prefix:
//   "VALUE "                 - allocate an item and switch to conn_nread
//                              to read the data block; on failure either
//                              swallow the block or close the downstream.
//   "END"                    - pause the downstream conn.
//   "OK"                     - pause, and flush the front cache when the
//                              upstream's current command is a flush.
//   "STAT "/"ITEM "/"PREFIX " - merge into d->merger, or forward as-is.
//   anything else            - pause and relay the line to the upstream.
//
void cproxy_process_a2a_downstream(conn *c, char *line) {
    assert(c != NULL);
    assert(c->next == NULL);
    assert(c->extra != NULL);
    assert(c->cmd == -1);
    assert(c->item == NULL);
    assert(line != NULL);
    assert(line == c->rcurr);
    assert(IS_ASCII(c->protocol));
    assert(IS_PROXY(c->protocol));

    if (settings.verbose > 1)
        fprintf(stderr, "<%d cproxy_process_a2a_downstream %s\n",
                c->sfd, line);

    downstream *d = c->extra;

    assert(d != NULL);
    assert(d->ptd != NULL);
    assert(d->ptd->proxy != NULL);

    if (strncmp(line, "VALUE ", 6) == 0) {
        token_t      tokens[MAX_TOKENS];
        size_t       ntokens;
        unsigned int flags;
        int          clen = 0;
        int          vlen;
        uint64_t     cas = CPROXY_NOT_CAS;

        ntokens = scan_tokens(line, tokens, MAX_TOKENS, &clen);

        // The byte count is parsed as an unsigned 32-bit value but
        // stored in a signed int, and "vlen + 2" is used below for
        // the trailing "\r\n".  Reject counts that would be negative
        // or would overflow that addition, so a bogus length from the
        // downstream cannot yield an undersized item.
        //
        if (ntokens >= 5 && // Accounts for extra termination token.
            ntokens <= 6 &&
            tokens[KEY_TOKEN].length <= KEY_MAX_LENGTH &&
            safe_strtoul(tokens[2].value, (uint32_t *) &flags) &&
            safe_strtoul(tokens[3].value, (uint32_t *) &vlen) &&
            vlen >= 0 &&
            vlen <= 0x7fffffff - 2) {
            char  *key  = tokens[KEY_TOKEN].value;
            size_t nkey = tokens[KEY_TOKEN].length;

            item *it = item_alloc(key, nkey, flags, 0, vlen + 2);
            if (it != NULL) {
                // Five tokens means no CAS field was sent.
                //
                if (ntokens == 5 ||
                    safe_strtoull(tokens[4].value, &cas)) {
                    ITEM_set_cas(it, cas);

                    c->item = it;
                    c->ritem = ITEM_data(it);
                    c->rlbytes = it->nbytes;
                    c->cmd = -1;

                    conn_set_state(c, conn_nread);

                    return; // Success.
                } else {
                    if (settings.verbose > 1)
                        fprintf(stderr, "cproxy could not parse cas\n");
                }
            } else {
                if (settings.verbose > 1)
                    fprintf(stderr, "cproxy could not item_alloc size %u\n",
                            (unsigned int) (vlen + 2));
            }

            if (it != NULL)
                item_remove(it);

            it = NULL;

            c->sbytes = vlen + 2; // Number of bytes to swallow.

            conn_set_state(c, conn_swallow);

            // Note, eventually, we'll see an END later.
        } else {
            // We don't know how much to swallow (or the advertised
            // length is unusable), so close the downstream.
            // The conn_closing should release the downstream,
            // which should write a suffix/error to the upstream.
            //
            conn_set_state(c, conn_closing);
        }
    } else if (strncmp(line, "END", 3) == 0) {
        conn_set_state(c, conn_pause);
    } else if (strncmp(line, "OK", 2) == 0) {
        conn_set_state(c, conn_pause);

        // TODO: Handle flush_all's expiration parameter against
        // the front_cache.
        //
        // TODO: We flush the front_cache too often, inefficiently
        // on every downstream flush_all OK response, rather than
        // on just the last flush_all OK response.
        //
        conn *uc = d->upstream_conn;
        if (uc != NULL &&
            uc->cmd_curr == PROTOCOL_BINARY_CMD_FLUSH) {
            mcache_flush_all(&d->ptd->proxy->front_cache, 0);
        }
    } else if (strncmp(line, "STAT ", 5) == 0 ||
               strncmp(line, "ITEM ", 5) == 0 ||
               strncmp(line, "PREFIX ", 7) == 0) {
        assert(d->merger != NULL);

        conn *uc = d->upstream_conn;
        if (uc != NULL) {
            assert(uc->next == NULL);

            if (protocol_stats_merge_line(d->merger, line) == false) {
                // Forward the line as-is if we couldn't merge it.
                //
                int nline = (int) strlen(line);

                item *it = item_alloc("s", 1, 0, 0, nline + 2);
                if (it != NULL) {
                    // Fixed-length copies; the item data is not
                    // NUL-terminated, only "\r\n"-terminated.
                    //
                    memcpy(ITEM_data(it), line, nline);
                    memcpy(ITEM_data(it) + nline, "\r\n", 2);

                    if (add_conn_item(uc, it)) {
                        add_iov(uc, ITEM_data(it), nline + 2);

                        it = NULL; // Now tracked by uc.
                    }

                    if (it != NULL)
                        item_remove(it);
                }
            }
        }

        conn_set_state(c, conn_new_cmd);
    } else {
        conn_set_state(c, conn_pause);

        // The upstream conn might be NULL when closed already
        // or while handling a noreply.
        //
        conn *uc = d->upstream_conn;
        if (uc != NULL) {
            assert(uc->next == NULL);

            out_string(uc, line);

            if (!update_event(uc, EV_WRITE | EV_PERSIST)) {
                if (settings.verbose > 1)
                    fprintf(stderr,
                            "Can't update upstream write event\n");

                d->ptd->stats.stats.err_oom++;
                cproxy_close_conn(uc);
            }

            // NOTE(review): on the failure path above, uc is still
            // dereferenced here after cproxy_close_conn(uc) -- confirm
            // that the close does not release uc or its buffers
            // synchronously.
            //
            cproxy_del_front_cache_key_ascii_response(d, line,
                                                      uc->cmd_start);
        }
    }
}
/*
 * We reach here after nread'ing a binary response (header+body)
 * from a downstream conn into c->item.
 *
 * Depending on the response, the item is either dropped (NOOP/FLUSH,
 * merged STAT entries, a not-my-vbucket that will be retried) or
 * queued for writing on the downstream's upstream conn.  In every
 * path that reaches the end of this function, our reference on the
 * item is released.
 */
void cproxy_process_b2b_downstream_nread(conn *c) {
    conn *uc;
    item *rsp_item;
    downstream *d;
    protocol_binary_response_header *rsp;
    int ext_len;
    int key_len;
    uint32_t body_len;
    int status;
    int opcode;

    cb_assert(c != NULL);
    cb_assert(c->cmd >= 0);
    cb_assert(c->next == NULL);
    cb_assert(c->cmd_start == NULL);
    cb_assert(IS_BINARY(c->protocol));
    cb_assert(IS_PROXY(c->protocol));

    rsp = (protocol_binary_response_header *) &c->binary_header;

    ext_len  = rsp->response.extlen;
    key_len  = rsp->response.keylen;
    body_len = rsp->response.bodylen;
    status   = ntohs(rsp->response.status);
    opcode   = rsp->response.opcode;

    if (settings.verbose > 2) {
        moxi_log_write("<%d cproxy_process_b2b_downstream_nread %x %x %d %d %u %d %x\n",
                       c->sfd, c->cmd, opcode, ext_len, key_len, body_len,
                       c->noreply, status);
    }

    d = c->extra;
    cb_assert(d != NULL);
    cb_assert(d->ptd != NULL);
    cb_assert(d->ptd->proxy != NULL);

    /* TODO: Need to handle quiet binary command error response, */
    /*       in the right order. */
    /* TODO: Need to handle not-my-vbucket error during a quiet cmd. */

    uc = d->upstream_conn;
    rsp_item = c->item;

    /* Clear c->item because we either move it to the upstream or */
    /* item_remove() it on error. */
    c->item = NULL;

    cb_assert(rsp_item != NULL);
    cb_assert(rsp_item->refcount == 1);

    if (cproxy_binary_ignore_reply(c, rsp, rsp_item)) {
        return;
    }

    if (c->noreply) {
        conn_set_state(c, conn_new_cmd);
    } else {
        conn_set_state(c, conn_pause);

        switch (opcode) {
        case PROTOCOL_BINARY_CMD_NOOP:
        case PROTOCOL_BINARY_CMD_FLUSH:
            /* Nothing is relayed to the upstream for these. */
            goto done;

        case PROTOCOL_BINARY_CMD_STAT:
            /* Only a successful response carrying a key is a stats */
            /* entry; in that case keep reading further responses. */
            if (status == PROTOCOL_BINARY_RESPONSE_SUCCESS && key_len > 0) {
                if (d->merger != NULL) {
                    char *stat_key =
                        (ITEM_data(rsp_item)) + sizeof(*rsp) + ext_len;
                    char *stat_val = stat_key + key_len;

                    protocol_stats_merge_name_val(d->merger, "STAT", 4,
                                                  stat_key, key_len,
                                                  stat_val,
                                                  body_len - key_len - ext_len);
                }

                conn_set_state(c, conn_new_cmd); /* Get next STATS response. */
            }
            goto done;

        default:
            break;
        }

        /* If the client is still there, we should handle */
        /* a not-my-vbucket error with a possible retry. */
        if (uc != NULL &&
            status == PROTOCOL_BINARY_RESPONSE_NOT_MY_VBUCKET) {
            int retry_limit;
            protocol_binary_request_header *orig_req;
            int vbucket;
            int server_idx;

            if (settings.verbose > 2) {
                moxi_log_write("<%d cproxy_process_b2b_downstream_nread not-my-vbucket, "
                               "cmd: %x %d\n",
                               c->sfd, rsp->response.opcode, uc->item != NULL);
            }

            cb_assert(uc->item != NULL);

            /* The vbucket is read back from the reserved field of */
            /* the request item still held by the upstream conn. */
            orig_req = (protocol_binary_request_header *)
                ITEM_data((item*)uc->item);
            vbucket = ntohs(orig_req->request.reserved);
            server_idx = downstream_conn_index(d, c);

            if (settings.verbose > 2) {
                moxi_log_write("<%d cproxy_process_b2b_downstream_nread not-my-vbucket, "
                               "cmd: %x not multi-key get, sindex %d, vbucket %d, retries %d\n",
                               c->sfd, rsp->response.opcode,
                               server_idx, vbucket, uc->cmd_retries);
            }

            mcs_server_invalid_vbucket(&d->mst, server_idx, vbucket);

            /* As long as the upstream is still open and we haven't */
            /* retried too many times already. */
            retry_limit = cproxy_max_retries(d);

            if (uc->cmd_retries < retry_limit) {
                uc->cmd_retries++;
                d->upstream_retry++;
                d->ptd->stats.stats.tot_retry_vbucket++;
                goto done;
            }

            if (settings.verbose > 2) {
                moxi_log_write("%d: cproxy_process_b2b_downstream_nread not-my-vbucket, "
                               "cmd: %x skipping retry %d >= %d\n",
                               c->sfd, rsp->response.opcode,
                               uc->cmd_retries, retry_limit);
            }
        }
    }

    /* Write the response to the upstream connection. */
    if (uc == NULL) {
        goto done;
    }

    if (settings.verbose > 2) {
        moxi_log_write("<%d cproxy_process_b2b_downstream_nread got %u\n",
                       c->sfd, rsp_item->nbytes);
        cproxy_dump_header(c->sfd, ITEM_data(rsp_item));
    }

    if (add_conn_item(uc, rsp_item) != true) {
        goto upstream_failed;
    }

    /* The upstream conn now holds its own reference. */
    rsp_item->refcount++;

    if (add_iov(uc, ITEM_data(rsp_item), rsp_item->nbytes) != 0) {
        goto upstream_failed;
    }

    /* If we got a quiet response, however, don't change the */
    /* upstream connection's state (should be in paused state), */
    /* as we expect the downstream server to provide a */
    /* verbal/non-quiet response that moves the downstream */
    /* conn through the conn_pause countdown codepath. */
    if (c->noreply == false) {
        cproxy_update_event_write(d, uc);
        conn_set_state(uc, conn_mwrite);
    }

    goto done;

upstream_failed:
    d->ptd->stats.stats.err_oom++;
    cproxy_close_conn(uc);

done:
    if (rsp_item != NULL) {
        item_remove(rsp_item);
    }
}
/*
 * Used for broadcast commands, like no-op, flush_all or stats.
 *
 * Forwards the upstream's request item (uc->item) to every usable
 * downstream conn, then prepares a bare binary response header as
 * the upstream suffix.
 *
 * d  - downstream whose downstream_conns receive the request.
 * uc - upstream conn; asserted to not be a noreply conn.
 *
 * Returns true when at least one downstream conn accepted the
 * request and the suffix item was attached to uc; false otherwise.
 */
bool cproxy_broadcast_b2b_downstream(downstream *d, conn *uc) {
    int nwrite = 0;
    int nconns;
    int i;

    cb_assert(d != NULL);
    cb_assert(d->ptd != NULL);
    cb_assert(d->ptd->proxy != NULL);
    cb_assert(d->downstream_conns != NULL);
    cb_assert(uc != NULL);
    cb_assert(uc->next == NULL);
    cb_assert(uc->noreply == false);

    nconns = mcs_server_count(&d->mst);

    for (i = 0; i < nconns; i++) {
        conn *c = d->downstream_conns[i];

        /* b2b_forward_item_vbucket() takes (uc, d, it, c, self, */
        /* vbucket): pass self explicitly, and -1 so the request's */
        /* vbucket field is left untouched for a broadcast. */
        if (c != NULL &&
            c != NULL_CONN &&
            b2b_forward_item_vbucket(uc, d, uc->item, c, false, -1) == true) {
            nwrite++;
        }
    }

    if (settings.verbose > 2) {
        moxi_log_write("%d: b2b broadcast nwrite %d out of %d\n",
                       uc->sfd, nwrite, nconns);
    }

    if (nwrite > 0) {
        /* TODO: Handle binary 'stats reset' sub-command. */
        item *it;

        /* Stats responses from the downstreams get merged, so */
        /* make sure there is a merger to collect them. */
        if (uc->cmd == PROTOCOL_BINARY_CMD_STAT &&
            d->merger == NULL) {
            d->merger = genhash_init(128, skeyhash_ops);
        }

        /* The suffix is a zeroed response header carrying only the */
        /* magic, the request's opcode and the request's opaque. */
        it = item_alloc("h", 1, 0, 0,
                        sizeof(protocol_binary_response_header));
        if (it != NULL) {
            protocol_binary_response_header *header =
                (protocol_binary_response_header *) ITEM_data(it);

            memset(ITEM_data(it), 0, it->nbytes);

            header->response.magic  = (uint8_t) PROTOCOL_BINARY_RES;
            header->response.opcode = uc->binary_header.request.opcode;
            header->response.opaque = uc->opaque;

            if (add_conn_item(uc, it)) {
                d->upstream_suffix     = ITEM_data(it);
                d->upstream_suffix_len = it->nbytes;
                d->upstream_status     = PROTOCOL_BINARY_RESPONSE_SUCCESS;
                d->target_host_ident   = NULL;

                if (settings.verbose > 2) {
                    moxi_log_write("%d: b2b broadcast upstream_suffix",
                                   uc->sfd);
                    cproxy_dump_header(uc->sfd, ITEM_data(it));
                }

                /* TODO: Handle FLUSHQ (quiet binary flush_all). */

                d->downstream_used_start = nwrite;
                d->downstream_used       = nwrite;

                cproxy_start_downstream_timeout(d, NULL);

                return true;
            }

            item_remove(it);
        }
    }

    return false;
}
/**
 * Queues an ASCII "VALUE ..." response for an item on an upstream conn.
 *
 * The item's data block must end in "\r\n"; otherwise nothing is
 * queued and (when verbose) an error is logged.
 *
 * @param it       the item to send.
 * @param uc       the upstream conn; asserted to be a paused
 *                 ASCII proxy conn.
 * @param cas_emit  1: emit CAS.
 *                  0: do not emit CAS.
 *                 -1: data driven (emit only when the item's CAS
 *                     is not CPROXY_NOT_CAS).
 */
void cproxy_upstream_ascii_item_response(item *it, conn *uc,
                                         int cas_emit) {
    assert(it != NULL);
    assert(uc != NULL);
    assert(uc->state == conn_pause);
    assert(uc->funcs != NULL);
    assert(IS_ASCII(uc->protocol));
    assert(IS_PROXY(uc->protocol));

    if (settings.verbose > 2) {
        char key[KEY_MAX_LENGTH + 10];
        assert(it->nkey <= KEY_MAX_LENGTH);
        memcpy(key, ITEM_key(it), it->nkey);
        key[it->nkey] = '\0';

        moxi_log_write("<%d cproxy ascii item response, key %s\n",
                       uc->sfd, key);
    }

    // Guard the length first: with fewer than 2 bytes, looking at
    // the last two bytes would read before the item's data area.
    //
    if (it->nbytes >= 2 &&
        strncmp(ITEM_data(it) + it->nbytes - 2, "\r\n", 2) == 0) {
        // TODO: Need to clean up half-written add_iov()'s.
        //       Consider closing the upstream_conns?
        //
        uint64_t cas = ITEM_get_cas(it);

        if ((cas_emit == 0) ||
            (cas_emit < 0 &&
             cas == CPROXY_NOT_CAS)) {
            // No CAS: the suffix and data go out as a single iov.
            // NOTE(review): this presumes the item's suffix and data
            // are contiguous in memory -- confirm against the item
            // layout.
            //
            if (add_conn_item(uc, it)) {
                it->refcount++;

                if (add_iov(uc, "VALUE ", 6) == 0 &&
                    add_iov(uc, ITEM_key(it), it->nkey) == 0 &&
                    add_iov(uc, ITEM_suffix(it),
                            it->nsuffix + it->nbytes) == 0) {
                    if (settings.verbose > 2) {
                        moxi_log_write("<%d cproxy ascii item response success\n",
                                       uc->sfd);
                    }
                }
            }
        } else {
            // With CAS: send the item's suffix minus its last two
            // bytes, then " <cas>\r\n" from a conn-owned suffix
            // buffer, then the data block.
            //
            char *suffix = add_conn_suffix(uc);
            if (suffix != NULL) {
                sprintf(suffix, " %llu\r\n", (unsigned long long) cas);

                if (add_conn_item(uc, it)) {
                    it->refcount++;

                    if (add_iov(uc, "VALUE ", 6) == 0 &&
                        add_iov(uc, ITEM_key(it), it->nkey) == 0 &&
                        add_iov(uc, ITEM_suffix(it),
                                it->nsuffix - 2) == 0 &&
                        add_iov(uc, suffix, strlen(suffix)) == 0 &&
                        add_iov(uc, ITEM_data(it), it->nbytes) == 0) {
                        if (settings.verbose > 2) {
                            moxi_log_write("<%d cproxy ascii item response ok\n",
                                           uc->sfd);
                        }
                    }
                }
            }
        }
    } else {
        if (settings.verbose > 1) {
            moxi_log_write("ERROR: unexpected downstream data block");
        }
    }
}
void protocol_stats_foreach_write(const void *key, const void *value, void *user_data) { char *line = (char *) value; conn *uc = (conn *) user_data; int nline; cb_assert(line != NULL); cb_assert(uc != NULL); (void)key; nline = strlen(line); if (nline > 0) { item *it; if (settings.verbose > 2) { moxi_log_write("%d: cproxy_stats writing: %s\n", uc->sfd, line); } if (IS_BINARY(uc->protocol)) { token_t line_tokens[MAX_TOKENS]; size_t line_ntokens = scan_tokens(line, line_tokens, MAX_TOKENS, NULL); if (line_ntokens == 4) { uint16_t key_len = line_tokens[NAME_TOKEN].length; uint32_t data_len = line_tokens[VALUE_TOKEN].length; it = item_alloc("s", 1, 0, 0, sizeof(protocol_binary_response_stats) + key_len + data_len); if (it != NULL) { protocol_binary_response_stats *header = (protocol_binary_response_stats *) ITEM_data(it); memset(ITEM_data(it), 0, it->nbytes); header->message.header.response.magic = (uint8_t) PROTOCOL_BINARY_RES; header->message.header.response.opcode = uc->binary_header.request.opcode; header->message.header.response.keylen = (uint16_t) htons(key_len); header->message.header.response.bodylen = htonl(key_len + data_len); header->message.header.response.opaque = uc->opaque; memcpy((ITEM_data(it)) + sizeof(protocol_binary_response_stats), line_tokens[NAME_TOKEN].value, key_len); memcpy((ITEM_data(it)) + sizeof(protocol_binary_response_stats) + key_len, line_tokens[VALUE_TOKEN].value, data_len); if (add_conn_item(uc, it)) { add_iov(uc, ITEM_data(it), it->nbytes); if (settings.verbose > 2) { moxi_log_write("%d: cproxy_stats writing binary", uc->sfd); cproxy_dump_header(uc->sfd, ITEM_data(it)); } return; } item_remove(it); } } return; } it = item_alloc("s", 1, 0, 0, nline + 2); if (it != NULL) { strncpy(ITEM_data(it), line, nline); strncpy(ITEM_data(it) + nline, "\r\n", 2); if (add_conn_item(uc, it)) { add_iov(uc, ITEM_data(it), nline + 2); return; } item_remove(it); } } }