/*
 * Drains the upstream conn's list of corked binary commands.
 *
 * Every entry that carries a request item is handed to b2b_forward_item();
 * afterwards the entry's request/response items are released and the entry
 * itself is freed.  On return uc->corked is NULL.
 */
void cproxy_binary_uncork_cmds(downstream *d, conn *uc) {
    int forwarded = 0;

    assert(d != NULL);
    assert(uc != NULL);

    if (settings.verbose > 2) {
        moxi_log_write("%d: cproxy_binary_uncork_cmds\n", uc->sfd);
    }

    while (uc->corked != NULL) {
        /* Remember the tail before the head entry is freed. */
        bin_cmd *rest = uc->corked->next;
        item *req = uc->corked->request_item;

        if (req != NULL) {
            b2b_forward_item(uc, d, req);
            forwarded++;
        }

        /* Drop the references the corked entry was holding. */
        if (uc->corked->request_item != NULL) {
            item_remove(uc->corked->request_item);
        }
        if (uc->corked->response_item != NULL) {
            item_remove(uc->corked->response_item);
        }

        free(uc->corked);
        uc->corked = rest;
    }

    if (settings.verbose > 2) {
        moxi_log_write("%d: cproxy_binary_uncork_cmds, uncorked %d\n",
                       uc->sfd, forwarded);
    }
}
/* * Processes an incoming "handle a new connection" item. This is called when * input arrives on the libevent wakeup pipe. */ static void thread_libevent_process(int fd, short which, void *arg) { LIBEVENT_THREAD *me = arg; CQ_ITEM *cq_item; char buf[1]; (void)which; if (read(fd, buf, 1) != 1) if (settings.verbose > 0) moxi_log_write("Can't read from libevent pipe\n"); cq_item = cq_pop(me->new_conn_queue); if (NULL != cq_item) { conn *c = conn_new(cq_item->sfd, cq_item->init_state, cq_item->event_flags, cq_item->read_buffer_size, cq_item->transport, me->base, cq_item->funcs, cq_item->extra); if (c == NULL) { if (IS_UDP(cq_item->transport)) { moxi_log_write("Can't listen for events on UDP socket\n"); exit(1); } else { if (settings.verbose > 0) { moxi_log_write("Can't listen for events on fd %d\n", cq_item->sfd); } close(cq_item->sfd); } } else { c->protocol = cq_item->protocol; c->thread = me; } cqi_free(cq_item); } }
/*
 * Opens a blocking TCP connection to the vbs server named in config.
 *
 * config->hostname is resolved with gethostbyname() and the first IPv4
 * address is used together with config->port.
 *
 * Returns the connected socket descriptor, or -1 on failure.  The socket
 * is closed on every failure path so no descriptor is leaked.
 */
int connect_server(vbs_config_t *config) {
    int vbs_fd;
    struct sockaddr_in vbs_addr;
    struct hostent *he = NULL;
    struct in_addr **addr_list;

    if ((vbs_fd = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
        moxi_log_write("ERROR: unable to create socket\n");
        return -1;
    }

    memset(&vbs_addr, 0, sizeof(vbs_addr));
    vbs_addr.sin_family = AF_INET;
    vbs_addr.sin_port = htons(config->port);

    /* Reject lookups that yield no usable IPv4 address instead of
     * dereferencing an empty address list. */
    he = gethostbyname(config->hostname);
    if (he == NULL || he->h_addrtype != AF_INET ||
        he->h_addr_list == NULL || he->h_addr_list[0] == NULL) {
        moxi_log_write("ERROR: unable to resolve address %s\n", config->hostname);
        close(vbs_fd);
        return -1;
    }

    addr_list = (struct in_addr **)he->h_addr_list;
    vbs_addr.sin_addr = *addr_list[0];

    /* Now connect to the vbs server. */
    if (connect(vbs_fd, (struct sockaddr *)&vbs_addr, sizeof(vbs_addr)) < 0) {
        moxi_log_write("ERROR: unable to connect to server %s\n", config->hostname);
        close(vbs_fd);
        return -1;
    }

    return vbs_fd;
}
/*
 * Dump callback that emits one "prefix key: val" line through
 * moxi_log_write().  A NULL prefix is printed as an empty string;
 * dump_opaque is unused.
 */
void cproxy_dump_behavior_stderr(const void *dump_opaque,
                                 const char *prefix,
                                 const char *key,
                                 const char *val) {
    (void)dump_opaque;

    assert(key);
    assert(val);

    moxi_log_write("%s %s: %s\n", (prefix != NULL) ? prefix : "", key, val);
}
/*
 * Starts the hash table maintenance thread.
 *
 * If MEMCACHED_HASH_BULK_MOVE is set in the environment it overrides
 * hash_bulk_move; a value that parses to 0 falls back to
 * DEFAULT_HASH_BULK_MOVE.
 *
 * Returns 0 on success, -1 if the thread could not be created.
 */
int start_assoc_maintenance_thread() {
    const char *bulk = getenv("MEMCACHED_HASH_BULK_MOVE");
    int rc;

    if (bulk != NULL) {
        hash_bulk_move = atoi(bulk);
        if (!hash_bulk_move) {
            hash_bulk_move = DEFAULT_HASH_BULK_MOVE;
        }
    }

    rc = cb_create_thread(&maintenance_tid, assoc_maintenance_thread, NULL, 0);
    if (rc == 0) {
        return 0;
    }

    moxi_log_write("Can't create thread: %s\n", strerror(rc));
    return -1;
}
/*
 * Begins growing the hash table to the next power of two.
 *
 * The current table becomes old_hashtable and a zeroed table of
 * hashsize(hashpower + 1) slots becomes primary_hashtable; the maintenance
 * condition is then signalled.  If the allocation fails the primary table
 * is restored and nothing else changes.
 */
static void assoc_expand(void) {
    old_hashtable = primary_hashtable;
    primary_hashtable = calloc(hashsize(hashpower + 1), sizeof(void *));

    if (primary_hashtable == NULL) {
        /* Bad news, but we can keep running on the old table. */
        primary_hashtable = old_hashtable;
        return;
    }

    if (settings.verbose > 1) {
        moxi_log_write("Hash table expansion starting\n");
    }

    hashpower++;
    expanding = true;
    expand_bucket = 0;
    cb_cond_signal(&maintenance_cond);
}
/* * Worker thread: main event loop */ static void *worker_libevent(void *arg) { LIBEVENT_THREAD *me = arg; /* Any per-thread setup can happen here; thread_init() will block until * all threads have finished initializing. */ me->thread_id = pthread_self(); #ifndef WIN32 if (settings.verbose > 1) moxi_log_write("worker_libevent thread_id %ld\n", (long)me->thread_id); #endif pthread_mutex_lock(&init_lock); init_count++; pthread_cond_signal(&init_cond); pthread_mutex_unlock(&init_lock); event_base_loop(me->base, 0); return NULL; }
static void assoc_maintenance_thread(void *arg) { (void)arg; while (do_run_maintenance_thread) { int ii = 0; /* Lock the cache, and bulk move multiple buckets to the new * hash table. */ cb_mutex_enter(&cache_lock); for (ii = 0; ii < hash_bulk_move && expanding; ++ii) { item *it, *next; int bucket; for (it = old_hashtable[expand_bucket]; NULL != it; it = next) { next = it->h_next; bucket = hash(ITEM_key(it), it->nkey, 0) & hashmask(hashpower); it->h_next = primary_hashtable[bucket]; primary_hashtable[bucket] = it; } old_hashtable[expand_bucket] = NULL; expand_bucket++; if (expand_bucket == hashsize(hashpower - 1)) { expanding = false; free(old_hashtable); if (settings.verbose > 1) moxi_log_write("Hash table expansion done\n"); } } if (!expanding) { /* We are done expanding.. just wait for next invocation */ cb_cond_wait(&maintenance_cond, &cache_lock); } cb_mutex_exit(&cache_lock); } }
/*
 * Reads one length-prefixed chunk from a blocking socket.
 *
 * The wire format is a 4-byte big-endian length followed by that many
 * payload bytes.  The whole chunk is read before returning.
 *
 * fd         socket to read from.
 * buf        out: on success receives a malloc'ed buffer holding the
 *            payload; the caller owns it and must free() it.  Set to NULL
 *            whenever -1 is returned; left untouched when 0 is returned.
 * heartbeat  if non-zero, installed as the receive timeout (seconds) via
 *            SO_RCVTIMEO before reading.
 *
 * Returns the payload length (> 0) on success; 0 if nothing could be read
 * before the header arrived (timeout, peer close or read error) or if the
 * header announces an empty chunk; -1 on any failure after the first
 * header byte has been consumed.  A timeout in the middle of a chunk is
 * reported as -1 because the stream cannot be resynchronised afterwards.
 */
int read_socket(int fd, char **buf, int heartbeat) {
    uint32_t net_len = 0;
    uint32_t chunk_len;
    size_t got = 0;
    ssize_t len;
    char *chunk;

    if (heartbeat) {
        struct timeval tv = {heartbeat, 0};
        if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, (char *) &tv, sizeof(tv)) < 0) {
            moxi_log_write("ERROR: Unable to set receive timeout on socket %d (%s)\n",
                           fd, strerror(errno));
        }
    }

    /* Read the four-byte length header, tolerating short reads. */
    while (got < sizeof(net_len)) {
        len = read(fd, (char *) &net_len + got, sizeof(net_len) - got);
        if (len > 0) {
            got += (size_t) len;
            continue;
        }
        if (len < 0 && errno == EINTR) {
            continue;
        }
        if (got == 0) {
            /* Timeout, close or error before any data: nothing consumed. */
            return 0;
        }
        moxi_log_write("ERROR: Unable to read from socket %d (%s)\n", fd,
                       (len == 0) ? "connection closed" : strerror(errno));
        *buf = NULL;
        return -1;
    }

    chunk_len = ntohl(net_len);
    if (chunk_len == 0) {
        return 0;
    }

    /* The length is returned as an int, so larger values are unusable. */
    if (chunk_len > 0x7fffffffU) {
        moxi_log_write("ERROR: Invalid chunk length %lu on socket %d\n",
                       (unsigned long) chunk_len, fd);
        *buf = NULL;
        return -1;
    }

    chunk = malloc(chunk_len);
    if (chunk == NULL) {
        moxi_log_write("ERROR: Unable to allocate %lu bytes for socket %d\n",
                       (unsigned long) chunk_len, fd);
        *buf = NULL;
        return -1;
    }

    /* Read the full payload. */
    got = 0;
    while (got < chunk_len) {
        len = read(fd, chunk + got, chunk_len - got);
        if (len > 0) {
            got += (size_t) len;
            continue;
        }
        if (len < 0 && errno == EINTR) {
            continue;
        }
        /* read() signals failure with -1 and errno, and EOF with 0. */
        moxi_log_write("ERROR: Unable to read from socket %d (%s)\n", fd,
                       (len == 0) ? "connection closed" : strerror(errno));
        free(chunk);
        *buf = NULL;
        return -1;
    }

    *buf = chunk;
    return (int) chunk_len;
}
/*
 * Removes from the proxy's front cache the key named in an ascii command.
 *
 * The key is taken to start right after the first space in command and its
 * length is computed by skey_len().  Nothing happens if the front cache is
 * not started, the command has no space, or the key length is not positive.
 */
void cproxy_del_front_cache_key_ascii(downstream *d, char *command) {
    char *sep;
    char *key;
    int key_len;

    assert(d);
    assert(d->ptd);
    assert(d->ptd->proxy);

    if (!mcache_started(&d->ptd->proxy->front_cache)) {
        return;
    }

    sep = strchr(command, ' ');
    if (sep == NULL) {
        return;
    }

    key = sep + 1;
    key_len = skey_len(key);
    if (key_len <= 0) {
        return;
    }

    mcache_delete(&d->ptd->proxy->front_cache, key, key_len);

    if (settings.verbose > 2) {
        moxi_log_write("front_cache del %s\n", key);
    }
}
/**
 * Fire-and-forget optimization for ascii SETs.
 *
 * When an optimize_set pattern is configured and the key matches it, the
 * downstream is detached from the upstream conn and "STORED" is written to
 * the upstream right away.
 *
 * @return true if the key matched and the upstream was answered here
 *         (even when arming the write event failed and the upstream was
 *         closed); false if the optimization does not apply.
 */
bool cproxy_optimize_set_ascii(downstream *d, conn *uc,
                               char *key, int key_len) {
    assert(d);
    assert(d->ptd);
    assert(d->ptd->proxy);
    assert(uc);
    assert(uc->next == NULL);

    /* No pattern configured, or the key does not match it. */
    if (d->ptd->behavior_pool.base.optimize_set[0] == '\0' ||
        !matcher_check(&d->ptd->proxy->optimize_set_matcher,
                       key, key_len, false)) {
        return false;
    }

    /* Detach the upstream: the downstream reply is no longer relayed. */
    d->upstream_conn = NULL;
    d->upstream_suffix = NULL;
    d->upstream_suffix_len = 0;
    d->upstream_status = PROTOCOL_BINARY_RESPONSE_SUCCESS;
    d->upstream_retry = 0;
    d->target_host_ident = NULL;

    out_string(uc, "STORED");

    if (!update_event(uc, EV_WRITE | EV_PERSIST)) {
        if (settings.verbose > 1) {
            moxi_log_write("ERROR: Can't update upstream write event\n");
        }

        d->ptd->stats.stats.err_oom++;
        cproxy_close_conn(uc);
    }

    return true;
}
/*
 * Handles one ascii response line received from a downstream server on
 * behalf of an ascii upstream.
 *
 * c     downstream conn; c->extra is its downstream, c->rcurr == line.
 * line  the response line to process.
 *
 * The line is dispatched on its prefix:
 *   "VALUE "                 - allocate an item and switch to conn_nread to
 *                              read the value; on failure swallow the value
 *                              bytes or, if unparsable, close the conn.
 *   "END"                    - pause the downstream conn.
 *   "OK"                     - pause; flush the front cache if the upstream
 *                              command was a flush.
 *   "STAT "/"ITEM "/"PREFIX " - merge into d->merger, or forward the raw
 *                              line to the upstream if merging fails.
 *   "LOCK_ERROR"/"NOT_FOUND" - record a fixed upstream suffix and pause.
 *   anything else            - pause and relay the line to the upstream.
 */
void cproxy_process_a2a_downstream(conn *c, char *line) {
    assert(c != NULL);
    assert(c->next == NULL);
    assert(c->extra != NULL);
    assert(c->cmd == -1);
    assert(c->item == NULL);
    assert(line != NULL);
    assert(line == c->rcurr);
    assert(IS_ASCII(c->protocol));
    assert(IS_PROXY(c->protocol));

    if (settings.verbose > 1) {
        moxi_log_write("<%d cproxy_process_a2a_downstream %s\n",
                       c->sfd, line);
    }

    downstream *d = c->extra;

    assert(d != NULL);
    assert(d->ptd != NULL);
    assert(d->ptd->proxy != NULL);

    if (strncmp(line, "VALUE ", 6) == 0) {
        token_t tokens[MAX_TOKENS];
        size_t ntokens;
        unsigned int flags;
        int clen = 0;
        int vlen;
        uint64_t cas = CPROXY_NOT_CAS;

        ntokens = scan_tokens(line, tokens, MAX_TOKENS, &clen);
        if (ntokens >= 5 && // Accounts for extra termination token.
            ntokens <= 6 &&
            tokens[KEY_TOKEN].length <= KEY_MAX_LENGTH &&
            safe_strtoul(tokens[2].value, (uint32_t *) &flags) &&
            safe_strtoul(tokens[3].value, (uint32_t *) &vlen)) {
            char *key = tokens[KEY_TOKEN].value;
            size_t nkey = tokens[KEY_TOKEN].length;

            // The item holds the value plus two extra bytes
            // (presumably the trailing "\r\n" -- confirm).
            //
            item *it = item_alloc(key, nkey, flags, 0, vlen + 2);
            if (it != NULL) {
                // With 5 tokens there is no cas field, so cas keeps
                // its CPROXY_NOT_CAS default.
                //
                if (ntokens == 5 ||
                    safe_strtoull(tokens[4].value, &cas)) {
                    ITEM_set_cas(it, cas);

                    c->item = it;
                    c->ritem = ITEM_data(it);
                    c->rlbytes = it->nbytes;
                    c->cmd = -1;

                    conn_set_state(c, conn_nread);

                    return; // Success.
                } else {
                    if (settings.verbose > 1) {
                        moxi_log_write("cproxy could not parse cas\n");
                    }
                }
            } else {
                if (settings.verbose > 1) {
                    moxi_log_write("cproxy could not item_alloc size %u\n",
                                   vlen + 2);
                }
            }

            // Failure path: release the item (if any) and skip over
            // the value bytes that follow the VALUE line.
            //
            if (it != NULL) {
                item_remove(it);
            }

            it = NULL;

            c->sbytes = vlen + 2; // Number of bytes to swallow.

            conn_set_state(c, conn_swallow);

            // Note, eventually, we'll see an END later.
        } else {
            // We don't know how much to swallow, so close the downstream.
            // The conn_closing should release the downstream,
            // which should write a suffix/error to the upstream.
            //
            conn_set_state(c, conn_closing);
        }
    } else if (strncmp(line, "END", 3) == 0) {
        conn_set_state(c, conn_pause);
    } else if (strncmp(line, "OK", 2) == 0) {
        conn_set_state(c, conn_pause);

        // TODO: Handle flush_all's expiration parameter against
        // the front_cache.
        //
        // TODO: We flush the front_cache too often, inefficiently
        // on every downstream flush_all OK response, rather than
        // on just the last flush_all OK response.
        //
        conn *uc = d->upstream_conn;
        if (uc != NULL &&
            uc->cmd_curr == PROTOCOL_BINARY_CMD_FLUSH) {
            mcache_flush_all(&d->ptd->proxy->front_cache, 0);
        }
    } else if (strncmp(line, "STAT ", 5) == 0 ||
               strncmp(line, "ITEM ", 5) == 0 ||
               strncmp(line, "PREFIX ", 7) == 0) {
        assert(d->merger != NULL);

        conn *uc = d->upstream_conn;
        if (uc != NULL) {
            assert(uc->next == NULL);

            if (protocol_stats_merge_line(d->merger, line) == false) {
                // Forward the line as-is if we couldn't merge it.
                //
                int nline = strlen(line);

                item *it = item_alloc("s", 1, 0, 0, nline + 2);
                if (it != NULL) {
                    // Copy the line and append "\r\n"; no NUL
                    // terminator is written into the item data.
                    //
                    strncpy(ITEM_data(it), line, nline);
                    strncpy(ITEM_data(it) + nline, "\r\n", 2);

                    if (add_conn_item(uc, it)) {
                        add_iov(uc, ITEM_data(it), nline + 2);

                        // Clearing it skips the item_remove() below.
                        //
                        it = NULL;
                    }

                    if (it != NULL) {
                        item_remove(it);
                    }
                }
            }
        }

        conn_set_state(c, conn_new_cmd);
    } else if (strncmp(line, "LOCK_ERROR", 10) == 0) {
        // NOTE(review): upstream_suffix_len is 0 although the suffix is
        // non-empty; presumably 0 means "use strlen" -- confirm.
        //
        d->upstream_suffix = "LOCK_ERROR\r\n";
        d->upstream_suffix_len = 0;
        d->upstream_status = PROTOCOL_BINARY_RESPONSE_ETMPFAIL;
        d->upstream_retry = 0;
        d->target_host_ident = NULL;

        conn_set_state(c, conn_pause);
    } else if (strncmp(line, "NOT_FOUND", 9) == 0) {
        // Unlike the LOCK_ERROR branch, upstream_status is not set here.
        //
        d->upstream_suffix = "NOT_FOUND\r\n";
        d->upstream_suffix_len = 0;
        d->upstream_retry = 0;
        d->target_host_ident = NULL;

        conn_set_state(c, conn_pause);
    } else {
        conn_set_state(c, conn_pause);

        // The upstream conn might be NULL when closed already
        // or while handling a noreply.
        //
        conn *uc = d->upstream_conn;
        if (uc != NULL) {
            assert(uc->next == NULL);

            out_string(uc, line);

            if (!update_event(uc, EV_WRITE | EV_PERSIST)) {
                if (settings.verbose > 1) {
                    moxi_log_write("Can't update upstream write event\n");
                }

                d->ptd->stats.stats.err_oom++;
                cproxy_close_conn(uc);
            }

            // NOTE(review): uc->cmd_start is read here even when
            // cproxy_close_conn(uc) ran just above -- confirm that
            // uc is still valid at this point.
            //
            cproxy_del_front_cache_key_ascii_response(d, line,
                                                      uc->cmd_start);
        }
    }
}
/* We reach here after nread'ing a header+body into an item.
 *
 * c is the downstream conn; c->item holds the complete binary response
 * (header followed by body) and c->extra is the downstream.
 *
 * Depending on opcode and status the response is ignored, merged into the
 * stats merger, scheduled for a retry (not-my-vbucket), or queued on the
 * upstream conn for writing.  The local item reference is released at the
 * done: label on every path except the early return taken when
 * cproxy_binary_ignore_reply() returns true.
 */
void cproxy_process_b2b_downstream_nread(conn *c) {
    conn *uc;
    item *it;
    downstream *d;
    protocol_binary_response_header *header;
    int extlen;
    int keylen;
    uint32_t bodylen;
    int status;
    int opcode;

    cb_assert(c != NULL);
    cb_assert(c->cmd >= 0);
    cb_assert(c->next == NULL);
    cb_assert(c->cmd_start == NULL);
    cb_assert(IS_BINARY(c->protocol));
    cb_assert(IS_PROXY(c->protocol));

    header = (protocol_binary_response_header *) &c->binary_header;

    /* Only status is byte-swapped here; keylen and bodylen are used as
     * stored (presumably already in host order -- confirm). */
    extlen = header->response.extlen;
    keylen = header->response.keylen;
    bodylen = header->response.bodylen;
    status = ntohs(header->response.status);
    opcode = header->response.opcode;

    if (settings.verbose > 2) {
        moxi_log_write("<%d cproxy_process_b2b_downstream_nread %x %x %d %d %u %d %x\n",
                       c->sfd, c->cmd, opcode, extlen, keylen, bodylen,
                       c->noreply, status);
    }

    d = c->extra;
    cb_assert(d != NULL);
    cb_assert(d->ptd != NULL);
    cb_assert(d->ptd->proxy != NULL);

    /* TODO: Need to handle quiet binary command error response, */
    /*       in the right order. */
    /* TODO: Need to handle not-my-vbucket error during a quiet cmd. */

    uc = d->upstream_conn;
    it = c->item;

    /* Clear c->item because we either move it to the upstream or */
    /* item_remove() it on error. */

    c->item = NULL;

    cb_assert(it != NULL);
    cb_assert(it->refcount == 1);

    /* NOTE(review): this early return skips the item_remove() at done:;
     * presumably cproxy_binary_ignore_reply() disposes of it -- confirm. */
    if (cproxy_binary_ignore_reply(c, header, it)) {
        return;
    }

    if (c->noreply) {
        conn_set_state(c, conn_new_cmd);
    } else {
        conn_set_state(c, conn_pause);

        /* NOOP and FLUSH responses are not forwarded to the upstream. */
        if (opcode == PROTOCOL_BINARY_CMD_NOOP ||
            opcode == PROTOCOL_BINARY_CMD_FLUSH) {
            goto done;
        }

        if (opcode == PROTOCOL_BINARY_CMD_STAT) {
            if (status == PROTOCOL_BINARY_RESPONSE_SUCCESS) {
                /* A STAT response without a key leaves the conn paused. */
                if (keylen > 0) {
                    if (d->merger != NULL) {
                        /* Key and value follow the header and extras
                         * inside the item data. */
                        char *key = (ITEM_data(it)) + sizeof(*header) + extlen;
                        char *val = key + keylen;

                        protocol_stats_merge_name_val(d->merger, "STAT", 4,
                                                      key, keylen,
                                                      val, bodylen - keylen - extlen);
                    }

                    conn_set_state(c, conn_new_cmd); /* Get next STATS response. */
                }
            }

            goto done;
        }

        /* If the client is still there, we should handle */
        /* a not-my-vbucket error with a possible retry. */

        if (uc != NULL &&
            status == PROTOCOL_BINARY_RESPONSE_NOT_MY_VBUCKET) {
            int max_retries;
            protocol_binary_request_header *req;
            int vbucket;
            int sindex;

            if (settings.verbose > 2) {
                moxi_log_write("<%d cproxy_process_b2b_downstream_nread not-my-vbucket, "
                               "cmd: %x %d\n",
                               c->sfd, header->response.opcode, uc->item != NULL);
            }

            cb_assert(uc->item != NULL);

            /* The vbucket id is taken from the original request that is
             * still held in uc->item. */
            req = (protocol_binary_request_header *)ITEM_data((item*)uc->item);

            vbucket = ntohs(req->request.reserved);
            sindex = downstream_conn_index(d, c);

            if (settings.verbose > 2) {
                moxi_log_write("<%d cproxy_process_b2b_downstream_nread not-my-vbucket, "
                               "cmd: %x not multi-key get, sindex %d, vbucket %d, retries %d\n",
                               c->sfd, header->response.opcode,
                               sindex, vbucket, uc->cmd_retries);
            }

            mcs_server_invalid_vbucket(&d->mst, sindex, vbucket);

            /* As long as the upstream is still open and we haven't */
            /* retried too many times already. */

            max_retries = cproxy_max_retries(d);

            if (uc->cmd_retries < max_retries) {
                uc->cmd_retries++;

                d->upstream_retry++;
                d->ptd->stats.stats.tot_retry_vbucket++;

                /* The error response itself is dropped, not forwarded. */
                goto done;
            }

            if (settings.verbose > 2) {
                moxi_log_write("%d: cproxy_process_b2b_downstream_nread not-my-vbucket, "
                               "cmd: %x skipping retry %d >= %d\n",
                               c->sfd, header->response.opcode,
                               uc->cmd_retries, max_retries);
            }
        }
    }

    /* Write the response to the upstream connection. */

    if (uc != NULL) {
        if (settings.verbose > 2) {
            moxi_log_write("<%d cproxy_process_b2b_downstream_nread got %u\n",
                           c->sfd, it->nbytes);

            cproxy_dump_header(c->sfd, ITEM_data(it));
        }

        if (add_conn_item(uc, it) == true) {
            /* Extra reference for the upstream conn; item_remove() at
             * done: then presumably drops only the local one. */
            it->refcount++;

            if (add_iov(uc, ITEM_data(it), it->nbytes) == 0) {
                /* If we got a quiet response, however, don't change the */
                /* upstream connection's state (should be in paused state), */
                /* as we expect the downstream server to provide a */
                /* verbal/non-quiet response that moves the downstream */
                /* conn through the conn_pause countdown codepath. */

                if (c->noreply == false) {
                    cproxy_update_event_write(d, uc);

                    conn_set_state(uc, conn_mwrite);
                }

                goto done;
            }
        }

        /* add_conn_item() or add_iov() failed: count it as an OOM error
         * and close the upstream. */
        d->ptd->stats.stats.err_oom++;
        cproxy_close_conn(uc);
    }

done:
    if (it != NULL) {
        item_remove(it);
    }
}
/* Entry point for a binary response header arriving from a downstream
 * server (reached via try_read_command()/drive_machine()).
 *
 * Allocates an item large enough for the whole response (header + body),
 * copies the header into it and either switches the conn to conn_nread to
 * collect the body or, when there is no body, goes straight to
 * cproxy_process_b2b_downstream_nread().  If the allocation fails the
 * conn is closed and err_oom is incremented.
 */
void cproxy_process_b2b_downstream(conn *c) {
    downstream *d;
    item *it;
    int extlen;
    int keylen;
    uint32_t bodylen;

    cb_assert(c != NULL);
    cb_assert(c->cmd >= 0);
    cb_assert(c->next == NULL);
    cb_assert(c->item == NULL);
    cb_assert(IS_BINARY(c->protocol));
    cb_assert(IS_PROXY(c->protocol));
    cb_assert(c->substate == bin_no_state);

    d = c->extra;
    cb_assert(d);

    /* Reset the per-command bookkeeping. */
    c->cmd_curr = -1;
    c->cmd_start = NULL;
    c->cmd_start_time = msec_current_time;
    c->cmd_retries = 0;

    extlen = c->binary_header.request.extlen;
    keylen = c->binary_header.request.keylen;
    bodylen = c->binary_header.request.bodylen;

    if (settings.verbose > 2) {
        moxi_log_write("<%d cproxy_process_b2b_downstream %x %d %d %u\n",
                       c->sfd, c->cmd, extlen, keylen, bodylen);
    }

    cb_assert(bodylen >= (uint32_t) keylen + extlen);

    process_bin_noreply(c); /* Map quiet c->cmd values into non-quiet. */

    /* Read everything first; the real processing happens once the */
    /* item holds the complete message. */
    c->item = item_alloc("q", 1, 0, 0, sizeof(c->binary_header) + bodylen);
    if (c->item == NULL) {
        d->ptd->stats.stats.err_oom++;
        cproxy_close_conn(c);
        return;
    }

    it = c->item;
    cb_assert(it->refcount == 1);

    memcpy(ITEM_data(it), c->rcurr, sizeof(c->binary_header));

    if (bodylen == 0) {
        /* No body bytes: jump directly to the nread-completed step. */
        cproxy_process_b2b_downstream_nread(c);
        return;
    }

    /* Continue with an nread of the rest of the body. */
    c->ritem = ITEM_data(it) + sizeof(c->binary_header);
    c->rlbytes = bodylen;
    c->substate = bin_read_set_value;

    conn_set_state(c, conn_nread);
}
/* Do the actual work of forwarding the command from an
 * upstream binary conn to its assigned binary downstream.
 *
 * For non-broadcast commands without corked predecessors, the target
 * server is picked from the request key.  Downstream conns are then
 * connected and prepared for writing, corked quiet commands are flushed,
 * and the command is either broadcast (FLUSH, NOOP, STAT) or sent to a
 * single server.
 *
 * Returns false if no server could be chosen, a downstream conn could not
 * be prepared, or connecting failed; true if
 * cproxy_connect_downstream() returned -1; otherwise the result of the
 * broadcast/simple forwarding helper.
 */
bool cproxy_forward_b2b_downstream(downstream *d) {
    int nc;
    int server_index;
    conn *uc;

    cb_assert(d != NULL);
    cb_assert(d->ptd != NULL);
    cb_assert(d->ptd->proxy != NULL);
    cb_assert(d->downstream_conns != NULL);
    cb_assert(d->downstream_used == 0);
    cb_assert(d->multiget == NULL);
    cb_assert(d->merger == NULL);

    d->downstream_used_start = 0;

    uc = d->upstream_conn;

    /* Validate uc before the verbose log below dereferences it. */
    cb_assert(uc != NULL);

    if (settings.verbose > 2) {
        moxi_log_write("%d: cproxy_forward_b2b_downstream %x\n",
                       uc->sfd, uc->cmd);
    }

    cb_assert(uc->state == conn_pause);
    cb_assert(uc->cmd >= 0);
    cb_assert(uc->cmd_start == NULL);
    cb_assert(uc->thread != NULL);
    cb_assert(uc->thread->base != NULL);
    cb_assert(uc->noreply == false);
    cb_assert(IS_BINARY(uc->protocol));
    cb_assert(IS_PROXY(uc->protocol));

    /* -1 means "no specific server"; it is narrowed below only for a
     * keyed, non-broadcast command with nothing corked. */
    server_index = -1;

    if (cproxy_is_broadcast_cmd(uc->cmd) == false && uc->corked == NULL) {
        item *it = uc->item;
        protocol_binary_request_header *req;
        char *key;
        int key_len;

        cb_assert(it != NULL);

        /* The key follows the request header and its extras. */
        req = (protocol_binary_request_header *) ITEM_data(it);
        key = ((char *) req) + sizeof(*req) + req->request.extlen;
        key_len = ntohs(req->request.keylen);

        if (key_len > 0) {
            server_index = cproxy_server_index(d, key, key_len, NULL);
            if (server_index < 0) {
                return false;
            }
        }
    }

    nc = cproxy_connect_downstream(d, uc->thread, server_index);
    if (nc == -1) {
        return true;
    }

    if (nc > 0) {
        int i;
        int nconns;

        cb_assert(d->downstream_conns != NULL);

        if (d->usec_start == 0 && d->ptd->behavior_pool.base.time_stats) {
            d->usec_start = usec_now();
        }

        nconns = mcs_server_count(&d->mst);

        for (i = 0; i < nconns; i++) {
            conn *c = d->downstream_conns[i];

            if (c != NULL && c != NULL_CONN) {
                cb_assert(c->state == conn_pause);
                cb_assert(c->item == NULL);

                if (cproxy_prep_conn_for_write(c) == false) {
                    d->ptd->stats.stats.err_downstream_write_prep++;
                    cproxy_close_conn(c);

                    return false;
                }
            }
        }

        /* Uncork the saved-up quiet binary commands. */
        cproxy_binary_uncork_cmds(d, uc);

        if (uc->cmd == PROTOCOL_BINARY_CMD_FLUSH ||
            uc->cmd == PROTOCOL_BINARY_CMD_NOOP ||
            uc->cmd == PROTOCOL_BINARY_CMD_STAT) {
            return cproxy_broadcast_b2b_downstream(d, uc);
        }

        return cproxy_forward_b2b_simple_downstream(d, uc);
    }

    if (settings.verbose > 2) {
        moxi_log_write("%d: cproxy_forward_b2b_downstream connect failed\n",
                       uc->sfd);
    }

    return false;
}
/* Broadcast path for commands such as no-op, flush_all and stats.
 *
 * Forwards uc->item to every usable downstream conn.  If at least one
 * write was queued, a header-only response item is prepared and installed
 * as the downstream's upstream_suffix, the downstream_used counters are
 * set to the number of writes and the downstream timeout is started.
 *
 * Returns true on success; false if nothing was written, the response
 * item could not be allocated, or it could not be attached to uc.
 */
bool cproxy_broadcast_b2b_downstream(downstream *d, conn *uc) {
    protocol_binary_response_header *header;
    item *it;
    int nconns;
    int nwrite = 0;
    int idx;

    cb_assert(d != NULL);
    cb_assert(d->ptd != NULL);
    cb_assert(d->ptd->proxy != NULL);
    cb_assert(d->downstream_conns != NULL);
    cb_assert(uc != NULL);
    cb_assert(uc->next == NULL);
    cb_assert(uc->noreply == false);

    nconns = mcs_server_count(&d->mst);

    for (idx = 0; idx < nconns; idx++) {
        conn *c = d->downstream_conns[idx];

        if (c == NULL || c == NULL_CONN) {
            continue;
        }
        if (b2b_forward_item_vbucket(uc, d, uc->item, c, -1) == true) {
            nwrite++;
        }
    }

    if (settings.verbose > 2) {
        moxi_log_write("%d: b2b broadcast nwrite %d out of %d\n",
                       uc->sfd, nwrite, nconns);
    }

    if (nwrite <= 0) {
        return false;
    }

    /* TODO: Handle binary 'stats reset' sub-command. */

    if (uc->cmd == PROTOCOL_BINARY_CMD_STAT && d->merger == NULL) {
        d->merger = genhash_init(128, skeyhash_ops);
    }

    it = item_alloc("h", 1, 0, 0, sizeof(protocol_binary_response_header));
    if (it == NULL) {
        return false;
    }

    /* Build a zeroed response header echoing the request's opcode/opaque. */
    header = (protocol_binary_response_header *) ITEM_data(it);
    memset(ITEM_data(it), 0, it->nbytes);

    header->response.magic = (uint8_t) PROTOCOL_BINARY_RES;
    header->response.opcode = uc->binary_header.request.opcode;
    header->response.opaque = uc->opaque;

    if (!add_conn_item(uc, it)) {
        item_remove(it);
        return false;
    }

    d->upstream_suffix = ITEM_data(it);
    d->upstream_suffix_len = it->nbytes;
    d->upstream_status = PROTOCOL_BINARY_RESPONSE_SUCCESS;
    d->target_host_ident = NULL;

    if (settings.verbose > 2) {
        moxi_log_write("%d: b2b broadcast upstream_suffix", uc->sfd);
        cproxy_dump_header(uc->sfd, ITEM_data(it));
    }

    /* TODO: Handle FLUSHQ (quiet binary flush_all). */

    d->downstream_used_start = nwrite;
    d->downstream_used = nwrite;

    cproxy_start_downstream_timeout(d, NULL);

    return true;
}
/**
 * Queues an ascii "VALUE ..." response for item it on the upstream conn.
 *
 * The item data must end in "\r\n"; otherwise nothing is queued.
 *
 * @param it       item to send.
 * @param uc       paused ascii upstream conn that receives the iovecs.
 * @param cas_emit 1: emit CAS.
 *                 0: do not emit CAS.
 *                -1: data driven (emit unless the item's cas is
 *                    CPROXY_NOT_CAS).
 */
void cproxy_upstream_ascii_item_response(item *it, conn *uc,
                                         int cas_emit) {
    uint64_t cas;
    char *suffix;

    assert(it != NULL);
    assert(uc != NULL);
    assert(uc->state == conn_pause);
    assert(uc->funcs != NULL);
    assert(IS_ASCII(uc->protocol));
    assert(IS_PROXY(uc->protocol));

    if (settings.verbose > 2) {
        char key[KEY_MAX_LENGTH + 10];
        assert(it->nkey <= KEY_MAX_LENGTH);
        memcpy(key, ITEM_key(it), it->nkey);
        key[it->nkey] = '\0';

        moxi_log_write("<%d cproxy ascii item response, key %s\n",
                       uc->sfd, key);
    }

    if (strncmp(ITEM_data(it) + it->nbytes - 2, "\r\n", 2) != 0) {
        if (settings.verbose > 1) {
            moxi_log_write("ERROR: unexpected downstream data block");
        }
        return;
    }

    // TODO: Need to clean up half-written add_iov()'s.
    //       Consider closing the upstream_conns?
    //
    cas = ITEM_get_cas(it);

    if ((cas_emit == 0) ||
        (cas_emit < 0 && cas == CPROXY_NOT_CAS)) {
        // No CAS: the item's own suffix and data go out as one iovec.
        //
        if (!add_conn_item(uc, it)) {
            return;
        }

        it->refcount++;

        if (add_iov(uc, "VALUE ", 6) == 0 &&
            add_iov(uc, ITEM_key(it), it->nkey) == 0 &&
            add_iov(uc, ITEM_suffix(it),
                    it->nsuffix + it->nbytes) == 0) {
            if (settings.verbose > 2) {
                moxi_log_write("<%d cproxy ascii item response success\n",
                               uc->sfd);
            }
        }

        return;
    }

    // With CAS: the item suffix minus its last two bytes, then a
    // separate " <cas>\r\n" suffix, then the data.
    //
    suffix = add_conn_suffix(uc);
    if (suffix == NULL) {
        return;
    }

    sprintf(suffix, " %llu\r\n", (unsigned long long) cas);

    if (!add_conn_item(uc, it)) {
        return;
    }

    it->refcount++;

    if (add_iov(uc, "VALUE ", 6) == 0 &&
        add_iov(uc, ITEM_key(it), it->nkey) == 0 &&
        add_iov(uc, ITEM_suffix(it),
                it->nsuffix - 2) == 0 &&
        add_iov(uc, suffix, strlen(suffix)) == 0 &&
        add_iov(uc, ITEM_data(it), it->nbytes) == 0) {
        if (settings.verbose > 2) {
            moxi_log_write("<%d cproxy ascii item response ok\n",
                           uc->sfd);
        }
    }
}
/*
 * Parses and dispatches one ascii command line from an upstream client.
 *
 * c     upstream conn; c->extra is its proxy_td and c->rcurr == line.
 * line  the command line; modified in place when a mcmux prefix is parsed.
 *
 * In mcmux mode a line may start with "A:host:port " or "B:host:port ",
 * selecting the downstream protocol and peer; the prefix is stripped
 * before tokenizing.  In mcmux mode a line WITHOUT such a prefix is a
 * "self command": every branch that forwards downstream is disabled and
 * only the locally handled commands (stats proxy, version, verbosity,
 * quit) are accepted.
 *
 * Forwarded commands set c->cmd_curr to the matching binary opcode and
 * pause the upstream via cproxy_pause_upstream_for_downstream(); update
 * commands go through process_update_command().  Anything unrecognized is
 * answered with "ERROR".  Per-command stats are recorded by SEEN().
 */
void cproxy_process_upstream_ascii(conn *c, char *line) {
    assert(c != NULL);
    assert(c->next == NULL);
    assert(c->extra != NULL);
    assert(c->cmd == -1);
    assert(c->item == NULL);
    assert(line != NULL);
    assert(line == c->rcurr);
    assert(IS_ASCII(c->protocol));
    assert(IS_PROXY(c->protocol));

    if (settings.verbose > 2) {
        moxi_log_write("<%d cproxy_process_upstream_ascii %s\n",
                       c->sfd, line);
    }

    // Snapshot rcurr, because the caller, try_read_command(), changes it.
    //
    c->cmd_curr = -1;
    c->cmd_start = c->rcurr;
    c->cmd_start_time = msec_current_time;
    c->cmd_retries = 0;

    proxy_td *ptd = c->extra;
    assert(ptd != NULL);

    /* For commands set/add/replace, we build an item and read the data
     * directly into it, then continue in nread_complete().
     */
    if (!cproxy_prep_conn_for_write(c)) {
        ptd->stats.stats.err_upstream_write_prep++;
        conn_set_state(c, conn_closing);
        return;
    }

    bool mcmux_command = false;
    bool self_command = false;

    /* Check for proxy pattern - A:host:port or B:host:port */
    if (true == settings.enable_mcmux_mode &&
        ((*line == 'A' || *line == 'B') && *(line + 1) == ':')) {
        mcmux_command = true;
    } else if (true == settings.enable_mcmux_mode) {
        self_command = true;
    }

    c->peer_protocol = 0;
    c->peer_host = NULL;
    c->peer_port = 0;

    if (mcmux_command) {
        char *peer_port = NULL;
        int i = 0;

        /* 'A' selects the ascii downstream protocol, 'B' the binary one. */
        c->peer_protocol = (*line == 'A') ?
            proxy_downstream_ascii_prot :
            proxy_downstream_binary_prot;
        line += 2;
        c->peer_host = line;

        /* Scan the host part, up to ':' / ' ' / end of string or the
         * MAX_HOSTNAME_LEN bound. */
        while (*line != ' ' && *line != '\0' &&
               *line != ':' && ++i < MAX_HOSTNAME_LEN) {
            line++;
        }

        if (*line == '\0' || line - c->peer_host <= 0) {
            out_string(c, "ERROR");
            moxi_log_write("Malformed request line");
            return;
        }

        /* Terminate the host in place; the port starts right after. */
        *line = '\0';
        line++;
        peer_port = line;
        i = 0;

        while (*line != ' ' && *line != '\0' &&
               ++i <= MAX_PORT_LEN) {
            line++;
        }

        if (*line == '\0' || line - peer_port <= 0) {
            out_string(c, "ERROR");
            moxi_log_write("Malformed request line");
            return;
        }

        c->peer_port = atoi(peer_port);
        *line++ = '\0';

        /* The real command starts after the stripped prefix. */
        c->cmd_start = line;
    }

    int cmd_len = 0;
    token_t tokens[MAX_TOKENS];
    size_t ntokens = scan_tokens(line, tokens, MAX_TOKENS, &cmd_len);
    char *cmd = tokens[COMMAND_TOKEN].value;
    int cmdx = -1;
    int cmd_st = STATS_CMD_TYPE_REGULAR;
    int comm;

/* Records one occurrence of command cmd_id in the per-thread stats:
 * picks the quiet/regular bucket from c->noreply (leaving it in cmd_st),
 * bumps seen, adds cmd_len to read_bytes and, if is_cas, bumps cas.
 * Expands to several statements, so it must be used inside braces. */
#define SEEN(cmd_id, is_cas, cmd_len)                           \
    cmd_st = c->noreply ?                                       \
        STATS_CMD_TYPE_QUIET : STATS_CMD_TYPE_REGULAR;          \
    ptd->stats.stats_cmd[cmd_st][cmd_id].seen++;                \
    ptd->stats.stats_cmd[cmd_st][cmd_id].read_bytes += cmd_len; \
    if (is_cas) {                                               \
        ptd->stats.stats_cmd[cmd_st][cmd_id].cas++;             \
    }

    /* NOTE: in the branches below, "(x = CONSTANT)" terms inside the
     * conditions are assignments that double as tests, so a branch is
     * taken only if the assigned constant is non-zero. */

    if (ntokens >= 3 &&
        (false == self_command) &&
        (strncmp(cmd, "get", 3) == 0)) {
        if (cmd[3] == 'l') {
            c->cmd_curr = PROTOCOL_BINARY_CMD_GETL;
        } else if (ntokens == 3) {
            // Single-key get/gets optimization.
            //
            c->cmd_curr = PROTOCOL_BINARY_CMD_GETK;
        } else {
            c->cmd_curr = PROTOCOL_BINARY_CMD_GETKQ;
        }

        // Handles get and gets.
        //
        cproxy_pause_upstream_for_downstream(ptd, c);

        // The cmd_len from scan_tokens might not include
        // all the keys, so cmd_len might not == strlen(command).
        // Handle read_bytes during multiget broadcast.
        //
        if (cmd[3] == 'l') {
            SEEN(STATS_CMD_GETL, true, 0);
        } else {
            SEEN(STATS_CMD_GET, cmd[3] == 's', 0);
        }

    } else if ((ntokens == 6 || ntokens == 7) &&
               (false == self_command) &&
               ((strncmp(cmd, "add", 3) == 0 &&
                 (comm = NREAD_ADD) &&
                 (cmdx = STATS_CMD_ADD) &&
                 (c->cmd_curr = PROTOCOL_BINARY_CMD_ADD)) ||
                (strncmp(cmd, "set", 3) == 0 &&
                 (comm = NREAD_SET) &&
                 (cmdx = STATS_CMD_SET) &&
                 (c->cmd_curr = PROTOCOL_BINARY_CMD_SET)) ||
                (strncmp(cmd, "replace", 7) == 0 &&
                 (comm = NREAD_REPLACE) &&
                 (cmdx = STATS_CMD_REPLACE) &&
                 (c->cmd_curr = PROTOCOL_BINARY_CMD_REPLACE)) ||
                (strncmp(cmd, "prepend", 7) == 0 &&
                 (comm = NREAD_PREPEND) &&
                 (cmdx = STATS_CMD_PREPEND) &&
                 (c->cmd_curr = PROTOCOL_BINARY_CMD_PREPEND)) ||
                (strncmp(cmd, "append", 6) == 0 &&
                 (comm = NREAD_APPEND) &&
                 (cmdx = STATS_CMD_APPEND) &&
                 (c->cmd_curr = PROTOCOL_BINARY_CMD_APPEND)))) {
        assert(c->item == NULL);
        c->item = NULL;

        process_update_command(c, tokens, ntokens, comm, false);

        if (cmdx >= 0) {
            /* c->item is checked after process_update_command(); when it
             * is still NULL the command is counted as a miss. */
            item *it = c->item;
            if (it != NULL) {
                SEEN(cmdx, false, cmd_len + it->nbytes);
            } else {
                SEEN(cmdx, false, cmd_len);
                ptd->stats.stats_cmd[cmd_st][cmdx].misses++;
            }
        }

    } else if ((ntokens == 7 || ntokens == 8) &&
               (false == self_command) &&
               (strncmp(cmd, "cas", 3) == 0 &&
                (comm = NREAD_CAS) &&
                (c->cmd_curr = PROTOCOL_BINARY_CMD_SET))) {
        assert(c->item == NULL);
        c->item = NULL;

        process_update_command(c, tokens, ntokens, comm, true);

        item *it = c->item;
        if (it != NULL) {
            SEEN(STATS_CMD_CAS, true, cmd_len + it->nbytes);
        } else {
            SEEN(STATS_CMD_CAS, true, cmd_len);
            ptd->stats.stats_cmd[cmd_st][STATS_CMD_CAS].misses++;
        }

    } else if ((ntokens == 4 || ntokens == 5) &&
               (false == self_command) &&
               (strncmp(cmd, "incr", 4) == 0) &&
               (c->cmd_curr = PROTOCOL_BINARY_CMD_INCREMENT)) {
        set_noreply_maybe(c, tokens, ntokens);
        cproxy_pause_upstream_for_downstream(ptd, c);

        SEEN(STATS_CMD_INCR, false, cmd_len);

    } else if ((ntokens == 4 || ntokens == 5) &&
               (false == self_command) &&
               (strncmp(cmd, "decr", 4) == 0) &&
               (c->cmd_curr = PROTOCOL_BINARY_CMD_DECREMENT)) {
        set_noreply_maybe(c, tokens, ntokens);
        cproxy_pause_upstream_for_downstream(ptd, c);

        SEEN(STATS_CMD_DECR, false, cmd_len);

    } else if (ntokens >= 3 && ntokens <= 4 &&
               (false == self_command) &&
               (strncmp(cmd, "delete", 6) == 0) &&
               (c->cmd_curr = PROTOCOL_BINARY_CMD_DELETE)) {
        set_noreply_maybe(c, tokens, ntokens);
        cproxy_pause_upstream_for_downstream(ptd, c);

        SEEN(STATS_CMD_DELETE, false, cmd_len);

    } else if (ntokens >= 2 && ntokens <= 4 &&
               (false == self_command) &&
               (strncmp(cmd, "flush_all", 9) == 0) &&
               (c->cmd_curr = PROTOCOL_BINARY_CMD_FLUSH)) {
        set_noreply_maybe(c, tokens, ntokens);
        cproxy_pause_upstream_for_downstream(ptd, c);

        SEEN(STATS_CMD_FLUSH_ALL, false, cmd_len);

    } else if (ntokens >= 3 && ntokens <= 4 &&
               (strncmp(cmd, "stats proxy", 10) == 0)) {
        /* Handled locally, so it is also allowed for self commands.
         * NOTE(review): only the first 10 characters are compared. */
        process_stats_proxy_command(c, tokens, ntokens);

        SEEN(STATS_CMD_STATS, false, cmd_len);

    } else if (ntokens == 3 &&
               (false == self_command) &&
               (strcmp(cmd, "stats reset") == 0) &&
               (c->cmd_curr = PROTOCOL_BINARY_CMD_STAT)) {
        cproxy_pause_upstream_for_downstream(ptd, c);

        SEEN(STATS_CMD_STATS_RESET, false, cmd_len);

    } else if (ntokens == 2 &&
               (false == self_command) &&
               (strcmp(cmd, "stats") == 0) &&
               (c->cmd_curr = PROTOCOL_BINARY_CMD_STAT)) {
        // Even though we've coded to handle advanced stats
        // like stats cachedump, prevent those here to avoid
        // locking downstream servers.
        //
        cproxy_pause_upstream_for_downstream(ptd, c);

        SEEN(STATS_CMD_STATS, false, cmd_len);

    } else if (ntokens == 2 &&
               (true == mcmux_command) &&
               (strncmp(cmd, "version", 7) == 0) &&
               (c->cmd_curr = PROTOCOL_BINARY_CMD_VERSION)) {
        /* downstream version command */
        cproxy_pause_upstream_for_downstream(ptd, c);

        SEEN(STATS_CMD_VERSION, false, cmd_len);

    } else if (ntokens == 2 &&
               (strncmp(cmd, "version", 7) == 0)) {
        /* Without a mcmux prefix the proxy answers with its own version. */
        out_string(c, "VERSION " VERSION);

        SEEN(STATS_CMD_VERSION, false, cmd_len);

    } else if ((ntokens == 3 || ntokens == 4) &&
               (strncmp(cmd, "verbosity", 9) == 0)) {
        process_verbosity_command(c, tokens, ntokens);

        SEEN(STATS_CMD_VERBOSITY, false, cmd_len);

    } else if (ntokens == 2 &&
               (strncmp(cmd, "quit", 4) == 0)) {
        conn_set_state(c, conn_closing);

        SEEN(STATS_CMD_QUIT, false, cmd_len);

    } else if (ntokens == 4 &&
               (strncmp(cmd, "unl", 3) == 0) &&
               (false == self_command) &&
               (c->cmd_curr = PROTOCOL_BINARY_CMD_UNL)) {
        cproxy_pause_upstream_for_downstream(ptd, c);

        SEEN(STATS_CMD_UNL, false, cmd_len);

    } else {
        /* Unknown command, wrong token count, or a forwarded command
         * attempted as a self command. */
        out_string(c, "ERROR");

        SEEN(STATS_CMD_ERROR, false, cmd_len);
    }
}
/* Handles a binary-protocol request header that has just been read on an
 * upstream proxy connection.
 *
 * VERSION and QUIT are handed straight to dispatch_bin_command().  For any
 * other command an item large enough for the header plus body is allocated
 * and the header bytes are copied into it.  If a body is expected the
 * connection moves to conn_nread to collect it; otherwise the request is
 * processed immediately (SASL_LIST_MECHS is answered with "PLAIN", anything
 * else pauses the upstream while a downstream is obtained).
 */
void cproxy_process_upstream_binary(conn *c) {
    cb_assert(c != NULL);
    cb_assert(c->cmd >= 0);
    cb_assert(c->next == NULL);
    cb_assert(c->item == NULL);
    cb_assert(IS_BINARY(c->protocol));
    cb_assert(IS_PROXY(c->protocol));

    proxy_td *ptd = c->extra;
    cb_assert(ptd != NULL);

    if (!cproxy_prep_conn_for_write(c)) {
        ptd->stats.stats.err_upstream_write_prep++;
        conn_set_state(c, conn_closing);
        return;
    }

    /* Reset the per-command bookkeeping on the connection. */
    c->cmd_curr = -1;
    c->cmd_start = NULL;
    c->cmd_start_time = msec_current_time;
    c->cmd_retries = 0;

    int extlen = c->binary_header.request.extlen;
    int keylen = c->binary_header.request.keylen;
    uint32_t bodylen = c->binary_header.request.bodylen;

    cb_assert(bodylen >= (uint32_t) keylen + extlen);

    if (settings.verbose > 2) {
        moxi_log_write("<%d cproxy_process_upstream_binary %x %d %d %u\n",
                       c->sfd, c->cmd, extlen, keylen, bodylen);
    }

    process_bin_noreply(c); /* Map quiet c->cmd values into non-quiet. */

    if (c->cmd == PROTOCOL_BINARY_CMD_VERSION ||
        c->cmd == PROTOCOL_BINARY_CMD_QUIT) {
        dispatch_bin_command(c);
        return;
    }

    /* The item holds the entire request message (header + body); */
    /* the body, if any, is read into it by a follow-up nread.     */
    item *it = item_alloc("u", 1, 0, 0, sizeof(c->binary_header) + bodylen);
    c->item = it;

    if (it == NULL) {
        if (settings.verbose > 2) {
            moxi_log_write("<%d cproxy_process_upstream_binary OOM\n",
                           c->sfd);
        }
        ptd->stats.stats.err_oom++;
        cproxy_close_conn(c);
        return;
    }

    cb_assert(it->refcount == 1);

    void *header_bytes = c->rcurr;
    memcpy(ITEM_data(it), header_bytes, sizeof(c->binary_header));

    if (bodylen > 0) {
        /* Body bytes land directly after the header copy. */
        c->ritem = ITEM_data(it) + sizeof(c->binary_header);
        c->rlbytes = bodylen;
        c->substate = bin_read_set_value;
        conn_set_state(c, conn_nread);
        return;
    }

    /* No body bytes, so go straight to the nread-completed step. */
    if (c->binary_header.request.opcode ==
        PROTOCOL_BINARY_CMD_SASL_LIST_MECHS) {
        /* TODO: One day handle more than just PLAIN sasl auth. */
        write_bin_response(c, "PLAIN", 0, 0, strlen("PLAIN"));
        return;
    }

    cproxy_pause_upstream_for_downstream(ptd, c);
}
/* Forward an upstream command that came with item data,
 * like set/add/replace/etc.
 *
 * The downstream connection is chosen from the item's key.  The request
 * is assembled as a sequence of iovecs in this order: verb, key, flags,
 * exptime, length, optional cas, optional " noreply", then the item data.
 *
 * Returns true when the request was queued for writing on a downstream
 * connection, false otherwise (no downstream conn for the key, write
 * prep failure, iovec/event failure, or an unparsable item suffix).
 */
bool cproxy_forward_a2a_item_downstream(downstream *d, short cmd,
                                        item *it, conn *uc) {
    assert(d != NULL);
    assert(d->ptd != NULL);
    assert(d->ptd->proxy != NULL);
    assert(d->downstream_conns != NULL);
    assert(it != NULL);
    assert(uc != NULL);
    assert(uc->next == NULL);

    // Assuming we're already connected to downstream.
    //
    conn *c = cproxy_find_downstream_conn(d, ITEM_key(it), it->nkey, NULL);
    if (c != NULL) {
        if (cproxy_prep_conn_for_write(c)) {
            assert(c->state == conn_pause);

            char *verb = nread_text(cmd);
            assert(verb != NULL);

            // The suffix is split at the first space after its leading
            // character: [str_flags, str_length) is the flags part and
            // the remainder is the length part.
            //
            char *str_flags = ITEM_suffix(it);
            char *str_length = strchr(str_flags + 1, ' ');

            // Only subtract when strchr() found the separator; a NULL
            // result must not take part in pointer arithmetic.  A zero
            // len_flags fails the validity check below.
            //
            int len_flags = (str_length != NULL)
                ? (int) (str_length - str_flags) : 0;

            // NOTE(review): the "- 2" presumably drops a trailing "\r\n"
            // from the suffix -- confirm against the item layout.
            //
            int len_length = it->nsuffix - len_flags - 2;

            char *str_exptime = add_conn_suffix(c);
            char *str_cas = (cmd == NREAD_CAS ? add_conn_suffix(c) : NULL);

            if (str_flags != NULL &&
                str_length != NULL &&
                len_flags > 1 &&
                len_length > 1 &&
                str_exptime != NULL &&
                (cmd != NREAD_CAS ||
                 str_cas != NULL)) {
                sprintf(str_exptime, " %u", it->exptime);

                if (str_cas != NULL) {
                    sprintf(str_cas, " %llu",
                            (unsigned long long) ITEM_get_cas(it));
                }

                if (add_iov(c, verb, strlen(verb)) == 0 &&
                    add_iov(c, ITEM_key(it), it->nkey) == 0 &&
                    add_iov(c, str_flags, len_flags) == 0 &&
                    add_iov(c, str_exptime, strlen(str_exptime)) == 0 &&
                    add_iov(c, str_length, len_length) == 0 &&
                    (str_cas == NULL ||
                     add_iov(c, str_cas, strlen(str_cas)) == 0) &&
                    (uc->noreply == false ||
                     add_iov(c, " noreply", 8) == 0) &&
                    add_iov(c, ITEM_data(it) - 2, it->nbytes + 2) == 0) {
                    conn_set_state(c, conn_mwrite);
                    c->write_and_go = conn_new_cmd;

                    if (update_event(c, EV_WRITE | EV_PERSIST)) {
                        d->downstream_used_start = 1;
                        d->downstream_used = 1;

                        if (cproxy_dettach_if_noreply(d, uc) == false) {
                            cproxy_start_downstream_timeout(d, c);

                            // During a synchronous (with-reply) SET,
                            // handle fire-&-forget SET optimization.
                            //
                            if (cmd == NREAD_SET &&
                                cproxy_optimize_set_ascii(d, uc,
                                                          ITEM_key(it),
                                                          it->nkey)) {
                                d->ptd->stats.stats.tot_optimize_sets++;
                            }
                        } else {
                            c->write_and_go = conn_pause;

                            cproxy_front_cache_delete(d->ptd,
                                                      ITEM_key(it),
                                                      it->nkey);
                        }

                        return true;
                    }
                }

                d->ptd->stats.stats.err_oom++;
                cproxy_close_conn(c);
            } else {
                // TODO: Handle this weird error case.
            }
        } else {
            d->ptd->stats.stats.err_downstream_write_prep++;
            cproxy_close_conn(c);
        }

        // Reached on every failure once a downstream conn was found.
        // The newline keeps this entry from running into the next one.
        //
        if (settings.verbose > 1) {
            moxi_log_write("Proxy item write out of memory\n");
        }
    }

    return false;
}
/* Sends a single-line ascii command (get, incr/decr, delete, ...) to a
 * downstream server.  The reply may be one line or a run of VALUE lines
 * terminated by END.
 *
 * get-family commands go through the multiget path, flush_all and stats
 * are broadcast, and everything else is written to the one downstream
 * connection selected by the command's key.
 *
 * Returns true when the command was handed to at least one downstream.
 */
bool cproxy_forward_a2a_simple_downstream(downstream *d,
                                          char *command, conn *uc) {
    assert(d != NULL);
    assert(d->ptd != NULL);
    assert(d->ptd->proxy != NULL);
    assert(d->downstream_conns != NULL);
    assert(command != NULL);
    assert(uc != NULL);
    assert(uc->item == NULL);
    assert(uc->cmd_curr != (protocol_binary_command) -1);
    assert(d->multiget == NULL);
    assert(d->merger == NULL);

    // Handles get and gets.
    //
    if (uc->cmd_curr == PROTOCOL_BINARY_CMD_GETK ||
        uc->cmd_curr == PROTOCOL_BINARY_CMD_GETKQ ||
        uc->cmd_curr == PROTOCOL_BINARY_CMD_GETL) {
        // Only use front_cache for 'get', not for 'gets'.
        //
        mcache *front_cache = NULL;
        if (command[3] == ' ') {
            front_cache = &d->ptd->proxy->front_cache;
        }

        return multiget_ascii_downstream(d, uc,
                                         a2a_multiget_start,
                                         a2a_multiget_skey,
                                         a2a_multiget_end,
                                         front_cache);
    }

    assert(uc->next == NULL);

    if (uc->cmd_curr == PROTOCOL_BINARY_CMD_FLUSH) {
        return cproxy_broadcast_a2a_downstream(d, command, uc,
                                               "OK\r\n");
    }

    if (uc->cmd_curr == PROTOCOL_BINARY_CMD_STAT) {
        if (strncmp(command + 5, " reset", 6) == 0) {
            return cproxy_broadcast_a2a_downstream(d, command, uc,
                                                   "RESET\r\n");
        }

        if (!cproxy_broadcast_a2a_downstream(d, command, uc,
                                             "END\r\n")) {
            return false;
        }

        // Plain stats were broadcast, so set up the merger.
        //
        d->merger = genhash_init(512, skeyhash_ops);
        return true;
    }

    // TODO: Inefficient repeated scan_tokens.
    //
    int cmd_len = 0;
    token_t tokens[MAX_TOKENS];
    size_t ntokens = scan_tokens(command, tokens, MAX_TOKENS, &cmd_len);
    char *key = tokens[KEY_TOKEN].value;
    int key_len = tokens[KEY_TOKEN].length;

    if (ntokens <= 1) {
        // This was checked long ago, while parsing the upstream conn.
        //
        assert(false);
        return false;
    }

    // Assuming we're already connected to downstream.
    //
    if (strcmp(command, "version") == 0) {
        /* fake key for version command handling */
        key = "v";
        key_len = 1;
    }

    conn *c = cproxy_find_downstream_conn(d, key, key_len, NULL);
    if (c == NULL) {
        return false;
    }

    if (!cproxy_prep_conn_for_write(c)) {
        d->ptd->stats.stats.err_downstream_write_prep++;
        cproxy_close_conn(c);
        return false;
    }

    assert(c->state == conn_pause);

    out_string(c, command);

    if (settings.verbose > 1) {
        moxi_log_write("forwarding to %d, noreply %d\n",
                       c->sfd, uc->noreply);
    }

    if (!update_event(c, EV_WRITE | EV_PERSIST)) {
        if (settings.verbose > 1) {
            moxi_log_write("Couldn't update cproxy write event\n");
        }

        d->ptd->stats.stats.err_oom++;
        cproxy_close_conn(c);
        return false;
    }

    d->downstream_used_start = 1;
    d->downstream_used = 1;

    if (cproxy_dettach_if_noreply(d, uc)) {
        c->write_and_go = conn_pause;

        // Do mcache_delete() here only during a noreply,
        // otherwise for with-reply requests, we could
        // be in a race with other clients repopulating
        // the front_cache.  For with-reply requests, we
        // clear the front_cache when we get a success reply.
        //
        cproxy_front_cache_delete(d->ptd, key, key_len);
    } else {
        cproxy_start_downstream_timeout(d, c);
    }

    return true;
}
/* Parses val as a count of milliseconds and stores it into *tv as whole
 * seconds plus microseconds.  Returns the result of safe_strtoul().
 * *tv is written unconditionally, including when parsing fails.
 */
static bool behavior_parse_timeout_ms(const char *val, struct timeval *tv) {
    uint32_t ms = 0;
    bool ok = safe_strtoul(val, &ms);

    tv->tv_sec = ms / 1000;
    tv->tv_usec = (ms % 1000) * 1000;

    return ok;
}

/* Copies src into the dst buffer of dst_size bytes when it fits together
 * with its terminator.  Returns false, leaving dst untouched, otherwise.
 */
static bool behavior_copy_str(char *dst, size_t dst_size, const char *src) {
    if (strlen(src) >= dst_size) {
        return false;
    }

    strcpy(dst, src);
    return true;
}

/** Applies one "key=value" configuration setting to a proxy_behavior.
 *
 * Note: the key and val param buffers are modified (both are passed
 * through trimstr()).
 *
 * A key starting with '#' is accepted as a comment.  An unknown key, an
 * unparsable number, an over-long string, or a NULL key/val is reported
 * through moxi_log_write() as a config error; nothing is returned.
 *
 * @param key      setting name; may be NULL.
 * @param val      setting value; may be NULL.
 * @param behavior structure to update; must not be NULL.
 */
void cproxy_parse_behavior_key_val(char *key,
                                   char *val,
                                   proxy_behavior *behavior) {
    uint32_t x = 0;
    bool ok = false;

    assert(behavior != NULL);

    if (key != NULL &&
        val != NULL) {
        key = trimstr(key);
        val = trimstr(val);

        if (wordeq(key, "cycle")) {
            ok = safe_strtoul(val, &behavior->cycle);
        } else if (wordeq(key, "downstream_max") ||
                   wordeq(key, "concurrency")) {
            ok = safe_strtoul(val, &behavior->downstream_max);
        } else if (wordeq(key, "downstream_conn_max")) {
            ok = safe_strtoul(val, &behavior->downstream_conn_max);
        } else if (wordeq(key, "weight") ||
                   wordeq(key, "downstream_weight")) {
            ok = safe_strtoul(val, &behavior->downstream_weight);
        } else if (wordeq(key, "retry") ||
                   wordeq(key, "downstream_retry")) {
            ok = safe_strtoul(val, &behavior->downstream_retry);
        } else if (wordeq(key, "protocol") ||
                   wordeq(key, "downstream_protocol")) {
            if (wordeq(val, "ascii") ||
                wordeq(val, "memcached-ascii") ||
                wordeq(val, "membase-ascii")) {
                behavior->downstream_protocol =
                    proxy_downstream_ascii_prot;
                ok = true;
            } else if (wordeq(val, "binary") ||
                       wordeq(val, "memcached-binary") ||
                       wordeq(val, "membase-binary")) {
                behavior->downstream_protocol =
                    proxy_downstream_binary_prot;
                ok = true;
            } else {
                if (settings.verbose > 1) {
                    moxi_log_write("unknown behavior prot: %s\n", val);
                }
            }
        } else if (wordeq(key, "timeout") ||
                   wordeq(key, "downstream_timeout") ||
                   wordeq(key, "downstream_conn_timeout")) {
            ok = behavior_parse_timeout_ms(val,
                                           &behavior->downstream_timeout);
        } else if (wordeq(key, "downstream_conn_queue_timeout")) {
            ok = behavior_parse_timeout_ms(val,
                                           &behavior->downstream_conn_queue_timeout);
        } else if (wordeq(key, "wait_queue_timeout")) {
            ok = behavior_parse_timeout_ms(val,
                                           &behavior->wait_queue_timeout);
        } else if (wordeq(key, "connect_timeout")) {
            ok = behavior_parse_timeout_ms(val,
                                           &behavior->connect_timeout);
        } else if (wordeq(key, "auth_timeout")) {
            ok = behavior_parse_timeout_ms(val,
                                           &behavior->auth_timeout);
        } else if (wordeq(key, "time_stats")) {
            ok = safe_strtoul(val, &x);
            behavior->time_stats = x;
        } else if (wordeq(key, "mcs_opts")) {
            ok = behavior_copy_str(behavior->mcs_opts,
                                   sizeof(behavior->mcs_opts), val);
        } else if (wordeq(key, "connect_max_errors")) {
            ok = safe_strtoul(val, &behavior->connect_max_errors);
        } else if (wordeq(key, "connect_retry_interval")) {
            ok = safe_strtoul(val, &behavior->connect_retry_interval);
        } else if (wordeq(key, "front_cache_max")) {
            ok = safe_strtoul(val, &behavior->front_cache_max);
        } else if (wordeq(key, "front_cache_lifespan")) {
            ok = safe_strtoul(val, &behavior->front_cache_lifespan);
        } else if (wordeq(key, "front_cache_spec")) {
            ok = behavior_copy_str(behavior->front_cache_spec,
                                   sizeof(behavior->front_cache_spec), val);
        } else if (wordeq(key, "front_cache_unspec")) {
            ok = behavior_copy_str(behavior->front_cache_unspec,
                                   sizeof(behavior->front_cache_unspec), val);
        } else if (wordeq(key, "key_stats_max")) {
            ok = safe_strtoul(val, &behavior->key_stats_max);
        } else if (wordeq(key, "key_stats_lifespan")) {
            ok = safe_strtoul(val, &behavior->key_stats_lifespan);
        } else if (wordeq(key, "key_stats_spec")) {
            ok = behavior_copy_str(behavior->key_stats_spec,
                                   sizeof(behavior->key_stats_spec), val);
        } else if (wordeq(key, "key_stats_unspec")) {
            ok = behavior_copy_str(behavior->key_stats_unspec,
                                   sizeof(behavior->key_stats_unspec), val);
        } else if (wordeq(key, "optimize_set")) {
            ok = behavior_copy_str(behavior->optimize_set,
                                   sizeof(behavior->optimize_set), val);
        } else if (wordeq(key, "usr")) {
            ok = behavior_copy_str(behavior->usr,
                                   sizeof(behavior->usr), val);
        } else if (wordeq(key, "pwd")) {
            ok = behavior_copy_str(behavior->pwd,
                                   sizeof(behavior->pwd), val);
        } else if (wordeq(key, "host")) {
            ok = behavior_copy_str(behavior->host,
                                   sizeof(behavior->host), val);
        } else if (wordeq(key, "port")) {
            ok = safe_strtol(val, &behavior->port);
        } else if (wordeq(key, "bucket")) {
            ok = behavior_copy_str(behavior->bucket,
                                   sizeof(behavior->bucket), val);
        } else if (wordeq(key, "port_listen")) {
            ok = safe_strtol(val, &behavior->port_listen);
        } else if (wordeq(key, "default_bucket_name")) {
            ok = behavior_copy_str(behavior->default_bucket_name,
                                   sizeof(behavior->default_bucket_name), val);
        } else if (key[0] == '#') { // Comment.
            ok = true;
        } else {
            if (settings.verbose > 1) {
                moxi_log_write("ERROR: unknown behavior key: %s\n", key);
            }
        }
    }

    if (ok == false) {
        // key and/or val may be NULL on this path; never hand a NULL
        // pointer to a %s conversion.
        //
        moxi_log_write("ERROR: config error in key: %s value: %s\n",
                       key != NULL ? key : "(null)",
                       val != NULL ? val : "(null)");
    }
}
/* Creates and starts one static proxy per section of a config string.
 *
 * cfg looks like "local_port=host:port,host:port;local_port=host:port"
 * like "11222=memcached1.foo.net:11211"  This means local port 11222
 * will be a proxy to downstream memcached server running at
 * host memcached1.foo.net on port 11211.
 *
 * Any malformed section, allocation failure or listen failure terminates
 * the process via exit(EXIT_FAILURE).  Returns 0 otherwise, including
 * for a NULL or empty cfg_str.
 */
int cproxy_init_string(char *cfg_str,
                       proxy_behavior behavior,
                       int nthreads) {
    if (cfg_str == NULL || cfg_str[0] == '\0') {
        return 0;
    }

    char *name = "default";

    if (settings.verbose > 1) {
        cproxy_dump_behavior(&behavior, "init_string", 2);
    }

    char *cfg_copy = trimstrdup(cfg_str);

    for (char *cursor = cfg_copy; cursor != NULL; ) {
        // Peel off the next ';'-terminated section, then split it at '='.
        //
        char *servers = strsep(&cursor, ";");
        char *port_text = trimstr(strsep(&servers, "="));

        if (servers == NULL) {
            moxi_log_write("bad moxi config, missing =\n");
            exit(EXIT_FAILURE);
        }

        int listen_port = atoi(port_text);
        if (listen_port <= 0) {
            moxi_log_write("missing proxy port\n");
            exit(EXIT_FAILURE);
        }

        servers = trimstr(servers);

        // Number of servers: one more than the number of commas.
        //
        int nservers = 1;
        for (const char *comma = strchr(servers, ',');
             comma != NULL;
             comma = strchr(comma + 1, ',')) {
            nservers++;
        }

        proxy_behavior_pool pool;
        memset(&pool, 0, sizeof(proxy_behavior_pool));

        pool.base = behavior;
        pool.num  = nservers;
        pool.arr  = calloc(nservers, sizeof(proxy_behavior));

        if (pool.arr == NULL) {
            moxi_log_write("could not alloc behaviors\n");
            exit(EXIT_FAILURE);
        }

        // Every server starts out with a copy of the base behavior.
        //
        for (int s = 0; s < nservers; s++) {
            pool.arr[s] = behavior;
        }

        proxy_main *pm = cproxy_gen_proxy_main(behavior, nthreads,
                                               PROXY_CONF_TYPE_STATIC);
        if (pm == NULL) {
            moxi_log_write("could not alloc proxy_main\n");
            exit(EXIT_FAILURE);
        }

        proxy *px = cproxy_create(pm,
                                  name,
                                  listen_port,
                                  servers,
                                  0, // config_ver.
                                  &pool,
                                  nthreads);
        if (px == NULL) {
            moxi_log_write("could not alloc proxy\n");
            exit(EXIT_FAILURE);
        }

        // Push the new proxy onto the head of the proxy_main list.
        //
        pthread_mutex_lock(&pm->proxy_main_lock);
        px->next = pm->proxy_head;
        pm->proxy_head = px;
        pthread_mutex_unlock(&pm->proxy_main_lock);

        int nlisten = cproxy_listen(px);
        if (nlisten <= 0) {
            moxi_log_write("moxi error -- port %d unavailable?\n",
                           listen_port);
            exit(EXIT_FAILURE);
        }

        if (settings.verbose > 1) {
            moxi_log_write("moxi listening on %d with %d conns\n",
                           listen_port, nlisten);
        }

        free(pool.arr);
    }

    free(cfg_copy);

    return 0;
}
/* Creates and starts the single static proxy used in mcmux mode,
 * listening on proxy_port with the fixed config string "mcmux_config".
 *
 * Allocation or listen failures terminate the process via
 * exit(EXIT_FAILURE); otherwise returns 0.
 */
int cproxy_init_mcmux_mode(int proxy_port,
                           proxy_behavior behavior,
                           int nthreads) {
    char *name = "default";

    if (settings.verbose > 1) {
        cproxy_dump_behavior(&behavior, "init_string", 2);
    }

    const int nservers = 1; // Number of servers.

    proxy_behavior_pool pool;
    memset(&pool, 0, sizeof(proxy_behavior_pool));

    pool.base = behavior;
    pool.num  = nservers;
    pool.arr  = calloc(nservers, sizeof(proxy_behavior));

    if (pool.arr == NULL) {
        moxi_log_write("could not alloc behaviors\n");
        exit(EXIT_FAILURE);
    }

    // Each slot starts out as a copy of the base behavior.
    //
    for (int s = 0; s < nservers; s++) {
        pool.arr[s] = behavior;
    }

    proxy_main *pm = cproxy_gen_proxy_main(behavior, nthreads,
                                           PROXY_CONF_TYPE_STATIC);
    if (pm == NULL) {
        moxi_log_write("could not alloc proxy_main\n");
        exit(EXIT_FAILURE);
    }

    proxy *px = cproxy_create(pm,
                              name,
                              proxy_port,
                              "mcmux_config",
                              0, // config_ver.
                              &pool,
                              nthreads);
    if (px == NULL) {
        moxi_log_write("could not alloc proxy\n");
        exit(EXIT_FAILURE);
    }

    // Push the new proxy onto the head of the proxy_main list.
    //
    pthread_mutex_lock(&pm->proxy_main_lock);
    px->next = pm->proxy_head;
    pm->proxy_head = px;
    pthread_mutex_unlock(&pm->proxy_main_lock);

    int nlisten = cproxy_listen(px);
    if (nlisten <= 0) {
        moxi_log_write("moxi error -- port %d unavailable?\n",
                       proxy_port);
        exit(EXIT_FAILURE);
    }

    if (settings.verbose > 1) {
        moxi_log_write("moxi listening on %d with %d conns\n",
                       proxy_port, nlisten);
    }

    free(pool.arr);

    return 0;
}
/* Writes one ascii command to every usable downstream connection.
 * Used for broadcast commands, like flush_all or stats.
 *
 * Connections that fail write prep or event registration are closed and
 * skipped.  Returns true when the command was queued on at least one
 * connection; suffix is recorded as the upstream suffix unless the
 * upstream was detached as noreply.
 */
bool cproxy_broadcast_a2a_downstream(downstream *d,
                                     char *command,
                                     conn *uc,
                                     char *suffix) {
    assert(d != NULL);
    assert(d->ptd != NULL);
    assert(d->ptd->proxy != NULL);
    assert(d->downstream_conns != NULL);
    assert(d->downstream_used_start == 0);
    assert(d->downstream_used == 0);
    assert(command != NULL);
    assert(uc != NULL);
    assert(uc->next == NULL);
    assert(uc->item == NULL);

    int queued = 0;
    int total = mcs_server_count(&d->mst);

    for (int idx = 0; idx < total; idx++) {
        conn *dc = d->downstream_conns[idx];
        if (dc == NULL || dc == NULL_CONN) {
            continue;
        }

        if (!cproxy_prep_conn_for_write(dc)) {
            d->ptd->stats.stats.err_downstream_write_prep++;
            cproxy_close_conn(dc);
            continue;
        }

        assert(dc->state == conn_pause);

        out_string(dc, command);

        if (!update_event(dc, EV_WRITE | EV_PERSIST)) {
            if (settings.verbose > 1) {
                moxi_log_write("Update cproxy write event failed\n");
            }

            d->ptd->stats.stats.err_oom++;
            cproxy_close_conn(dc);
            continue;
        }

        queued++;

        if (uc->noreply) {
            dc->write_and_go = conn_pause;
        }
    }

    if (settings.verbose > 1) {
        moxi_log_write("%d: a2a broadcast nwrite %d out of %d\n",
                       uc->sfd, queued, total);
    }

    if (queued <= 0) {
        return false;
    }

    d->downstream_used_start = queued;
    d->downstream_used = queued;

    if (cproxy_dettach_if_noreply(d, uc)) {
        // TODO: Handle flush_all's expiration parameter against
        // the front_cache.
        //
        if (strncmp(command, "flush_all", 9) == 0) {
            mcache_flush_all(&d->ptd->proxy->front_cache, 0);
        }
    } else {
        // A reply is expected: set up the upstream response state.
        //
        d->upstream_suffix = suffix;
        d->upstream_suffix_len = 0;
        d->upstream_status = PROTOCOL_BINARY_RESPONSE_SUCCESS;
        d->upstream_retry = 0;
        d->target_host_ident = NULL;

        cproxy_start_downstream_timeout(d, NULL);
    }

    return true;
}
/** Called by libevent, on the receiving thread, when * there is work for the receiving thread to handle. */ void work_recv(int fd, short which, void *arg) { assert(which & EV_READ); work_queue *m = arg; assert(m != NULL); assert(m->recv_fd == fd); assert(m->send_fd >= 0); assert(m->event_base != NULL); work_item *curr = NULL; work_item *next = NULL; char buf[1]; // The lock area includes the read() for safety, // as the pipe acts like a cond variable. // pthread_mutex_lock(&m->work_lock); int readrv = read(fd, buf, 1); assert(readrv == 1); if (readrv != 1) { #ifdef WORK_DEBUG // Perhaps libevent called us in incorrect way. // moxi_log_write("unexpected work_recv read value\n"); #endif } curr = m->work_head; m->work_head = NULL; m->work_tail = NULL; #ifdef WORK_DEBUG moxi_log_write("work_recv %x %x %x %d %d %d %llu %llu %d\n", (int) pthread_self(), (int) m, (int) m->event_base, m->send_fd, m->recv_fd, curr != NULL, m->num_items, m->tot_sends, fd); #endif pthread_mutex_unlock(&m->work_lock); uint64_t num_items = 0; while (curr != NULL) { next = curr->next; num_items++; curr->func(curr->data0, curr->data1); free(curr); curr = next; } if (num_items > 0) { pthread_mutex_lock(&m->work_lock); m->tot_recvs += num_items; m->num_items -= num_items; pthread_mutex_unlock(&m->work_lock); } }
/* Continues processing of an upstream binary request once the header and
 * body have been read into an item.
 *
 * SASL_AUTH, SASL_STEP and the "proxy buckets" STAT subcommand are
 * answered here.  Quiet (noreply) commands are corked until a later
 * non-quiet command arrives, except FLUSHQ, which closes the connection.
 * Everything else pauses the upstream while a downstream is obtained.
 */
void cproxy_process_upstream_binary_nread(conn *c) {
    cb_assert(c != NULL);
    cb_assert(c->cmd >= 0);
    cb_assert(c->next == NULL);
    cb_assert(c->cmd_start == NULL);
    cb_assert(IS_BINARY(c->protocol));
    cb_assert(IS_PROXY(c->protocol));

    protocol_binary_request_header *header =
        (protocol_binary_request_header *) &c->binary_header;

    int extlen = header->request.extlen;
    int keylen = header->request.keylen;
    uint32_t bodylen = header->request.bodylen;

    if (settings.verbose > 2) {
        moxi_log_write("<%d cproxy_process_upstream_binary_nread %x %d %d %u\n",
                       c->sfd, c->cmd, extlen, keylen, bodylen);
    }

    proxy_td *ptd = c->extra;
    cb_assert(ptd != NULL);

    switch (header->request.opcode) {
    case PROTOCOL_BINARY_CMD_SASL_AUTH: {
        item *auth_item = c->item;
        cb_assert(auth_item);

        cproxy_sasl_plain_auth(c, (char *) ITEM_data(auth_item));
        return;
    }

    case PROTOCOL_BINARY_CMD_SASL_STEP:
        write_bin_error(c, PROTOCOL_BINARY_RESPONSE_AUTH_ERROR, 0);
        return;

    case PROTOCOL_BINARY_CMD_STAT: {
        char *subcommand = binary_get_key(c);
        size_t nkey = c->binary_header.request.keylen;

        if (nkey == 13 && memcmp(subcommand, "proxy buckets", 13) == 0) {
            process_bin_proxy_stats(c);
            return;
        }
        break;
    }

    default:
        break;
    }

    if (!c->noreply) {
        cb_assert(c->item == NULL || ((item *) c->item)->refcount == 1);

        cproxy_pause_upstream_for_downstream(ptd, c);
        return;
    }

    if (settings.verbose > 2) {
        moxi_log_write("<%d cproxy_process_upstream_binary_nread "
                       "corking quiet command %x %d\n",
                       c->sfd, c->cmd, (c->corked != NULL));
    }

    /* TODO: We currently don't support binary FLUSHQ. */
    /* Rather than having the downstream connections get */
    /* into a wonky state, prevent it. */
    if (header->request.opcode == PROTOCOL_BINARY_CMD_FLUSHQ) {
        /* Note: don't use cproxy_close_conn(c), as it goes */
        /* through the drive_machine() loop again. */
        conn_set_state(c, conn_closing);
        return;
    }

    /* Hold onto or 'cork' all the binary quiet commands */
    /* until there's a later non-quiet command. */
    if (!cproxy_binary_cork_cmd(c)) {
        ptd->stats.stats.err_oom++;
        cproxy_close_conn(c);
        return;
    }

    conn_set_state(c, conn_new_cmd);
}
/** A work queue is a mechanism to allow thread-to-thread
 *  communication in a libevent-based, multithreaded system.
 *
 *  One thread can send work to another thread.  The receiving thread
 *  should be libevent-based, with a processing loop handled by
 *  libevent.
 *
 *  Use work_queue_init() to initialize a work_queue structure,
 *  where the work_queue structure memory is owned by the caller.
 *
 *  The structure is zeroed, its lock is initialized, a notify pipe (a
 *  local socket pair on WIN32) is created, and a persistent read event
 *  for the receiving end is registered on event_base with work_recv()
 *  as the callback.
 *
 *  On failure everything acquired here is released again: the lock is
 *  destroyed and, on non-WIN32 builds, both pipe descriptors are closed
 *  and recv_fd / send_fd are reset to -1.
 *
 *  @param m           queue to initialize; must not be NULL.
 *  @param event_base  libevent base of the receiving thread; must not
 *                     be NULL.
 *
 *  Returns true on success.
 */
bool work_queue_init(work_queue *m, struct event_base *event_base) {
    assert(m != NULL);

    memset(m, 0, sizeof(work_queue));

    pthread_mutex_init(&m->work_lock, NULL);

    m->work_head = NULL;
    m->work_tail = NULL;

    m->num_items = 0;
    m->tot_sends = 0;
    m->tot_recvs = 0;

    m->event_base = event_base;
    assert(m->event_base != NULL);

    int fds[2] = {0};
#ifdef WIN32
    struct sockaddr_in serv_addr;
    int sockfd;

    if ((sockfd = createLocalListSock(&serv_addr)) < 0
        || createLocalSocketPair(sockfd,fds,&serv_addr) == -1)
    {
        fprintf(stderr, "Can't create notify pipe: %s",
                strerror(errno));
        pthread_mutex_destroy(&m->work_lock);
        return false;
    }
#else
    if (pipe(fds)) {
        perror("Can't create notify pipe");
        pthread_mutex_destroy(&m->work_lock);
        return false;
    }
#endif

    m->recv_fd = fds[0];
    m->send_fd = fds[1];

    event_set(&m->event, m->recv_fd,
              EV_READ | EV_PERSIST, work_recv, m);

    if (event_base_set(m->event_base, &m->event) == 0 &&
        event_add(&m->event, NULL) == 0) {
#ifdef WORK_DEBUG
        moxi_log_write("work_queue_init %x %x %x %d %d %u %llu\n",
                (int) pthread_self(),
                (int) m,
                (int) m->event_base,
                m->send_fd,
                m->recv_fd,
                m->work_head != NULL,
                m->tot_sends);
#endif

        return true;
    }

#ifdef WORK_DEBUG
    moxi_log_write("work_queue_init error\n");
#endif

    // The event could not be registered, so nothing will ever service
    // this queue.  Release what was acquired above instead of leaking it.
    //
#ifndef WIN32
    close(fds[0]);
    close(fds[1]);
    m->recv_fd = -1;
    m->send_fd = -1;
#endif
    // NOTE(review): on WIN32 the pair comes from createLocalSocketPair();
    // how those handles must be released is not visible here, so they
    // are left untouched -- confirm and add the matching cleanup.

    pthread_mutex_destroy(&m->work_lock);

    return false;
}
/* Authenticates an upstream connection from a binary SASL_AUTH request
 * using the PLAIN mechanism.
 *
 * req_bytes points at the raw request: header, extras, key (the
 * mechanism name) and then the client data, which looks like
 * "[authzid]\0username\0password".  Header fields are still in network
 * byte order and are converted here.
 *
 * On success the connection's thread data (c->extra) is switched to the
 * authenticated proxy's and "Authenticated" is written back.  Every
 * failure, including any malformed or inconsistent request, produces a
 * PROTOCOL_BINARY_RESPONSE_AUTH_ERROR reply.
 *
 * The request comes from an untrusted client, so all lengths are
 * validated before they are used for reads or copies.
 * NOTE(review): assumes req_bytes holds at least sizeof(header) + bodylen
 * bytes -- confirm against the caller.
 */
static void cproxy_sasl_plain_auth(conn *c, char *req_bytes) {
    protocol_binary_request_header *req;
    char *key;
    int keylen;
    int bodylen;
    int extlen;
    char *clientin;
    unsigned int clientinlen;
    const char *username = NULL;
    const char *username_end = NULL;

    proxy_td *ptd = c->extra;
    cb_assert(ptd != NULL);
    cb_assert(ptd->proxy != NULL);
    cb_assert(ptd->proxy->main != NULL);

    /* Authenticate an upstream connection. */

    req = (protocol_binary_request_header *) req_bytes;

    extlen = req->request.extlen;
    key = ((char *) req) + sizeof(*req) + extlen;
    keylen = ntohs(req->request.keylen);
    bodylen = ntohl(req->request.bodylen);

    /* The body must be able to hold the extras and the key; otherwise */
    /* the key comparison below would read past the body and the       */
    /* unsigned clientinlen computation would wrap around.             */

    if (bodylen < 0 || bodylen < keylen + extlen) {
        write_bin_error(c, PROTOCOL_BINARY_RESPONSE_AUTH_ERROR, 0);
        return;
    }

    /* The key is the sasl mech. */

    if (keylen != 5 ||
        memcmp(key, "PLAIN", 5) != 0) { /* 5 == strlen("PLAIN"). */
        write_bin_error(c, PROTOCOL_BINARY_RESPONSE_AUTH_ERROR, 0);
        return;
    }

    clientin = key + keylen;
    clientinlen = bodylen - keylen - extlen;

    /* The clientin string looks like "[authzid]\0username\0password". */

    while (clientinlen > 0 && clientin[0] != '\0') {
        /* Skip authzid. */

        clientin++;
        clientinlen--;
    }

    if (clientinlen > 2 && clientinlen < 128 && clientin[0] == '\0') {
        /* Look for the terminator of the username only inside the    */
        /* remaining client data; a plain strlen() could run past it. */

        username = clientin + 1;
        username_end = memchr(username, '\0', clientinlen - 1);
    }

    if (username_end != NULL) {
        char password[256];

        /* clientinlen == 1 + uslen + 1 + pwlen, and the terminator */
        /* was found in bounds, so pwlen cannot be negative.        */

        int uslen = (int) (username_end - username);
        int pwlen = (int) clientinlen - 2 - uslen;

        if (pwlen < (int) sizeof(password)) {
            proxy *p;

            memcpy(password,
                   clientin + 2 + uslen,
                   pwlen);
            password[pwlen] = '\0';

            p = cproxy_find_proxy_by_auth(ptd->proxy->main,
                                          username, password);
            if (p != NULL) {
                proxy_td *ptd_target =
                    cproxy_find_thread_data(p, cb_thread_self());
                if (ptd_target != NULL) {
                    c->extra = ptd_target;

                    write_bin_response(c, "Authenticated", 0, 0,
                                       strlen("Authenticated"));

                    if (settings.verbose > 2) {
                        moxi_log_write("<%d sasl authenticated for %s\n",
                                       c->sfd, username);
                    }

                    return;
                } else {
                    if (settings.verbose > 2) {
                        moxi_log_write("<%d sasl auth failed on ptd for %s\n",
                                       c->sfd, username);
                    }
                }
            } else {
                if (settings.verbose > 2) {
                    moxi_log_write("<%d sasl auth failed for %s (%d)\n",
                                   c->sfd, username, pwlen);
                }
            }
        } else {
            if (settings.verbose > 2) {
                moxi_log_write("<%d sasl auth failed for %s with empty password\n",
                               c->sfd, username);
            }
        }
    } else {
        /* Wrong overall shape, or no '\0' after the username.  The   */
        /* username is not logged: it may not be terminated in bounds. */

        if (settings.verbose > 2) {
            moxi_log_write("<%d sasl auth failed with malformed PLAIN data\n",
                           c->sfd);
        }
    }

    /* TODO: If authentication failed, we should consider */
    /* reassigning the connection to the NULL_BUCKET. */

    write_bin_error(c, PROTOCOL_BINARY_RESPONSE_AUTH_ERROR, 0);
}
/* Initializes *ptr as a libvbucket-backed server set from a config string.
 *
 * The config is parsed with vbucket_config_parse_string(); each server's
 * "host:port" entry is split into hostname and port, and the config's
 * user and password (when present) are duplicated into every server.
 *
 * Returns ptr on success.  On any failure ptr is released with
 * mcs_free() and NULL is returned.
 */
mcs_st *lvb_create(mcs_st *ptr, const char *config) {
    assert(ptr);

    memset(ptr, 0, sizeof(*ptr));
    ptr->kind = MCS_KIND_LIBVBUCKET;

    VBUCKET_CONFIG_HANDLE vch = vbucket_config_parse_string(config);
    if (vch == NULL) {
        moxi_log_write("mcs_create failed, vbucket_config_parse_string: %s\n",
                       config);
        goto fail;
    }

    ptr->data = vch;
    ptr->nservers = vbucket_config_get_num_servers(vch);
    if (ptr->nservers <= 0) {
        goto fail;
    }

    ptr->servers = calloc(sizeof(mcs_server_st), ptr->nservers);
    if (ptr->servers == NULL) {
        goto fail;
    }

    /* Mark every server as having no open descriptor. */
    for (int i = 0; i < ptr->nservers; i++) {
        ptr->servers[i].fd = -1;
    }

    for (int idx = 0; idx < ptr->nservers; idx++) {
        mcs_server_st *srv = &ptr->servers[idx];

        const char *hostport = vbucket_config_get_server(vch, idx);
        size_t hostport_len = (hostport != NULL) ? strlen(hostport) : 0;

        if (hostport_len == 0 ||
            hostport_len >= sizeof(srv->hostname) - 1) {
            moxi_log_write("mcs_create failed, unknown server: %s\n",
                           config);
            goto fail;
        }

        strncpy(srv->hostname, hostport, sizeof(srv->hostname) - 1);

        /* Cut "host:port" at the colon; the port text follows it. */
        char *colon = strchr(srv->hostname, ':');
        if (colon == NULL) {
            moxi_log_write("mcs_create failed, missing port: %s\n",
                           config);
            goto fail;
        }

        *colon = '\0';
        srv->port = atoi(colon + 1);
        if (srv->port <= 0) {
            moxi_log_write("mcs_create failed, could not parse port: %s\n",
                           config);
            goto fail;
        }

        const char *user = vbucket_config_get_user(vch);
        if (user != NULL) {
            srv->usr = strdup(user);
        }

        const char *password = vbucket_config_get_password(vch);
        if (password != NULL) {
            srv->pwd = strdup(password);
        }
    }

    return ptr;

fail:
    mcs_free(ptr);

    return NULL;
}