Example #1
0
/* Used for broadcast commands, like flush_all or stats.
 */
bool cproxy_broadcast_a2a_downstream(downstream *d,
                                     char *command,
                                     conn *uc,
                                     char *suffix) {
    assert(d != NULL);
    assert(d->ptd != NULL);
    assert(d->ptd->proxy != NULL);
    assert(d->downstream_conns != NULL);
    assert(command != NULL);
    assert(uc != NULL);
    assert(uc->next == NULL);
    assert(uc->item == NULL);

    int nwrite = 0;
    int nconns = memcached_server_count(&d->mst);

    for (int i = 0; i < nconns; i++) {
        conn *c = d->downstream_conns[i];
        if (c != NULL) {
            if (cproxy_prep_conn_for_write(c)) {
                assert(c->state == conn_pause);

                out_string(c, command);

                if (update_event(c, EV_WRITE | EV_PERSIST)) {
                    nwrite++;

                    if (uc->noreply) {
                        c->write_and_go = conn_pause;
                    }
                } else {
                    if (settings.verbose > 1)
                        fprintf(stderr,
                                "Update cproxy write event failed\n");

                    d->ptd->stats.stats.err_oom++;
                    cproxy_close_conn(c);
                }
            } else {
                d->ptd->stats.stats.err_downstream_write_prep++;
                cproxy_close_conn(c);
            }
        }
    }

    if (settings.verbose > 1)
        fprintf(stderr, "forward multiget nwrite %d out of %d\n",
                nwrite, nconns);

    d->downstream_used_start = nwrite;
    d->downstream_used       = nwrite;

    if (cproxy_dettach_if_noreply(d, uc) == false) {
        d->upstream_suffix = suffix;

        cproxy_start_downstream_timeout(d, NULL);
    } else {
        // TODO: Handle flush_all's expiration parameter against
        // the front_cache.
        //
        if (strncmp(command, "flush_all", 9) == 0) {
            mcache_flush_all(&d->ptd->proxy->front_cache, 0);
        }
    }

    return nwrite > 0;
}
Example #2
0
/* Forward an upstream command that came with item data,
 * like set/add/replace/etc.
 */
bool cproxy_forward_a2a_item_downstream(downstream *d, short cmd,
                                        item *it, conn *uc) {
    assert(d != NULL);
    assert(d->ptd != NULL);
    assert(d->ptd->proxy != NULL);
    assert(d->downstream_conns != NULL);
    assert(it != NULL);
    assert(uc != NULL);
    assert(uc->next == NULL);

    // Assuming we're already connected to downstream.
    //
    bool self = false;

    conn *c = cproxy_find_downstream_conn(d, ITEM_key(it), it->nkey,
                                          &self);
    if (c != NULL) {
        if (self) {
            cproxy_optimize_to_self(d, uc, uc->cmd_start);
            complete_nread_ascii(uc);
            return true;
        }

        if (cproxy_prep_conn_for_write(c)) {
            assert(c->state == conn_pause);

            char *verb = nread_text(cmd);
            assert(verb != NULL);

            char *str_flags   = ITEM_suffix(it);
            char *str_length  = strchr(str_flags + 1, ' ');
            int   len_flags   = str_length - str_flags;
            int   len_length  = it->nsuffix - len_flags - 2;
            char *str_exptime = add_conn_suffix(c);
            char *str_cas     = (cmd == NREAD_CAS ? add_conn_suffix(c) : NULL);

            if (str_flags != NULL &&
                str_length != NULL &&
                len_flags > 1 &&
                len_length > 1 &&
                str_exptime != NULL &&
                (cmd != NREAD_CAS ||
                 str_cas != NULL)) {
                sprintf(str_exptime, " %u", it->exptime);

                if (str_cas != NULL)
                    sprintf(str_cas, " %llu",
                            (unsigned long long) ITEM_get_cas(it));

                if (add_iov(c, verb, strlen(verb)) == 0 &&
                    add_iov(c, ITEM_key(it), it->nkey) == 0 &&
                    add_iov(c, str_flags, len_flags) == 0 &&
                    add_iov(c, str_exptime, strlen(str_exptime)) == 0 &&
                    add_iov(c, str_length, len_length) == 0 &&
                    (str_cas == NULL ||
                     add_iov(c, str_cas, strlen(str_cas)) == 0) &&
                    (uc->noreply == false ||
                     add_iov(c, " noreply", 8) == 0) &&
                    add_iov(c, ITEM_data(it) - 2, it->nbytes + 2) == 0) {
                    conn_set_state(c, conn_mwrite);
                    c->write_and_go = conn_new_cmd;

                    if (update_event(c, EV_WRITE | EV_PERSIST)) {
                        d->downstream_used_start = 1;
                        d->downstream_used       = 1;

                        if (cproxy_dettach_if_noreply(d, uc) == false) {
                            cproxy_start_downstream_timeout(d, c);

                            // During a synchronous (with-reply) SET,
                            // handle fire-&-forget SET optimization.
                            //
                            if (cmd == NREAD_SET &&
                                cproxy_optimize_set_ascii(d, uc,
                                                          ITEM_key(it),
                                                          it->nkey)) {
                                d->ptd->stats.stats.tot_optimize_sets++;
                            }
                        } else {
                            c->write_and_go = conn_pause;

                            mcache_delete(&d->ptd->proxy->front_cache,
                                          ITEM_key(it), it->nkey);
                        }

                        return true;
                    }
                }

                d->ptd->stats.stats.err_oom++;
                cproxy_close_conn(c);
            } else {
                // TODO: Handle this weird error case.
            }
        } else {
            d->ptd->stats.stats.err_downstream_write_prep++;
            cproxy_close_conn(c);
        }

        if (settings.verbose > 1)
            fprintf(stderr, "Proxy item write out of memory");
    }

    return false;
}
Example #3
0
void cproxy_process_upstream_binary(conn *c) {
    cb_assert(c != NULL);
    cb_assert(c->cmd >= 0);
    cb_assert(c->next == NULL);
    cb_assert(c->item == NULL);
    cb_assert(IS_BINARY(c->protocol));
    cb_assert(IS_PROXY(c->protocol));

    proxy_td *ptd = c->extra;
    cb_assert(ptd != NULL);

    if (!cproxy_prep_conn_for_write(c)) {
        ptd->stats.stats.err_upstream_write_prep++;
        conn_set_state(c, conn_closing);
        return;
    }

    c->cmd_curr       = -1;
    c->cmd_start      = NULL;
    c->cmd_start_time = msec_current_time;
    c->cmd_retries    = 0;

    int      extlen  = c->binary_header.request.extlen;
    int      keylen  = c->binary_header.request.keylen;
    uint32_t bodylen = c->binary_header.request.bodylen;

    cb_assert(bodylen >= (uint32_t) keylen + extlen);

    if (settings.verbose > 2) {
        moxi_log_write("<%d cproxy_process_upstream_binary %x %d %d %u\n",
                c->sfd, c->cmd, extlen, keylen, bodylen);
    }

    process_bin_noreply(c); /* Map quiet c->cmd values into non-quiet. */

    if (c->cmd == PROTOCOL_BINARY_CMD_VERSION ||
        c->cmd == PROTOCOL_BINARY_CMD_QUIT) {
        dispatch_bin_command(c);
        return;
    }

    /* Alloc an item and continue with an rest-of-body nread if */
    /* necessary.  The item will hold the entire request message */
    /* (the header + body). */

    char *ikey    = "u";
    int   ikeylen = 1;

    c->item = item_alloc(ikey, ikeylen, 0, 0,
                         sizeof(c->binary_header) + bodylen);
    if (c->item != NULL) {
        item *it = c->item;
        void *rb = c->rcurr;

        cb_assert(it->refcount == 1);

        memcpy(ITEM_data(it), rb, sizeof(c->binary_header));

        if (bodylen > 0) {
            c->ritem = ITEM_data(it) + sizeof(c->binary_header);
            c->rlbytes = bodylen;
            c->substate = bin_read_set_value;

            conn_set_state(c, conn_nread);
        } else {
            /* Since we have no body bytes, we can go immediately to */
            /* the nread completed processing step. */

            if (c->binary_header.request.opcode == PROTOCOL_BINARY_CMD_SASL_LIST_MECHS) {
                /* TODO: One day handle more than just PLAIN sasl auth. */

                write_bin_response(c, "PLAIN", 0, 0, strlen("PLAIN"));
                return;
            }

            cproxy_pause_upstream_for_downstream(ptd, c);
        }
    } else {
        if (settings.verbose > 2) {
            moxi_log_write("<%d cproxy_process_upstream_binary OOM\n",
                           c->sfd);
        }
        ptd->stats.stats.err_oom++;
        cproxy_close_conn(c);
    }
}
Example #4
0
/* Forward a simple one-liner command downstream.
 * For example, get, incr/decr, delete, etc.
 * The response, though, might be a simple line or
 * multiple VALUE+END lines.
 */
bool cproxy_forward_a2a_simple_downstream(downstream *d,
                                          char *command, conn *uc) {
    assert(d != NULL);
    assert(d->ptd != NULL);
    assert(d->ptd->proxy != NULL);
    assert(d->downstream_conns != NULL);
    assert(command != NULL);
    assert(uc != NULL);
    assert(uc->item == NULL);
    assert(uc->cmd_curr != -1);
    assert(d->multiget == NULL);
    assert(d->merger == NULL);

    // Handles get and gets.
    //
    if (uc->cmd_curr == PROTOCOL_BINARY_CMD_GET) {
        // Only use front_cache for 'get', not for 'gets'.
        //
        mcache *front_cache =
            (command[3] == ' ') ? &d->ptd->proxy->front_cache : NULL;

        return multiget_ascii_downstream(d, uc,
                                         a2a_multiget_start,
                                         a2a_multiget_skey,
                                         a2a_multiget_end,
                                         front_cache);
    }

    assert(uc->next == NULL);

    if (uc->cmd_curr == PROTOCOL_BINARY_CMD_FLUSH)
        return cproxy_broadcast_a2a_downstream(d, command, uc,
                                               "OK\r\n");

    if (uc->cmd_curr == PROTOCOL_BINARY_CMD_STAT) {
        if (strncmp(command + 5, " reset", 6) == 0)
            return cproxy_broadcast_a2a_downstream(d, command, uc,
                                                   "RESET\r\n");

        if (cproxy_broadcast_a2a_downstream(d, command, uc,
                                            "END\r\n")) {
            d->merger = genhash_init(512, skeyhash_ops);
            return true;
        } else {
            return false;
        }
    }

    // TODO: Inefficient repeated scan_tokens.
    //
    int      cmd_len = 0;
    token_t  tokens[MAX_TOKENS];
    size_t   ntokens = scan_tokens(command, tokens, MAX_TOKENS, &cmd_len);
    char    *key     = tokens[KEY_TOKEN].value;
    int      key_len = tokens[KEY_TOKEN].length;

    if (ntokens <= 1) { // This was checked long ago, while parsing
        assert(false);  // the upstream conn.
        return false;
    }

    // Assuming we're already connected to downstream.
    //
    bool self = false;

    conn *c = cproxy_find_downstream_conn(d, key, key_len,
                                          &self);
    if (c != NULL) {
        if (self) {
            // TODO: This optimization could be done much earlier,
            // even before the upstream conn was assigned
            // to a downstream.
            //
            cproxy_optimize_to_self(d, uc, command);
            process_command(uc, command);
            return true;
        }

        if (cproxy_prep_conn_for_write(c)) {
            assert(c->state == conn_pause);

            out_string(c, command);

            if (settings.verbose > 1)
                fprintf(stderr, "forwarding to %d, noreply %d\n",
                        c->sfd, uc->noreply);

            if (update_event(c, EV_WRITE | EV_PERSIST)) {
                d->downstream_used_start = 1;
                d->downstream_used       = 1;

                if (cproxy_dettach_if_noreply(d, uc) == false) {
                    cproxy_start_downstream_timeout(d, c);
                } else {
                    c->write_and_go = conn_pause;

                    // Do mcache_delete() here only during a noreply,
                    // otherwise for with-reply requests, we could
                    // be in a race with other clients repopulating
                    // the front_cache.  For with-reply requests, we
                    // clear the front_cache when we get a success reply.
                    //
                    mcache_delete(&d->ptd->proxy->front_cache, key, key_len);
                }

                return true;
            }

            if (settings.verbose > 1)
                fprintf(stderr, "Couldn't update cproxy write event\n");

            d->ptd->stats.stats.err_oom++;
            cproxy_close_conn(c);
        } else {
            d->ptd->stats.stats.err_downstream_write_prep++;
            cproxy_close_conn(c);
        }
    }

    return false;
}
/* Used for broadcast commands, like flush_all or stats.
 */
bool cproxy_broadcast_a2a_downstream(downstream *d,
                                     char *command,
                                     conn *uc,
                                     char *suffix) {
    assert(d != NULL);
    assert(d->ptd != NULL);
    assert(d->ptd->proxy != NULL);
    assert(d->downstream_conns != NULL);
    assert(d->downstream_used_start == 0);
    assert(d->downstream_used == 0);
    assert(command != NULL);
    assert(uc != NULL);
    assert(uc->next == NULL);
    assert(uc->item == NULL);

    int nwrite = 0;
    int nconns = mcs_server_count(&d->mst);

    for (int i = 0; i < nconns; i++) {
        conn *c = d->downstream_conns[i];
        if (c != NULL &&
            c != NULL_CONN) {
            if (cproxy_prep_conn_for_write(c)) {
                assert(c->state == conn_pause);

                out_string(c, command);

                if (update_event(c, EV_WRITE | EV_PERSIST)) {
                    nwrite++;

                    if (uc->noreply) {
                        c->write_and_go = conn_pause;
                    }
                } else {
                    if (settings.verbose > 1) {
                        moxi_log_write("Update cproxy write event failed\n");
                    }

                    d->ptd->stats.stats.err_oom++;
                    cproxy_close_conn(c);
                }
            } else {
                d->ptd->stats.stats.err_downstream_write_prep++;
                cproxy_close_conn(c);
            }
        }
    }

    if (settings.verbose > 1) {
        moxi_log_write("%d: a2a broadcast nwrite %d out of %d\n",
                uc->sfd, nwrite, nconns);
    }

    if (nwrite > 0) {
        d->downstream_used_start = nwrite;
        d->downstream_used       = nwrite;

        if (cproxy_dettach_if_noreply(d, uc) == false) {
            d->upstream_suffix = suffix;
            d->upstream_suffix_len = 0;
            d->upstream_status = PROTOCOL_BINARY_RESPONSE_SUCCESS;
            d->upstream_retry = 0;
            d->target_host_ident = NULL;

            cproxy_start_downstream_timeout(d, NULL);
        } else {
            // TODO: Handle flush_all's expiration parameter against
            // the front_cache.
            //
            if (strncmp(command, "flush_all", 9) == 0) {
                mcache_flush_all(&d->ptd->proxy->front_cache, 0);
            }
        }

        return true;
    }

    return false;
}
Example #6
0
/* Do the actual work of forwarding the command from an
 * upstream binary conn to its assigned binary downstream.
 */
bool cproxy_forward_b2b_downstream(downstream *d) {
    int nc;
    int server_index;
    conn *uc;

    cb_assert(d != NULL);
    cb_assert(d->ptd != NULL);
    cb_assert(d->ptd->proxy != NULL);
    cb_assert(d->downstream_conns != NULL);
    cb_assert(d->downstream_used == 0);
    cb_assert(d->multiget == NULL);
    cb_assert(d->merger == NULL);

    d->downstream_used_start = 0;

    uc = d->upstream_conn;
    if (settings.verbose > 2) {
        moxi_log_write("%d: cproxy_forward_b2b_downstream %x\n",
                uc->sfd, uc->cmd);
    }

    cb_assert(uc != NULL);
    cb_assert(uc->state == conn_pause);
    cb_assert(uc->cmd >= 0);
    cb_assert(uc->cmd_start == NULL);
    cb_assert(uc->thread != NULL);
    cb_assert(uc->thread->base != NULL);
    cb_assert(uc->noreply == false);
    cb_assert(IS_BINARY(uc->protocol));
    cb_assert(IS_PROXY(uc->protocol));

    server_index = -1;

    if (cproxy_is_broadcast_cmd(uc->cmd) == false && uc->corked == NULL) {
        item *it = uc->item;
        protocol_binary_request_header *req;
        char *key;
        int key_len;

        cb_assert(it != NULL);

        req = (protocol_binary_request_header *) ITEM_data(it);
        key = ((char *) req) + sizeof(*req) + req->request.extlen;
        key_len = ntohs(req->request.keylen);

        if (key_len > 0) {
            server_index = cproxy_server_index(d, key, key_len, NULL);
            if (server_index < 0) {
                return false;
            }
        }
    }

    nc = cproxy_connect_downstream(d, uc->thread, server_index);
    if (nc == -1) {
        return true;
    }

    if (nc > 0) {
        int i;
        int nconns;

        cb_assert(d->downstream_conns != NULL);

        if (d->usec_start == 0 &&
            d->ptd->behavior_pool.base.time_stats) {
            d->usec_start = usec_now();
        }

        nconns = mcs_server_count(&d->mst);
        for (i = 0; i < nconns; i++) {
            conn *c = d->downstream_conns[i];
            if (c != NULL &&
                c != NULL_CONN) {
                cb_assert(c->state == conn_pause);
                cb_assert(c->item == NULL);

                if (cproxy_prep_conn_for_write(c) == false) {
                    d->ptd->stats.stats.err_downstream_write_prep++;
                    cproxy_close_conn(c);

                    return false;
                }
            }
        }

        /* Uncork the saved-up quiet binary commands. */

        cproxy_binary_uncork_cmds(d, uc);

        if (uc->cmd == PROTOCOL_BINARY_CMD_FLUSH ||
            uc->cmd == PROTOCOL_BINARY_CMD_NOOP ||
            uc->cmd == PROTOCOL_BINARY_CMD_STAT) {
            return cproxy_broadcast_b2b_downstream(d, uc);
        }

        return cproxy_forward_b2b_simple_downstream(d, uc);
    }

    if (settings.verbose > 2) {
        moxi_log_write("%d: cproxy_forward_b2b_downstream connect failed\n",
                uc->sfd);
    }

    return false;
}
Example #7
0
void cproxy_process_upstream_ascii(conn *c, char *line) {
    assert(c != NULL);
    assert(c->next == NULL);
    assert(c->extra != NULL);
    assert(c->cmd == -1);
    assert(c->item == NULL);
    assert(line != NULL);
    assert(line == c->rcurr);
    assert(IS_ASCII(c->protocol));
    assert(IS_PROXY(c->protocol));

    if (settings.verbose > 2) {
        moxi_log_write("<%d cproxy_process_upstream_ascii %s\n",
                       c->sfd, line);
    }

    // Snapshot rcurr, because the caller, try_read_command(), changes it.
    //
    c->cmd_curr       = -1;
    c->cmd_start      = c->rcurr;
    c->cmd_start_time = msec_current_time;
    c->cmd_retries    = 0;

    proxy_td *ptd = c->extra;
    assert(ptd != NULL);

    /* For commands set/add/replace, we build an item and read the data
     * directly into it, then continue in nread_complete().
     */
    if (!cproxy_prep_conn_for_write(c)) {
        ptd->stats.stats.err_upstream_write_prep++;
        conn_set_state(c, conn_closing);
        return;
    }

    bool mcmux_command = false;
    bool self_command = false;

    /* Check for proxy pattern - A:host:port or B:host:port */
    if (true == settings.enable_mcmux_mode &&
            ((*line == 'A' || *line == 'B') && *(line + 1) == ':')) {
        mcmux_command = true;
    } else if (true == settings.enable_mcmux_mode) {
        self_command = true;
    }

    c->peer_protocol = 0;
    c->peer_host = NULL;
    c->peer_port = 0;

    if (mcmux_command) {
        char *peer_port = NULL;
        int i = 0;

        c->peer_protocol = (*line == 'A') ?
                           proxy_downstream_ascii_prot :
                           proxy_downstream_binary_prot;
        line += 2;
        c->peer_host = line;

        while (*line != ' ' && *line != '\0' &&
                *line != ':' && ++i < MAX_HOSTNAME_LEN) {
            line++;
        }

        if (*line == '\0' || line - c->peer_host <= 0) {
            out_string(c, "ERROR");
            moxi_log_write("Malformed request line");
            return;
        }
        *line = '\0';
        line++;
        peer_port = line;
        i = 0;

        while (*line != ' ' && *line != '\0' && ++i <= MAX_PORT_LEN) {
            line++;
        }

        if (*line == '\0' || line - peer_port <= 0) {
            out_string(c, "ERROR");
            moxi_log_write("Malformed request line");
            return;
        }

        c->peer_port = atoi(peer_port);

        *line++ = '\0';
        c->cmd_start = line;
    }

    int     cmd_len = 0;
    token_t tokens[MAX_TOKENS];
    size_t  ntokens = scan_tokens(line, tokens, MAX_TOKENS, &cmd_len);
    char   *cmd     = tokens[COMMAND_TOKEN].value;
    int     cmdx    = -1;
    int     cmd_st  = STATS_CMD_TYPE_REGULAR;
    int     comm;

#define SEEN(cmd_id, is_cas, cmd_len)                           \
    cmd_st = c->noreply ?                                       \
        STATS_CMD_TYPE_QUIET : STATS_CMD_TYPE_REGULAR;          \
    ptd->stats.stats_cmd[cmd_st][cmd_id].seen++;                \
    ptd->stats.stats_cmd[cmd_st][cmd_id].read_bytes += cmd_len; \
    if (is_cas) {                                               \
        ptd->stats.stats_cmd[cmd_st][cmd_id].cas++;             \
    }

    if (ntokens >= 3 &&
            (false == self_command) &&
            (strncmp(cmd, "get", 3) == 0)) {
        if (cmd[3] == 'l') {
            c->cmd_curr = PROTOCOL_BINARY_CMD_GETL;
        } else if (ntokens == 3) {
            // Single-key get/gets optimization.
            //
            c->cmd_curr = PROTOCOL_BINARY_CMD_GETK;
        } else {
            c->cmd_curr = PROTOCOL_BINARY_CMD_GETKQ;
        }

        // Handles get and gets.
        //
        cproxy_pause_upstream_for_downstream(ptd, c);

        // The cmd_len from scan_tokens might not include
        // all the keys, so cmd_len might not == strlen(command).
        // Handle read_bytes during multiget broadcast.
        //
        if (cmd[3] == 'l') {
            SEEN(STATS_CMD_GETL, true, 0);
        } else {
            SEEN(STATS_CMD_GET, cmd[3] == 's', 0);
        }

    } else if ((ntokens == 6 || ntokens == 7) &&
               (false == self_command) &&
               ((strncmp(cmd, "add", 3) == 0 &&
                 (comm = NREAD_ADD) &&
                 (cmdx = STATS_CMD_ADD) &&
                 (c->cmd_curr = PROTOCOL_BINARY_CMD_ADD)) ||
                (strncmp(cmd, "set", 3) == 0 &&
                 (comm = NREAD_SET) &&
                 (cmdx = STATS_CMD_SET) &&
                 (c->cmd_curr = PROTOCOL_BINARY_CMD_SET)) ||
                (strncmp(cmd, "replace", 7) == 0 &&
                 (comm = NREAD_REPLACE) &&
                 (cmdx = STATS_CMD_REPLACE) &&
                 (c->cmd_curr = PROTOCOL_BINARY_CMD_REPLACE)) ||
                (strncmp(cmd, "prepend", 7) == 0 &&
                 (comm = NREAD_PREPEND) &&
                 (cmdx = STATS_CMD_PREPEND) &&
                 (c->cmd_curr = PROTOCOL_BINARY_CMD_PREPEND)) ||
                (strncmp(cmd, "append", 6) == 0 &&
                 (comm = NREAD_APPEND) &&
                 (cmdx = STATS_CMD_APPEND) &&
                 (c->cmd_curr = PROTOCOL_BINARY_CMD_APPEND)))) {
        assert(c->item == NULL);
        c->item = NULL;

        process_update_command(c, tokens, ntokens, comm, false);

        if (cmdx >= 0) {
            item *it = c->item;
            if (it != NULL) {
                SEEN(cmdx, false, cmd_len + it->nbytes);
            } else {
                SEEN(cmdx, false, cmd_len);
                ptd->stats.stats_cmd[cmd_st][cmdx].misses++;
            }
        }

    } else if ((ntokens == 7 || ntokens == 8) &&
               (false == self_command) &&
               (strncmp(cmd, "cas", 3) == 0 &&
                (comm = NREAD_CAS) &&
                (c->cmd_curr = PROTOCOL_BINARY_CMD_SET))) {
        assert(c->item == NULL);
        c->item = NULL;

        process_update_command(c, tokens, ntokens, comm, true);

        item *it = c->item;
        if (it != NULL) {
            SEEN(STATS_CMD_CAS, true, cmd_len + it->nbytes);
        } else {
            SEEN(STATS_CMD_CAS, true, cmd_len);
            ptd->stats.stats_cmd[cmd_st][STATS_CMD_CAS].misses++;
        }

    } else if ((ntokens == 4 || ntokens == 5) &&
               (false == self_command) &&
               (strncmp(cmd, "incr", 4) == 0) &&
               (c->cmd_curr = PROTOCOL_BINARY_CMD_INCREMENT)) {
        set_noreply_maybe(c, tokens, ntokens);
        cproxy_pause_upstream_for_downstream(ptd, c);

        SEEN(STATS_CMD_INCR, false, cmd_len);

    } else if ((ntokens == 4 || ntokens == 5) &&
               (false == self_command) &&
               (strncmp(cmd, "decr", 4) == 0) &&
               (c->cmd_curr = PROTOCOL_BINARY_CMD_DECREMENT)) {
        set_noreply_maybe(c, tokens, ntokens);
        cproxy_pause_upstream_for_downstream(ptd, c);

        SEEN(STATS_CMD_DECR, false, cmd_len);

    } else if (ntokens >= 3 && ntokens <= 4 &&
               (false == self_command) &&
               (strncmp(cmd, "delete", 6) == 0) &&
               (c->cmd_curr = PROTOCOL_BINARY_CMD_DELETE)) {
        set_noreply_maybe(c, tokens, ntokens);
        cproxy_pause_upstream_for_downstream(ptd, c);

        SEEN(STATS_CMD_DELETE, false, cmd_len);

    } else if (ntokens >= 2 && ntokens <= 4 &&
               (false == self_command) &&
               (strncmp(cmd, "flush_all", 9) == 0) &&
               (c->cmd_curr = PROTOCOL_BINARY_CMD_FLUSH)) {
        set_noreply_maybe(c, tokens, ntokens);
        cproxy_pause_upstream_for_downstream(ptd, c);

        SEEN(STATS_CMD_FLUSH_ALL, false, cmd_len);

    } else if (ntokens >= 3 && ntokens <= 4 &&
               (strncmp(cmd, "stats proxy", 10) == 0)) {

        process_stats_proxy_command(c, tokens, ntokens);

        SEEN(STATS_CMD_STATS, false, cmd_len);

    } else if (ntokens == 3 &&
               (false == self_command) &&
               (strcmp(cmd, "stats reset") == 0) &&
               (c->cmd_curr = PROTOCOL_BINARY_CMD_STAT)) {
        cproxy_pause_upstream_for_downstream(ptd, c);

        SEEN(STATS_CMD_STATS_RESET, false, cmd_len);

    } else if (ntokens == 2 &&
               (false == self_command) &&
               (strcmp(cmd, "stats") == 0) &&
               (c->cmd_curr = PROTOCOL_BINARY_CMD_STAT)) {
        // Even though we've coded to handle advanced stats
        // like stats cachedump, prevent those here to avoid
        // locking downstream servers.
        //
        cproxy_pause_upstream_for_downstream(ptd, c);

        SEEN(STATS_CMD_STATS, false, cmd_len);

    } else if (ntokens == 2 &&
               (true == mcmux_command) &&
               (strncmp(cmd, "version", 7) == 0) &&
               (c->cmd_curr = PROTOCOL_BINARY_CMD_VERSION)) {
        /* downstream version command */
        cproxy_pause_upstream_for_downstream(ptd, c);

        SEEN(STATS_CMD_VERSION, false, cmd_len);

    } else if (ntokens == 2 &&
               (strncmp(cmd, "version", 7) == 0)) {
        out_string(c, "VERSION " VERSION);

        SEEN(STATS_CMD_VERSION, false, cmd_len);

    } else if ((ntokens == 3 || ntokens == 4) &&
               (strncmp(cmd, "verbosity", 9) == 0)) {
        process_verbosity_command(c, tokens, ntokens);

        SEEN(STATS_CMD_VERBOSITY, false, cmd_len);

    } else if (ntokens == 2 &&
               (strncmp(cmd, "quit", 4) == 0)) {
        conn_set_state(c, conn_closing);

        SEEN(STATS_CMD_QUIT, false, cmd_len);

    } else if (ntokens == 4 &&
               (strncmp(cmd, "unl", 3) == 0) &&
               (false == self_command) &&
               (c->cmd_curr = PROTOCOL_BINARY_CMD_UNL)) {
        cproxy_pause_upstream_for_downstream(ptd, c);
        SEEN(STATS_CMD_UNL, false, cmd_len);
    } else {
        out_string(c, "ERROR");

        SEEN(STATS_CMD_ERROR, false, cmd_len);
    }
}
bool multiget_ascii_downstream(downstream *d, conn *uc,
    int (*emit_start)(conn *c, char *cmd, int cmd_len),
    int (*emit_skey)(conn *c, char *skey, int skey_len),
    int (*emit_end)(conn *c),
    mcache *front_cache) {
    assert(d != NULL);
    assert(d->downstream_conns != NULL);
    assert(d->multiget == NULL);
    assert(uc != NULL);
    assert(uc->noreply == false);

    proxy_td *ptd = d->ptd;
    assert(ptd != NULL);

    proxy_stats_cmd *psc_get =
        &ptd->stats.stats_cmd[STATS_CMD_TYPE_REGULAR][STATS_CMD_GET];
    proxy_stats_cmd *psc_get_key =
        &ptd->stats.stats_cmd[STATS_CMD_TYPE_REGULAR][STATS_CMD_GET_KEY];

    int nwrite = 0;
    int nconns = mcs_server_count(&d->mst);

    for (int i = 0; i < nconns; i++) {
        if (d->downstream_conns[i] != NULL &&
            cproxy_prep_conn_for_write(d->downstream_conns[i]) == false) {
            d->ptd->stats.stats.err_downstream_write_prep++;
            cproxy_close_conn(d->downstream_conns[i]);
            return false;
        }
    }

    if (uc->next != NULL) {
        // More than one upstream conn, so we need a hashtable
        // to track keys for de-deplication.
        //
        d->multiget = genhash_init(128, skeyhash_ops);
        if (settings.verbose > 1) {
            fprintf(stderr, "cproxy multiget hash table new\n");
        }
    }

    // Snapshot the volatile only once.
    //
    uint32_t msec_current_time_snapshot = msec_current_time;

    int   uc_num = 0;
    conn *uc_cur = uc;

    while (uc_cur != NULL) {
        assert(uc_cur->cmd == -1);
        assert(uc_cur->item == NULL);
        assert(uc_cur->state == conn_pause);
        assert(IS_ASCII(uc_cur->protocol));
        assert(IS_PROXY(uc_cur->protocol));

        char *command = uc_cur->cmd_start;
        assert(command != NULL);

        char *space = strchr(command, ' ');
        assert(space > command);

        int cmd_len = space - command;
        assert(cmd_len == 3 || cmd_len == 4); // Either get or gets.

        int cas_emit = (command[3] == 's');

        if (settings.verbose > 1) {
            fprintf(stderr, "forward multiget %s (%d %d)\n",
                    command, cmd_len, uc_num);
        }

        while (space != NULL) {
            char *key = space + 1;
            char *next_space = strchr(key, ' ');
            int   key_len;

            if (next_space != NULL) {
                key_len = next_space - key;
            } else {
                key_len = strlen(key);

                // We've reached the last key.
                //
                psc_get->read_bytes += (key - command + key_len);
            }

            // This key_len check helps skip consecutive spaces.
            //
            if (key_len > 0) {
                ptd->stats.stats.tot_multiget_keys++;

                psc_get_key->seen++;
                psc_get_key->read_bytes += key_len;

                // Update key-based statistics.
                //
                bool do_key_stats =
                    matcher_check(&ptd->key_stats_matcher,
                                  key, key_len, true) == true &&
                    matcher_check(&ptd->key_stats_unmatcher,
                                  key, key_len, false) == false;

                if (do_key_stats) {
                    touch_key_stats(ptd, key, key_len,
                                    msec_current_time_snapshot,
                                    STATS_CMD_TYPE_REGULAR,
                                    STATS_CMD_GET_KEY,
                                    1, 0, 0,
                                    key_len, 0);
                }

                // Handle a front cache hit by queuing response.
                //
                // Note, front cache stats are part of mcache.
                //
                if (!cas_emit) {
                    item *it = mcache_get(front_cache, key, key_len,
                                          msec_current_time_snapshot);
                    if (it != NULL) {
                        assert(it->nkey == key_len);
                        assert(strncmp(ITEM_key(it), key, it->nkey) == 0);

                        cproxy_upstream_ascii_item_response(it, uc_cur, 0);

                        psc_get_key->hits++;
                        psc_get_key->write_bytes += it->nbytes;

                        if (do_key_stats) {
                            touch_key_stats(ptd, key, key_len,
                                            msec_current_time_snapshot,
                                            STATS_CMD_TYPE_REGULAR,
                                            STATS_CMD_GET_KEY,
                                            0, 1, 0,
                                            0, it->nbytes);
                        }

                        // The refcount was inc'ed by mcache_get() for us.
                        //
                        item_remove(it);

                        goto loop_next;
                    }
                }

                bool self = false;

                conn *c = cproxy_find_downstream_conn(d, key, key_len,
                                                      &self);
                if (c != NULL) {
                    if (self) {
                        // Optimization for talking with ourselves,
                        // to avoid extra network hop.
                        //
                        ptd->stats.stats.tot_optimize_self++;

                        item *it = item_get(key, key_len);
                        if (it != NULL) {
                            cproxy_upstream_ascii_item_response(it, uc_cur,
                                                                cas_emit);

                            psc_get_key->hits++;
                            psc_get_key->write_bytes += it->nbytes;

                            if (do_key_stats) {
                                touch_key_stats(ptd, key, key_len,
                                                msec_current_time_snapshot,
                                                STATS_CMD_TYPE_REGULAR,
                                                STATS_CMD_GET_KEY,
                                                0, 1, 0,
                                                0, it->nbytes);
                            }

                            // The refcount was inc'ed by item_get() for us.
                            //
                            item_remove(it);

                            if (settings.verbose > 1) {
                                fprintf(stderr,
                                        "optimize self multiget hit: %s\n",
                                        key);
                            }
                        } else {
                            psc_get_key->misses++;

                            if (do_key_stats) {
                                touch_key_stats(ptd, key, key_len,
                                                msec_current_time_snapshot,
                                                STATS_CMD_TYPE_REGULAR,
                                                STATS_CMD_GET_KEY,
                                                0, 0, 1,
                                                0, 0);
                            }

                            if (settings.verbose > 1) {
                                fprintf(stderr,
                                        "optimize self multiget miss: %s\n",
                                        key);
                            }
                        }

                        goto loop_next;
                    }

                    // See if we've already requested this key via
                    // the multiget hash table, in order to
                    // de-deplicate repeated keys.
                    //
                    bool first_request = true;

                    if (d->multiget != NULL) {
                        // TODO: Use Trond's allocator here.
                        //
                        multiget_entry *entry =
                            calloc(1, sizeof(multiget_entry));
                        if (entry != NULL) {
                            entry->upstream_conn = uc_cur;
                            entry->opaque = 0;
                            entry->hits = 0;
                            entry->next = genhash_find(d->multiget, key);

                            genhash_update(d->multiget, key, entry);

                            if (entry->next != NULL) {
                                first_request = false;
                            }
                        } else {
                            // TODO: Handle out of multiget entry memory.
                        }
                    }

                    if (first_request) {
                        assert(c->item == NULL);
                        assert(c->state == conn_pause);
                        assert(IS_PROXY(c->protocol));
                        assert(c->ilist != NULL);
                        assert(c->isize > 0);

                        if (c->msgused <= 1 &&
                            c->msgbytes <= 0) {
                            emit_start(c, command, cmd_len);
                        }

                        // Provide the preceding space as optimization
                        // for ascii-to-ascii configuration.
                        //
                        emit_skey(c, key - 1, key_len + 1);
                    } else {
                        ptd->stats.stats.tot_multiget_keys_dedupe++;

                        if (settings.verbose > 1) {
                            char buf[KEY_MAX_LENGTH + 10];
                            memcpy(buf, key, key_len);
                            buf[key_len] = '\0';

                            fprintf(stderr,
                                    "%d cproxy multiget dedpue: %s\n",
                                    uc_cur->sfd, buf);
                        }
                    }
                } else {
                    // TODO: Handle when downstream conn is down.
                }
            }

        loop_next:
            space = next_space;
        }

        uc_num++;
        uc_cur = uc_cur->next;
    }

    for (int i = 0; i < nconns; i++) {
        conn *c = d->downstream_conns[i];
        if (c != NULL &&
            (c->msgused > 1 ||
             c->msgbytes > 0)) {
            emit_end(c);

            conn_set_state(c, conn_mwrite);
            c->write_and_go = conn_new_cmd;

            if (update_event(c, EV_WRITE | EV_PERSIST)) {
                nwrite++;

                if (uc->noreply) {
                    c->write_and_go = conn_pause;
                }
            } else {
                if (settings.verbose > 1) {
                    fprintf(stderr,
                            "Couldn't update cproxy write event\n");
                }

                d->ptd->stats.stats.err_oom++;
                cproxy_close_conn(c);
            }
        }
    }

    if (settings.verbose > 1) {
        fprintf(stderr, "forward multiget nwrite %d out of %d\n",
                nwrite, nconns);
    }

    d->downstream_used_start = nwrite;
    d->downstream_used       = nwrite;

    if (cproxy_dettach_if_noreply(d, uc) == false) {
        d->upstream_suffix = "END\r\n";

        cproxy_start_downstream_timeout(d, NULL);
    }

    return nwrite > 0;
}
Example #9
0
/* Forward a simple one-liner command downstream.
 * For example, get, incr/decr, delete, etc.
 * The response, though, might be a simple line or
 * multiple VALUE+END lines.
 */
bool cproxy_forward_a2a_simple_downstream(downstream *d,
                                          char *command, conn *uc) {
    cb_assert(d != NULL);
    cb_assert(d->ptd != NULL);
    cb_assert(d->ptd->proxy != NULL);
    cb_assert(d->downstream_conns != NULL);
    cb_assert(command != NULL);
    cb_assert(uc != NULL);
    cb_assert(uc->item == NULL);
    cb_assert(uc->cmd_curr != (protocol_binary_command) -1);
    cb_assert(d->multiget == NULL);
    cb_assert(d->merger == NULL);

    /* Handles get and gets. */

    if (uc->cmd_curr == PROTOCOL_BINARY_CMD_GETK ||
        uc->cmd_curr == PROTOCOL_BINARY_CMD_GETKQ ||
        uc->cmd_curr == PROTOCOL_BINARY_CMD_GETL) {
        /* Only use front_cache for 'get', not for 'gets'. */

        mcache *front_cache =
            (command[3] == ' ') ? &d->ptd->proxy->front_cache : NULL;

        return multiget_ascii_downstream(d, uc,
                                         a2a_multiget_start,
                                         a2a_multiget_skey,
                                         a2a_multiget_end,
                                         front_cache);
    }

    cb_assert(uc->next == NULL);

    if (uc->cmd_curr == PROTOCOL_BINARY_CMD_FLUSH) {
        return cproxy_broadcast_a2a_downstream(d, command, uc,
                                               "OK\r\n");
    }

    if (uc->cmd_curr == PROTOCOL_BINARY_CMD_STAT) {
        if (strncmp(command + 5, " reset", 6) == 0) {
            return cproxy_broadcast_a2a_downstream(d, command, uc,
                                                   "RESET\r\n");
        }

        if (cproxy_broadcast_a2a_downstream(d, command, uc,
                                            "END\r\n")) {
            d->merger = genhash_init(512, skeyhash_ops);
            return true;
        } else {
            return false;
        }
    }

    /* TODO: Inefficient repeated scan_tokens. */

    int      cmd_len = 0;
    token_t  tokens[MAX_TOKENS];
    size_t   ntokens = scan_tokens(command, tokens, MAX_TOKENS, &cmd_len);
    char    *key     = tokens[KEY_TOKEN].value;
    int      key_len = tokens[KEY_TOKEN].length;

    if (ntokens <= 1) { /* This was checked long ago, while parsing */
        cb_assert(false);  /* the upstream conn. */
        return false;
    }

    /* Assuming we're already connected to downstream. */

    if (!strcmp(command, "version")) {
        /* fake key for version command handling */
        key = "v";
        key_len = 1;
    }

    conn *c = cproxy_find_downstream_conn(d, key, key_len, NULL);
    if (c != NULL) {

        if (cproxy_prep_conn_for_write(c)) {
            cb_assert(c->state == conn_pause);

            out_string(c, command);

            if (settings.verbose > 1) {
                moxi_log_write("forwarding to %d, noreply %d\n",
                        c->sfd, uc->noreply);
            }

            if (update_event(c, EV_WRITE | EV_PERSIST)) {
                d->downstream_used_start = 1;
                d->downstream_used       = 1;

                if (cproxy_dettach_if_noreply(d, uc) == false) {
                    cproxy_start_downstream_timeout(d, c);
                } else {
                    c->write_and_go = conn_pause;

                    /* Do mcache_delete() here only during a noreply, */
                    /* otherwise for with-reply requests, we could */
                    /* be in a race with other clients repopulating */
                    /* the front_cache.  For with-reply requests, we */
                    /* clear the front_cache when we get a success reply. */

                    cproxy_front_cache_delete(d->ptd, key, key_len);
                }

                return true;
            }

            if (settings.verbose > 1) {
                moxi_log_write("Couldn't update cproxy write event\n");
            }

            d->ptd->stats.stats.err_oom++;
            cproxy_close_conn(c);
        } else {
            d->ptd->stats.stats.err_downstream_write_prep++;
            cproxy_close_conn(c);
        }
    }

    return false;
}