Esempio n. 1
0
/* Used for broadcast commands, like flush_all or stats.
 */
bool cproxy_broadcast_a2a_downstream(downstream *d,
                                     char *command,
                                     conn *uc,
                                     char *suffix) {
    assert(d != NULL);
    assert(d->ptd != NULL);
    assert(d->ptd->proxy != NULL);
    assert(d->downstream_conns != NULL);
    assert(d->downstream_used_start == 0);
    assert(d->downstream_used == 0);
    assert(command != NULL);
    assert(uc != NULL);
    assert(uc->next == NULL);
    assert(uc->item == NULL);

    int nwrite = 0;
    int nconns = mcs_server_count(&d->mst);

    for (int i = 0; i < nconns; i++) {
        conn *c = d->downstream_conns[i];
        if (c != NULL &&
            c != NULL_CONN) {
            if (cproxy_prep_conn_for_write(c)) {
                assert(c->state == conn_pause);

                out_string(c, command);

                if (update_event(c, EV_WRITE | EV_PERSIST)) {
                    nwrite++;

                    if (uc->noreply) {
                        c->write_and_go = conn_pause;
                    }
                } else {
                    if (settings.verbose > 1) {
                        moxi_log_write("Update cproxy write event failed\n");
                    }

                    d->ptd->stats.stats.err_oom++;
                    cproxy_close_conn(c);
                }
            } else {
                d->ptd->stats.stats.err_downstream_write_prep++;
                cproxy_close_conn(c);
            }
        }
    }

    if (settings.verbose > 1) {
        moxi_log_write("%d: a2a broadcast nwrite %d out of %d\n",
                uc->sfd, nwrite, nconns);
    }

    if (nwrite > 0) {
        d->downstream_used_start = nwrite;
        d->downstream_used       = nwrite;

        if (cproxy_dettach_if_noreply(d, uc) == false) {
            d->upstream_suffix = suffix;
            d->upstream_suffix_len = 0;
            d->upstream_status = PROTOCOL_BINARY_RESPONSE_SUCCESS;
            d->upstream_retry = 0;
            d->target_host_ident = NULL;

            cproxy_start_downstream_timeout(d, NULL);
        } else {
            // TODO: Handle flush_all's expiration parameter against
            // the front_cache.
            //
            if (strncmp(command, "flush_all", 9) == 0) {
                mcache_flush_all(&d->ptd->proxy->front_cache, 0);
            }
        }

        return true;
    }

    return false;
}
Esempio n. 2
0
/* Used for broadcast commands, like no-op, flush_all or stats.
 */
bool cproxy_broadcast_b2b_downstream(downstream *d, conn *uc) {
    int nwrite = 0;
    int nconns;
    int i;

    cb_assert(d != NULL);
    cb_assert(d->ptd != NULL);
    cb_assert(d->ptd->proxy != NULL);
    cb_assert(d->downstream_conns != NULL);
    cb_assert(uc != NULL);
    cb_assert(uc->next == NULL);
    cb_assert(uc->noreply == false);

    nconns = mcs_server_count(&d->mst);

    for (i = 0; i < nconns; i++) {
        conn *c = d->downstream_conns[i];
        if (c != NULL &&
            c != NULL_CONN &&
            b2b_forward_item_vbucket(uc, d, uc->item, c, -1) == true) {
            nwrite++;
        }
    }

    if (settings.verbose > 2) {
        moxi_log_write("%d: b2b broadcast nwrite %d out of %d\n",
                uc->sfd, nwrite, nconns);
    }

    if (nwrite > 0) {
        /* TODO: Handle binary 'stats reset' sub-command. */
        item *it;

        if (uc->cmd == PROTOCOL_BINARY_CMD_STAT &&
            d->merger == NULL) {
            d->merger = genhash_init(128, skeyhash_ops);
        }

        it = item_alloc("h", 1, 0, 0,
                              sizeof(protocol_binary_response_header));
        if (it != NULL) {
            protocol_binary_response_header *header =
                (protocol_binary_response_header *) ITEM_data(it);

            memset(ITEM_data(it), 0, it->nbytes);

            header->response.magic  = (uint8_t) PROTOCOL_BINARY_RES;
            header->response.opcode = uc->binary_header.request.opcode;
            header->response.opaque = uc->opaque;

            if (add_conn_item(uc, it)) {
                d->upstream_suffix     = ITEM_data(it);
                d->upstream_suffix_len = it->nbytes;
                d->upstream_status = PROTOCOL_BINARY_RESPONSE_SUCCESS;
                d->target_host_ident = NULL;

                if (settings.verbose > 2) {
                    moxi_log_write("%d: b2b broadcast upstream_suffix", uc->sfd);
                    cproxy_dump_header(uc->sfd, ITEM_data(it));
                }

                /* TODO: Handle FLUSHQ (quiet binary flush_all). */

                d->downstream_used_start = nwrite;
                d->downstream_used       = nwrite;

                cproxy_start_downstream_timeout(d, NULL);

                return true;
            }

            item_remove(it);
        }
    }

    return false;
}
Esempio n. 3
0
/* Do the actual work of forwarding the command from an
 * upstream binary conn to its assigned binary downstream.
 */
bool cproxy_forward_b2b_downstream(downstream *d) {
    int nc;
    int server_index;
    conn *uc;

    cb_assert(d != NULL);
    cb_assert(d->ptd != NULL);
    cb_assert(d->ptd->proxy != NULL);
    cb_assert(d->downstream_conns != NULL);
    cb_assert(d->downstream_used == 0);
    cb_assert(d->multiget == NULL);
    cb_assert(d->merger == NULL);

    d->downstream_used_start = 0;

    uc = d->upstream_conn;
    if (settings.verbose > 2) {
        moxi_log_write("%d: cproxy_forward_b2b_downstream %x\n",
                uc->sfd, uc->cmd);
    }

    cb_assert(uc != NULL);
    cb_assert(uc->state == conn_pause);
    cb_assert(uc->cmd >= 0);
    cb_assert(uc->cmd_start == NULL);
    cb_assert(uc->thread != NULL);
    cb_assert(uc->thread->base != NULL);
    cb_assert(uc->noreply == false);
    cb_assert(IS_BINARY(uc->protocol));
    cb_assert(IS_PROXY(uc->protocol));

    server_index = -1;

    if (cproxy_is_broadcast_cmd(uc->cmd) == false && uc->corked == NULL) {
        item *it = uc->item;
        protocol_binary_request_header *req;
        char *key;
        int key_len;

        cb_assert(it != NULL);

        req = (protocol_binary_request_header *) ITEM_data(it);
        key = ((char *) req) + sizeof(*req) + req->request.extlen;
        key_len = ntohs(req->request.keylen);

        if (key_len > 0) {
            server_index = cproxy_server_index(d, key, key_len, NULL);
            if (server_index < 0) {
                return false;
            }
        }
    }

    nc = cproxy_connect_downstream(d, uc->thread, server_index);
    if (nc == -1) {
        return true;
    }

    if (nc > 0) {
        int i;
        int nconns;

        cb_assert(d->downstream_conns != NULL);

        if (d->usec_start == 0 &&
            d->ptd->behavior_pool.base.time_stats) {
            d->usec_start = usec_now();
        }

        nconns = mcs_server_count(&d->mst);
        for (i = 0; i < nconns; i++) {
            conn *c = d->downstream_conns[i];
            if (c != NULL &&
                c != NULL_CONN) {
                cb_assert(c->state == conn_pause);
                cb_assert(c->item == NULL);

                if (cproxy_prep_conn_for_write(c) == false) {
                    d->ptd->stats.stats.err_downstream_write_prep++;
                    cproxy_close_conn(c);

                    return false;
                }
            }
        }

        /* Uncork the saved-up quiet binary commands. */

        cproxy_binary_uncork_cmds(d, uc);

        if (uc->cmd == PROTOCOL_BINARY_CMD_FLUSH ||
            uc->cmd == PROTOCOL_BINARY_CMD_NOOP ||
            uc->cmd == PROTOCOL_BINARY_CMD_STAT) {
            return cproxy_broadcast_b2b_downstream(d, uc);
        }

        return cproxy_forward_b2b_simple_downstream(d, uc);
    }

    if (settings.verbose > 2) {
        moxi_log_write("%d: cproxy_forward_b2b_downstream connect failed\n",
                uc->sfd);
    }

    return false;
}
bool multiget_ascii_downstream(downstream *d, conn *uc,
    int (*emit_start)(conn *c, char *cmd, int cmd_len),
    int (*emit_skey)(conn *c, char *skey, int skey_len),
    int (*emit_end)(conn *c),
    mcache *front_cache) {
    assert(d != NULL);
    assert(d->downstream_conns != NULL);
    assert(d->multiget == NULL);
    assert(uc != NULL);
    assert(uc->noreply == false);

    proxy_td *ptd = d->ptd;
    assert(ptd != NULL);

    proxy_stats_cmd *psc_get =
        &ptd->stats.stats_cmd[STATS_CMD_TYPE_REGULAR][STATS_CMD_GET];
    proxy_stats_cmd *psc_get_key =
        &ptd->stats.stats_cmd[STATS_CMD_TYPE_REGULAR][STATS_CMD_GET_KEY];

    int nwrite = 0;
    int nconns = mcs_server_count(&d->mst);

    for (int i = 0; i < nconns; i++) {
        if (d->downstream_conns[i] != NULL &&
            cproxy_prep_conn_for_write(d->downstream_conns[i]) == false) {
            d->ptd->stats.stats.err_downstream_write_prep++;
            cproxy_close_conn(d->downstream_conns[i]);
            return false;
        }
    }

    if (uc->next != NULL) {
        // More than one upstream conn, so we need a hashtable
        // to track keys for de-deplication.
        //
        d->multiget = genhash_init(128, skeyhash_ops);
        if (settings.verbose > 1) {
            fprintf(stderr, "cproxy multiget hash table new\n");
        }
    }

    // Snapshot the volatile only once.
    //
    uint32_t msec_current_time_snapshot = msec_current_time;

    int   uc_num = 0;
    conn *uc_cur = uc;

    while (uc_cur != NULL) {
        assert(uc_cur->cmd == -1);
        assert(uc_cur->item == NULL);
        assert(uc_cur->state == conn_pause);
        assert(IS_ASCII(uc_cur->protocol));
        assert(IS_PROXY(uc_cur->protocol));

        char *command = uc_cur->cmd_start;
        assert(command != NULL);

        char *space = strchr(command, ' ');
        assert(space > command);

        int cmd_len = space - command;
        assert(cmd_len == 3 || cmd_len == 4); // Either get or gets.

        int cas_emit = (command[3] == 's');

        if (settings.verbose > 1) {
            fprintf(stderr, "forward multiget %s (%d %d)\n",
                    command, cmd_len, uc_num);
        }

        while (space != NULL) {
            char *key = space + 1;
            char *next_space = strchr(key, ' ');
            int   key_len;

            if (next_space != NULL) {
                key_len = next_space - key;
            } else {
                key_len = strlen(key);

                // We've reached the last key.
                //
                psc_get->read_bytes += (key - command + key_len);
            }

            // This key_len check helps skip consecutive spaces.
            //
            if (key_len > 0) {
                ptd->stats.stats.tot_multiget_keys++;

                psc_get_key->seen++;
                psc_get_key->read_bytes += key_len;

                // Update key-based statistics.
                //
                bool do_key_stats =
                    matcher_check(&ptd->key_stats_matcher,
                                  key, key_len, true) == true &&
                    matcher_check(&ptd->key_stats_unmatcher,
                                  key, key_len, false) == false;

                if (do_key_stats) {
                    touch_key_stats(ptd, key, key_len,
                                    msec_current_time_snapshot,
                                    STATS_CMD_TYPE_REGULAR,
                                    STATS_CMD_GET_KEY,
                                    1, 0, 0,
                                    key_len, 0);
                }

                // Handle a front cache hit by queuing response.
                //
                // Note, front cache stats are part of mcache.
                //
                if (!cas_emit) {
                    item *it = mcache_get(front_cache, key, key_len,
                                          msec_current_time_snapshot);
                    if (it != NULL) {
                        assert(it->nkey == key_len);
                        assert(strncmp(ITEM_key(it), key, it->nkey) == 0);

                        cproxy_upstream_ascii_item_response(it, uc_cur, 0);

                        psc_get_key->hits++;
                        psc_get_key->write_bytes += it->nbytes;

                        if (do_key_stats) {
                            touch_key_stats(ptd, key, key_len,
                                            msec_current_time_snapshot,
                                            STATS_CMD_TYPE_REGULAR,
                                            STATS_CMD_GET_KEY,
                                            0, 1, 0,
                                            0, it->nbytes);
                        }

                        // The refcount was inc'ed by mcache_get() for us.
                        //
                        item_remove(it);

                        goto loop_next;
                    }
                }

                bool self = false;

                conn *c = cproxy_find_downstream_conn(d, key, key_len,
                                                      &self);
                if (c != NULL) {
                    if (self) {
                        // Optimization for talking with ourselves,
                        // to avoid extra network hop.
                        //
                        ptd->stats.stats.tot_optimize_self++;

                        item *it = item_get(key, key_len);
                        if (it != NULL) {
                            cproxy_upstream_ascii_item_response(it, uc_cur,
                                                                cas_emit);

                            psc_get_key->hits++;
                            psc_get_key->write_bytes += it->nbytes;

                            if (do_key_stats) {
                                touch_key_stats(ptd, key, key_len,
                                                msec_current_time_snapshot,
                                                STATS_CMD_TYPE_REGULAR,
                                                STATS_CMD_GET_KEY,
                                                0, 1, 0,
                                                0, it->nbytes);
                            }

                            // The refcount was inc'ed by item_get() for us.
                            //
                            item_remove(it);

                            if (settings.verbose > 1) {
                                fprintf(stderr,
                                        "optimize self multiget hit: %s\n",
                                        key);
                            }
                        } else {
                            psc_get_key->misses++;

                            if (do_key_stats) {
                                touch_key_stats(ptd, key, key_len,
                                                msec_current_time_snapshot,
                                                STATS_CMD_TYPE_REGULAR,
                                                STATS_CMD_GET_KEY,
                                                0, 0, 1,
                                                0, 0);
                            }

                            if (settings.verbose > 1) {
                                fprintf(stderr,
                                        "optimize self multiget miss: %s\n",
                                        key);
                            }
                        }

                        goto loop_next;
                    }

                    // See if we've already requested this key via
                    // the multiget hash table, in order to
                    // de-deplicate repeated keys.
                    //
                    bool first_request = true;

                    if (d->multiget != NULL) {
                        // TODO: Use Trond's allocator here.
                        //
                        multiget_entry *entry =
                            calloc(1, sizeof(multiget_entry));
                        if (entry != NULL) {
                            entry->upstream_conn = uc_cur;
                            entry->opaque = 0;
                            entry->hits = 0;
                            entry->next = genhash_find(d->multiget, key);

                            genhash_update(d->multiget, key, entry);

                            if (entry->next != NULL) {
                                first_request = false;
                            }
                        } else {
                            // TODO: Handle out of multiget entry memory.
                        }
                    }

                    if (first_request) {
                        assert(c->item == NULL);
                        assert(c->state == conn_pause);
                        assert(IS_PROXY(c->protocol));
                        assert(c->ilist != NULL);
                        assert(c->isize > 0);

                        if (c->msgused <= 1 &&
                            c->msgbytes <= 0) {
                            emit_start(c, command, cmd_len);
                        }

                        // Provide the preceding space as optimization
                        // for ascii-to-ascii configuration.
                        //
                        emit_skey(c, key - 1, key_len + 1);
                    } else {
                        ptd->stats.stats.tot_multiget_keys_dedupe++;

                        if (settings.verbose > 1) {
                            char buf[KEY_MAX_LENGTH + 10];
                            memcpy(buf, key, key_len);
                            buf[key_len] = '\0';

                            fprintf(stderr,
                                    "%d cproxy multiget dedpue: %s\n",
                                    uc_cur->sfd, buf);
                        }
                    }
                } else {
                    // TODO: Handle when downstream conn is down.
                }
            }

        loop_next:
            space = next_space;
        }

        uc_num++;
        uc_cur = uc_cur->next;
    }

    for (int i = 0; i < nconns; i++) {
        conn *c = d->downstream_conns[i];
        if (c != NULL &&
            (c->msgused > 1 ||
             c->msgbytes > 0)) {
            emit_end(c);

            conn_set_state(c, conn_mwrite);
            c->write_and_go = conn_new_cmd;

            if (update_event(c, EV_WRITE | EV_PERSIST)) {
                nwrite++;

                if (uc->noreply) {
                    c->write_and_go = conn_pause;
                }
            } else {
                if (settings.verbose > 1) {
                    fprintf(stderr,
                            "Couldn't update cproxy write event\n");
                }

                d->ptd->stats.stats.err_oom++;
                cproxy_close_conn(c);
            }
        }
    }

    if (settings.verbose > 1) {
        fprintf(stderr, "forward multiget nwrite %d out of %d\n",
                nwrite, nconns);
    }

    d->downstream_used_start = nwrite;
    d->downstream_used       = nwrite;

    if (cproxy_dettach_if_noreply(d, uc) == false) {
        d->upstream_suffix = "END\r\n";

        cproxy_start_downstream_timeout(d, NULL);
    }

    return nwrite > 0;
}