Beispiel #1
0
void slabs_alloc_test(void) {
    unsigned int total_chunk = 0;
    
    const char *key = "charliezhao";
    size_t nkey = strlen(key) + 1;
    item *ptr = (item *)slabs_alloc(1024, slabs_clsid(1024), &total_chunk);
    strcpy(ITEM_key(ptr), key);   
    ptr->nkey = nkey;
    strcpy(ITEM_data(ptr), "xuechaozhao");
    uint32_t hv = jenkins_hash(key, strlen(key));
    assoc_insert(ptr, hv);

    for(int i = 0; i <= 10922; ++i) {
        void *ptr = slabs_alloc(96, slabs_clsid(96), &total_chunk);
        if(ptr == NULL) {
            fprintf(stderr, "i: %7d slabs_alloc fail\n", 
                    i);
            break;
        }
        else {
            slabs_free(ptr, 96, slabs_clsid(96)); 
        }
    }

    item *ptr2 = assoc_find(key, nkey, hv);
    fprintf(stdout, "key:%20s value:%20s\n", ITEM_key(ptr2), ITEM_data(ptr2)); 
}
Beispiel #2
0
bool b2b_forward_item_vbucket(conn *uc, downstream *d, item *it,
                              conn *c, bool self, int vbucket) {
    (void)self;
    assert(d != NULL);
    assert(d->ptd != NULL);
    assert(uc != NULL);
    assert(uc->next == NULL);
    assert(uc->noreply == false);
    assert(c != NULL);

    // Assuming we're already connected to downstream.
    //
    // TODO: Optimize to self codepath.
    //
    if (settings.verbose > 2) {
        moxi_log_write("%d: b2b_forward_item_vbucket %x to %d, vbucket %d\n",
                uc->sfd, uc->cmd, c->sfd, vbucket);
    }

    protocol_binary_request_header *req =
        (protocol_binary_request_header *) ITEM_data(it);

    if (vbucket >= 0) {
        req->request.reserved = htons(vbucket);
    }

    if (add_conn_item(c, it) == true) {
        // The caller keeps its refcount, and we need our own.
        //
        it->refcount++;

        if (add_iov(c, ITEM_data(it), it->nbytes) == 0) {
            conn_set_state(c, conn_mwrite);
            c->write_and_go = conn_new_cmd;

            if (update_event(c, EV_WRITE | EV_PERSIST)) {
                if (settings.verbose > 2) {
                    moxi_log_write("%d: b2b_forward %x to %d success\n",
                            uc->sfd, uc->cmd, c->sfd);
                }

                return true;
            }
        }
    }

    d->ptd->stats.stats.err_oom++;
    cproxy_close_conn(c);

    return false;
}
Beispiel #3
0
int item_test() {
    int maxi = 0; 
    
    //test set.
    for(int i = 0; i < 10; i++) {
        char key[1024];
        memset(key, 0, 1024);
        sprintf(key, "charlie_%d", i);
        const size_t nkey = strlen(key) + 1;
        const int flags = 0;
        const time_t exptime = 0;
        const int nbytes = 1024; 
        uint32_t cur_hv = jenkins_hash((void *)key, nkey);
        item *it = do_item_alloc((const char *)key, nkey, flags, exptime, nbytes, cur_hv);
        if(it == NULL) {
            fprintf(stderr, "\033[31malloc fail\033[0m");
            maxi = i;
            break; 
        }
        char val[1024];
        sprintf(val, "%d", i);
        memcpy(ITEM_data(it), (void *)&val, strlen(val)+1);
    }
    
    //test get.
    for(int i = 0; i < 10; ++i) {
        char key[1024];
        memset(key, 0, 1024);
        sprintf(key, "charlie_%d", i); 
        const size_t nkey = strlen(key) + 1;
        uint32_t cur_hv = jenkins_hash((void *)key, nkey);
        item *it = assoc_find(key, nkey, cur_hv);
        if(it == NULL) {
            fprintf(stderr, "\033[31mget fail\033[0m");
            return -1;
        }
        int val = 0;
        memcpy((void *)&val, ITEM_data(it), sizeof(val));
        if(i&0x1) {
            fprintf(stdout, "del key:%s value:%d\n", ITEM_key(it), val);
            do_item_unlink(it, cur_hv); 
            lru_traverse(NULL);
        }
        
    }
    return 0;
}
Beispiel #4
0
static void do_slabs_free_chunked(item *it, const size_t size, unsigned int id,
                                  slabclass_t *p) {
    item_chunk *chunk = (item_chunk *) ITEM_data(it);
    size_t realsize = size;
    while (chunk) {
        realsize += sizeof(item_chunk);
        chunk = chunk->next;
    }
    chunk = (item_chunk *) ITEM_data(it);
    unsigned int chunks_found = 1;

    it->it_flags = ITEM_SLABBED;
    it->slabs_clsid = 0;
    it->prev = 0;
    it->next = (item *) chunk->next;
    assert(it->next);
    /* top chunk should already point back to head */
    assert(it->next && (void*)it->next->prev == (void*)chunk);
    chunk = chunk->next;
    chunk->prev = (item_chunk *)it;

    while (chunk) {
        assert(chunk->it_flags == ITEM_CHUNK);
        chunk->it_flags = ITEM_SLABBED;
        chunk->slabs_clsid = 0;
        chunks_found++;
        if (chunk->next) {
            chunk = chunk->next;
        } else {
            break;
        }
    }
    /* must have had nothing hanging off of the final chunk */
    assert(chunk && chunk->next == 0);
    /* Tail chunk, link the freelist here. */
    chunk->next = p->slots;
    if (chunk->next) chunk->next->prev = chunk;

    p->slots = it;
    p->sl_curr += chunks_found;
    p->requested -= size;

    return;
}
Beispiel #5
0
bool b2b_forward_item(conn *uc, downstream *d, item *it) {
    int  vbucket = -1;
    bool local;
    conn *c;
    protocol_binary_request_header *req;
    char *key;
    int keylen;

    cb_assert(uc != NULL);
    cb_assert(uc->next == NULL);
    cb_assert(uc->noreply == false);
    cb_assert(it != NULL);

    req = (protocol_binary_request_header *) ITEM_data(it);
    key = ((char *) req) + sizeof(*req) + req->request.extlen;
    keylen = ntohs(req->request.keylen);

    if (settings.verbose > 2) {
        char buf[300];
        memcpy(buf, key, keylen);
        buf[keylen] = '\0';

        moxi_log_write("%d: b2b_forward_item nbytes %u, extlen %d, keylen %d opcode %x key (%s)\n",
                       uc->sfd, it->nbytes, req->request.extlen, keylen, req->request.opcode, buf);

        cproxy_dump_header(uc->sfd, (char *) req);
    }

    if (key == NULL ||
        keylen <= 0) {
        return false; /* We don't know how to hash an empty key. */
    }


    c = cproxy_find_downstream_conn_ex(d, key, keylen, &local, &vbucket);
    if (c != NULL) {
        if (local) {
            uc->hit_local = true;
        }
        if (b2b_forward_item_vbucket(uc, d, it, c, vbucket) == true) {
            d->downstream_used_start = 1;
            d->downstream_used = 1;

            cproxy_start_downstream_timeout(d, c);

            return true;
        }
    }

    if (settings.verbose > 2) {
        moxi_log_write("%d: b2b_forward_item failed (%d)\n",
                uc->sfd, (c != NULL));
    }

    return false;
}
Beispiel #6
0
void complete_nread(conn *c) {
    item *it = c->item;
    int comm = c->item_comm;
    item *old_it;
    time_t now = time(0);

    stats.set_cmds++;

    while(1) {
        if (strncmp(ITEM_data(it) + it->nbytes - 2, "\r\n", 2) != 0) {
            out_string(c, "CLIENT_ERROR bad data chunk");
            break;
        }

        old_it = assoc_find(ITEM_key(it));

        if (old_it && settings.oldest_live &&
                old_it->time <= settings.oldest_live) {
            item_unlink(old_it);
            old_it = 0;
        }

        if (old_it && old_it->exptime && old_it->exptime < now) {
            item_unlink(old_it);
            old_it = 0;
        }

        if (old_it && comm==NREAD_ADD) {
            item_update(old_it);
            out_string(c, "NOT_STORED");
            break;
        }

        if (!old_it && comm == NREAD_REPLACE) {
            out_string(c, "NOT_STORED");
            break;
        }

        if (old_it && (old_it->it_flags & ITEM_DELETED) && (comm == NREAD_REPLACE || comm == NREAD_ADD)) {
            out_string(c, "NOT_STORED");
            break;
        }

        if (old_it) {
            item_replace(old_it, it);
        } else item_link(it);

        c->item = 0;
        out_string(c, "STORED");
        return;
    }

    item_free(it);
    c->item = 0;
    return;
}
Beispiel #7
0
static void do_slabs_free_chunked(item *it, const size_t size) {
    item_chunk *chunk = (item_chunk *) ITEM_data(it);
    slabclass_t *p;

    it->it_flags = ITEM_SLABBED;
    it->slabs_clsid = 0;
    it->prev = 0;
    // header object's original classid is stored in chunk.
    p = &slabclass[chunk->orig_clsid];
    if (chunk->next) {
        chunk = chunk->next;
        chunk->prev = 0;
    } else {
        // header with no attached chunk
        chunk = NULL;
    }

    // return the header object.
    // TODO: This is in three places, here and in do_slabs_free().
    it->prev = 0;
    it->next = p->slots;
    if (it->next) it->next->prev = it;
    p->slots = it;
    p->sl_curr++;
    // TODO: macro
    p->requested -= it->nkey + 1 + it->nsuffix + sizeof(item) + sizeof(item_chunk);
    if (settings.use_cas) {
        p->requested -= sizeof(uint64_t);
    }

    item_chunk *next_chunk;
    while (chunk) {
        assert(chunk->it_flags == ITEM_CHUNK);
        chunk->it_flags = ITEM_SLABBED;
        p = &slabclass[chunk->slabs_clsid];
        chunk->slabs_clsid = 0;
        next_chunk = chunk->next;

        chunk->prev = 0;
        chunk->next = p->slots;
        if (chunk->next) chunk->next->prev = chunk;
        p->slots = chunk;
        p->sl_curr++;
        p->requested -= chunk->size + sizeof(item_chunk);

        chunk = next_chunk;
    }

    return;
}
Beispiel #8
0
int process_get_command ( char *key, size_t nkey, char *dst, int *len )
{
    item *it;
    char *src = NULL;
    it = item_get (key, nkey);
    if ( it )
    {
        item_update (it);
        src = ITEM_data (it);
        *len = it->nbytes - 2;
        memcpy (dst, src, it->nbytes);
        item_remove (it);
        return true;
    }
    else
    {
        //item_remove(it);
        return false;
    }
}
Beispiel #9
0
/* We get here after reading the value in set/add/replace
 * commands. The command has been stored in c->cmd, and
 * the item is ready in c->item.
 */
void cproxy_process_upstream_ascii_nread(conn *c) {
    assert(c != NULL);
    assert(c->next == NULL);

    item *it = c->item;

    assert(it != NULL);

    // pthread_mutex_lock(&c->thread->stats.mutex);
    // c->thread->stats.slab_stats[it->slabs_clsid].set_cmds++;
    // pthread_mutex_unlock(&c->thread->stats.mutex);

    if (strncmp(ITEM_data(it) + it->nbytes - 2, "\r\n", 2) == 0) {
        proxy_td *ptd = c->extra;

        assert(ptd != NULL);

        cproxy_pause_upstream_for_downstream(ptd, c);
    } else {
        out_string(c, "CLIENT_ERROR bad data chunk");
    }
}
Beispiel #10
0
void drive_machine(conn *c) {

    int exit = 0;
    int sfd, flags = 1;
    socklen_t addrlen;
    struct sockaddr addr;
    conn *newc;
    int res;

    while (!exit) {
        /* printf("state %d\n", c->state);*/
        switch(c->state) {
        case conn_listening:
            addrlen = sizeof(addr);
            if ((sfd = accept(c->sfd, &addr, &addrlen)) == -1) {
                if (errno == EAGAIN || errno == EWOULDBLOCK) {
                    exit = 1;
                    break;
                } else {
                    perror("accept()");
                }
                break;
            }
            if ((flags = fcntl(sfd, F_GETFL, 0)) < 0 ||
                    fcntl(sfd, F_SETFL, flags | O_NONBLOCK) < 0) {
                perror("setting O_NONBLOCK");
                close(sfd);
                break;
            }
            newc = conn_new(sfd, conn_read, EV_READ | EV_PERSIST);
            if (!newc) {
                if (settings.verbose > 0)
                    fprintf(stderr, "couldn't create new connection\n");
                close(sfd);
                break;
            }

            break;

        case conn_read:
            if (try_read_command(c)) {
                continue;
            }
            if (try_read_network(c)) {
                continue;
            }
            /* we have no command line and no data to read from network */
            if (!update_event(c, EV_READ | EV_PERSIST)) {
                if (settings.verbose > 0)
                    fprintf(stderr, "Couldn't update event\n");
                c->state = conn_closing;
                break;
            }
            exit = 1;
            break;

        case conn_nread:
            /* we are reading rlbytes into rcurr; */
            if (c->rlbytes == 0) {
                complete_nread(c);
                break;
            }
            /* first check if we have leftovers in the conn_read buffer */
            if (c->rbytes > 0) {
                int tocopy = c->rbytes > c->rlbytes ? c->rlbytes : c->rbytes;
                memcpy(c->rcurr, c->rbuf, tocopy);
                c->rcurr += tocopy;
                c->rlbytes -= tocopy;
                if (c->rbytes > tocopy) {
                    memmove(c->rbuf, c->rbuf+tocopy, c->rbytes - tocopy);
                }
                c->rbytes -= tocopy;
                break;
            }

            /*  now try reading from the socket */
            res = read(c->sfd, c->rcurr, c->rlbytes);
            if (res > 0) {
                stats.bytes_read += res;
                c->rcurr += res;
                c->rlbytes -= res;
                break;
            }
            if (res == 0) { /* end of stream */
                c->state = conn_closing;
                break;
            }
            if (res == -1 && (errno == EAGAIN || errno == EWOULDBLOCK)) {
                if (!update_event(c, EV_READ | EV_PERSIST)) {
                    if (settings.verbose > 0)
                        fprintf(stderr, "Couldn't update event\n");
                    c->state = conn_closing;
                    break;
                }
                exit = 1;
                break;
            }
            /* otherwise we have a real error, on which we close the connection */
            if (settings.verbose > 0)
                fprintf(stderr, "Failed to read, and not due to blocking\n");
            c->state = conn_closing;
            break;

        case conn_swallow:
            /* we are reading sbytes and throwing them away */
            if (c->sbytes == 0) {
                c->state = conn_read;
                break;
            }

            /* first check if we have leftovers in the conn_read buffer */
            if (c->rbytes > 0) {
                int tocopy = c->rbytes > c->sbytes ? c->sbytes : c->rbytes;
                c->sbytes -= tocopy;
                if (c->rbytes > tocopy) {
                    memmove(c->rbuf, c->rbuf+tocopy, c->rbytes - tocopy);
                }
                c->rbytes -= tocopy;
                break;
            }

            /*  now try reading from the socket */
            res = read(c->sfd, c->rbuf, c->rsize > c->sbytes ? c->sbytes : c->rsize);
            if (res > 0) {
                stats.bytes_read += res;
                c->sbytes -= res;
                break;
            }
            if (res == 0) { /* end of stream */
                c->state = conn_closing;
                break;
            }
            if (res == -1 && (errno == EAGAIN || errno == EWOULDBLOCK)) {
                if (!update_event(c, EV_READ | EV_PERSIST)) {
                    if (settings.verbose > 0)
                        fprintf(stderr, "Couldn't update event\n");
                    c->state = conn_closing;
                    break;
                }
                exit = 1;
                break;
            }
            /* otherwise we have a real error, on which we close the connection */
            if (settings.verbose > 0)
                fprintf(stderr, "Failed to read, and not due to blocking\n");
            c->state = conn_closing;
            break;

        case conn_write:
            /* we are writing wbytes bytes starting from wcurr */
            if (c->wbytes == 0) {
                if (c->write_and_free) {
                    free(c->write_and_free);
                    c->write_and_free = 0;
                }
                c->state = c->write_and_go;
                if (c->state == conn_read)
                    set_cork(c, 0);
                break;
            }
            res = write(c->sfd, c->wcurr, c->wbytes);
            if (res > 0) {
                stats.bytes_written += res;
                c->wcurr  += res;
                c->wbytes -= res;
                break;
            }
            if (res == -1 && (errno == EAGAIN || errno == EWOULDBLOCK)) {
                if (!update_event(c, EV_WRITE | EV_PERSIST)) {
                    if (settings.verbose > 0)
                        fprintf(stderr, "Couldn't update event\n");
                    c->state = conn_closing;
                    break;
                }
                exit = 1;
                break;
            }
            /* if res==0 or res==-1 and error is not EAGAIN or EWOULDBLOCK,
               we have a real error, on which we close the connection */
            if (settings.verbose > 0)
                fprintf(stderr, "Failed to write, and not due to blocking\n");
            c->state = conn_closing;
            break;
        case conn_mwrite:
            /*
             * we're writing ibytes bytes from iptr. iptr alternates between
             * ibuf, where we build a string "VALUE...", and ITEM_data(it) for the
             * current item. When we finish a chunk, we choose the next one using
             * ipart, which has the following semantics: 0 - start the loop, 1 -
             * we finished ibuf, go to current ITEM_data(it); 2 - we finished ITEM_data(it),
             * move to the next item and build its ibuf; 3 - we finished all items,
             * write "END".
             */
            if (c->ibytes > 0) {
                res = write(c->sfd, c->iptr, c->ibytes);
                if (res > 0) {
                    stats.bytes_written += res;
                    c->iptr += res;
                    c->ibytes -= res;
                    break;
                }
                if (res == -1 && (errno == EAGAIN || errno == EWOULDBLOCK)) {
                    if (!update_event(c, EV_WRITE | EV_PERSIST)) {
                        if (settings.verbose > 0)
                            fprintf(stderr, "Couldn't update event\n");
                        c->state = conn_closing;
                        break;
                    }
                    exit = 1;
                    break;
                }
                /* if res==0 or res==-1 and error is not EAGAIN or EWOULDBLOCK,
                   we have a real error, on which we close the connection */
                if (settings.verbose > 0)
                    fprintf(stderr, "Failed to write, and not due to blocking\n");
                c->state = conn_closing;
                break;
            } else {
                item *it;
                /* we finished a chunk, decide what to do next */
                switch (c->ipart) {
                case 1:
                    it = *(c->icurr);
                    assert((it->it_flags & ITEM_SLABBED) == 0);
                    c->iptr = ITEM_data(it);
                    c->ibytes = it->nbytes;
                    c->ipart = 2;
                    break;
                case 2:
                    it = *(c->icurr);
                    item_remove(it);
                    c->ileft--;
                    if (c->ileft <= 0) {
                        c->ipart = 3;
                        break;
                    } else {
                        c->icurr++;
                    }
                /* FALL THROUGH */
                case 0:
                    it = *(c->icurr);
                    assert((it->it_flags & ITEM_SLABBED) == 0);
                    c->ibytes = sprintf(c->ibuf, "VALUE %s %u %u\r\n", ITEM_key(it), it->flags, it->nbytes - 2);
                    if (settings.verbose > 1)
                        fprintf(stderr, ">%d sending key %s\n", c->sfd, ITEM_key(it));
                    c->iptr = c->ibuf;
                    c->ipart = 1;
                    break;
                case 3:
                    out_string(c, "END");
                    break;
                }
            }
            break;

        case conn_closing:
            conn_close(c);
            exit = 1;
            break;
        }

    }

    return;
}
Beispiel #11
0
/**
 * @param cas_emit  1: emit CAS.
 *                  0: do not emit CAS.
 *                 -1: data driven.
 */
void cproxy_upstream_ascii_item_response(item *it, conn *uc,
        int cas_emit) {
    assert(it != NULL);
    assert(uc != NULL);
    assert(uc->state == conn_pause);
    assert(uc->funcs != NULL);
    assert(IS_ASCII(uc->protocol));
    assert(IS_PROXY(uc->protocol));

    if (settings.verbose > 2) {
        char key[KEY_MAX_LENGTH + 10];
        assert(it->nkey <= KEY_MAX_LENGTH);
        memcpy(key, ITEM_key(it), it->nkey);
        key[it->nkey] = '\0';

        moxi_log_write("<%d cproxy ascii item response, key %s\n",
                       uc->sfd, key);
    }

    if (strncmp(ITEM_data(it) + it->nbytes - 2, "\r\n", 2) == 0) {
        // TODO: Need to clean up half-written add_iov()'s.
        //       Consider closing the upstream_conns?
        //
        uint64_t cas = ITEM_get_cas(it);
        if ((cas_emit == 0) ||
                (cas_emit < 0 &&
                 cas == CPROXY_NOT_CAS)) {
            if (add_conn_item(uc, it)) {
                it->refcount++;

                if (add_iov(uc, "VALUE ", 6) == 0 &&
                        add_iov(uc, ITEM_key(it), it->nkey) == 0 &&
                        add_iov(uc, ITEM_suffix(it),
                                it->nsuffix + it->nbytes) == 0) {
                    if (settings.verbose > 2) {
                        moxi_log_write("<%d cproxy ascii item response success\n",
                                       uc->sfd);
                    }
                }
            }
        } else {
            char *suffix = add_conn_suffix(uc);
            if (suffix != NULL) {
                sprintf(suffix, " %llu\r\n", (unsigned long long) cas);

                if (add_conn_item(uc, it)) {
                    it->refcount++;

                    if (add_iov(uc, "VALUE ", 6) == 0 &&
                            add_iov(uc, ITEM_key(it), it->nkey) == 0 &&
                            add_iov(uc, ITEM_suffix(it),
                                    it->nsuffix - 2) == 0 &&
                            add_iov(uc, suffix, strlen(suffix)) == 0 &&
                            add_iov(uc, ITEM_data(it), it->nbytes) == 0) {
                        if (settings.verbose > 2) {
                            moxi_log_write("<%d cproxy ascii item response ok\n",
                                           uc->sfd);
                        }
                    }
                }
            }
        }
    } else {
        if (settings.verbose > 1) {
            moxi_log_write("ERROR: unexpected downstream data block");
        }
    }
}
Beispiel #12
0
enum store_item_type do_store_item ( item *it, int comm, const uint32_t hv )
{
    char *key = ITEM_key (it);
    item *old_it = do_item_get (key, it->nkey, hv);
    enum store_item_type stored = NOT_STORED;

    item *new_it = NULL;
    int flags;

    if ( old_it != NULL && comm == NREAD_ADD )
    {
        do_item_update (old_it);
    }
    else if ( ! old_it && ( comm == NREAD_REPLACE || comm == NREAD_APPEND || comm == NREAD_PREPEND ) )
    {

    }
    else
    {
        if ( comm == NREAD_APPEND || comm == NREAD_PREPEND )
        {
            if ( stored == NOT_STORED )
            {
                flags = ( int ) strtol (ITEM_suffix (old_it), ( char ** ) NULL, 10);
                new_it = do_item_alloc (key, it->nkey, flags, old_it->exptime, ITEM_data (it), it->nbytes + old_it->nbytes - 2, hv);
                if ( ! new_it )
                {
                    if ( old_it )
                        do_item_remove (old_it);
                    return NOT_STORED;
                }
                if ( comm == NREAD_APPEND )
                {
                    memcpy (ITEM_data (new_it), ITEM_data (old_it), old_it->nbytes);
                    memcpy (ITEM_data (new_it) + old_it->nbytes - 2, ITEM_data (it), it->nbytes);
                }
                else
                {
                    memcpy (ITEM_data (new_it), ITEM_data (it), it->nbytes);
                    memcpy (ITEM_data (new_it) + it->nbytes - 2, ITEM_data (old_it), old_it->nbytes);
                }
                it = new_it;
            }
        }
        if ( stored == NOT_STORED )
        {
            if ( old_it != NULL )
            {
                item_replace (old_it, it, hv);
            }
            else
            {
                do_item_link (it, hv);
            }
            stored = STORED;
        }
    }
    if ( old_it != NULL )
    {
        do_item_remove (old_it);
    }
    if ( new_it != NULL )
    {
        do_item_remove (new_it);
    }
    return stored;
}
Beispiel #13
0
void protocol_stats_foreach_write(const void *key,
                                  const void *value,
                                  void *user_data) {

    char *line = (char *) value;
    conn *uc = (conn *) user_data;
    int nline;
    cb_assert(line != NULL);
    cb_assert(uc != NULL);
    (void)key;

    nline = strlen(line);
    if (nline > 0) {
        item *it;
        if (settings.verbose > 2) {
            moxi_log_write("%d: cproxy_stats writing: %s\n", uc->sfd, line);
        }

        if (IS_BINARY(uc->protocol)) {
            token_t line_tokens[MAX_TOKENS];
            size_t  line_ntokens = scan_tokens(line, line_tokens, MAX_TOKENS, NULL);

            if (line_ntokens == 4) {
                uint16_t key_len  = line_tokens[NAME_TOKEN].length;
                uint32_t data_len = line_tokens[VALUE_TOKEN].length;

                it = item_alloc("s", 1, 0, 0,
                                sizeof(protocol_binary_response_stats) + key_len + data_len);
                if (it != NULL) {
                    protocol_binary_response_stats *header =
                        (protocol_binary_response_stats *) ITEM_data(it);

                    memset(ITEM_data(it), 0, it->nbytes);

                    header->message.header.response.magic = (uint8_t) PROTOCOL_BINARY_RES;
                    header->message.header.response.opcode = uc->binary_header.request.opcode;
                    header->message.header.response.keylen  = (uint16_t) htons(key_len);
                    header->message.header.response.bodylen = htonl(key_len + data_len);
                    header->message.header.response.opaque  = uc->opaque;

                    memcpy((ITEM_data(it)) + sizeof(protocol_binary_response_stats),
                           line_tokens[NAME_TOKEN].value, key_len);
                    memcpy((ITEM_data(it)) + sizeof(protocol_binary_response_stats) + key_len,
                           line_tokens[VALUE_TOKEN].value, data_len);

                    if (add_conn_item(uc, it)) {
                        add_iov(uc, ITEM_data(it), it->nbytes);

                        if (settings.verbose > 2) {
                            moxi_log_write("%d: cproxy_stats writing binary", uc->sfd);
                            cproxy_dump_header(uc->sfd, ITEM_data(it));
                        }

                        return;
                    }

                    item_remove(it);
                }
            }

            return;
        }

        it = item_alloc("s", 1, 0, 0, nline + 2);
        if (it != NULL) {
            strncpy(ITEM_data(it), line, nline);
            strncpy(ITEM_data(it) + nline, "\r\n", 2);

            if (add_conn_item(uc, it)) {
                add_iov(uc, ITEM_data(it), nline + 2);
                return;
            }

            item_remove(it);
        }
    }
}
Beispiel #14
0
void cproxy_process_a2a_downstream(conn *c, char *line) {
    assert(c != NULL);
    assert(c->next == NULL);
    assert(c->extra != NULL);
    assert(c->cmd == -1);
    assert(c->item == NULL);
    assert(line != NULL);
    assert(line == c->rcurr);
    assert(IS_ASCII(c->protocol));
    assert(IS_PROXY(c->protocol));

    if (settings.verbose > 1)
        fprintf(stderr, "<%d cproxy_process_a2a_downstream %s\n",
                c->sfd, line);

    downstream *d = c->extra;

    assert(d != NULL);
    assert(d->ptd != NULL);
    assert(d->ptd->proxy != NULL);

    if (strncmp(line, "VALUE ", 6) == 0) {
        token_t      tokens[MAX_TOKENS];
        size_t       ntokens;
        unsigned int flags;
        int          clen = 0;
        int          vlen;
        uint64_t     cas = CPROXY_NOT_CAS;

        ntokens = scan_tokens(line, tokens, MAX_TOKENS, &clen);
        if (ntokens >= 5 && // Accounts for extra termimation token.
            ntokens <= 6 &&
            tokens[KEY_TOKEN].length <= KEY_MAX_LENGTH &&
            safe_strtoul(tokens[2].value, (uint32_t *) &flags) &&
            safe_strtoul(tokens[3].value, (uint32_t *) &vlen)) {
            char  *key  = tokens[KEY_TOKEN].value;
            size_t nkey = tokens[KEY_TOKEN].length;

            item *it = item_alloc(key, nkey, flags, 0, vlen + 2);
            if (it != NULL) {
                if (ntokens == 5 ||
                    safe_strtoull(tokens[4].value, &cas)) {
                    ITEM_set_cas(it, cas);

                    c->item = it;
                    c->ritem = ITEM_data(it);
                    c->rlbytes = it->nbytes;
                    c->cmd = -1;

                    conn_set_state(c, conn_nread);

                    return; // Success.
                } else {
                    if (settings.verbose > 1)
                        fprintf(stderr, "cproxy could not parse cas\n");
                }
            } else {
                if (settings.verbose > 1)
                    fprintf(stderr, "cproxy could not item_alloc size %u\n",
                            vlen + 2);
            }

            if (it != NULL)
                item_remove(it);
            it = NULL;

            c->sbytes = vlen + 2; // Number of bytes to swallow.

            conn_set_state(c, conn_swallow);

            // Note, eventually, we'll see an END later.
        } else {
            // We don't know how much to swallow, so close the downstream.
            // The conn_closing should release the downstream,
            // which should write a suffix/error to the upstream.
            //
            conn_set_state(c, conn_closing);
        }
    } else if (strncmp(line, "END", 3) == 0) {
        conn_set_state(c, conn_pause);
    } else if (strncmp(line, "OK", 2) == 0) {
        conn_set_state(c, conn_pause);

        // TODO: Handle flush_all's expiration parameter against
        // the front_cache.
        //
        // TODO: We flush the front_cache too often, inefficiently
        // on every downstream flush_all OK response, rather than
        // on just the last flush_all OK response.
        //
        conn *uc = d->upstream_conn;
        if (uc != NULL &&
            uc->cmd_curr == PROTOCOL_BINARY_CMD_FLUSH) {
            mcache_flush_all(&d->ptd->proxy->front_cache, 0);
        }
    } else if (strncmp(line, "STAT ", 5) == 0 ||
               strncmp(line, "ITEM ", 5) == 0 ||
               strncmp(line, "PREFIX ", 7) == 0) {
        assert(d->merger != NULL);

        conn *uc = d->upstream_conn;
        if (uc != NULL) {
            assert(uc->next == NULL);

            if (protocol_stats_merge_line(d->merger, line) == false) {
                // Forward the line as-is if we couldn't merge it.
                //
                int nline = strlen(line);

                item *it = item_alloc("s", 1, 0, 0, nline + 2);
                if (it != NULL) {
                    strncpy(ITEM_data(it), line, nline);
                    strncpy(ITEM_data(it) + nline, "\r\n", 2);

                    if (add_conn_item(uc, it)) {
                        add_iov(uc, ITEM_data(it), nline + 2);

                        it = NULL;
                    }

                    if (it != NULL)
                        item_remove(it);
                }
            }
        }

        conn_set_state(c, conn_new_cmd);
    } else {
        conn_set_state(c, conn_pause);

        // The upstream conn might be NULL when closed already
        // or while handling a noreply.
        //
        conn *uc = d->upstream_conn;
        if (uc != NULL) {
            assert(uc->next == NULL);

            out_string(uc, line);

            if (!update_event(uc, EV_WRITE | EV_PERSIST)) {
                if (settings.verbose > 1)
                    fprintf(stderr,
                            "Can't update upstream write event\n");

                d->ptd->stats.stats.err_oom++;
                cproxy_close_conn(uc);
            }

            cproxy_del_front_cache_key_ascii_response(d, line,
                                                      uc->cmd_start);
        }
    }
}
Beispiel #15
0
/* We get here after reading the header+body into an item.
 */
void cproxy_process_upstream_binary_nread(conn *c) {
    cb_assert(c != NULL);
    cb_assert(c->cmd >= 0);
    cb_assert(c->next == NULL);
    cb_assert(c->cmd_start == NULL);
    cb_assert(IS_BINARY(c->protocol));
    cb_assert(IS_PROXY(c->protocol));

    protocol_binary_request_header *header =
        (protocol_binary_request_header *) &c->binary_header;

    int      extlen  = header->request.extlen;
    int      keylen  = header->request.keylen;
    uint32_t bodylen = header->request.bodylen;

    if (settings.verbose > 2) {
        moxi_log_write("<%d cproxy_process_upstream_binary_nread %x %d %d %u\n",
                c->sfd, c->cmd, extlen, keylen, bodylen);
    }

    /* pthread_mutex_lock(&c->thread->stats.mutex); */
    /* c->thread->stats.slab_stats[it->slabs_clsid].set_cmds++; */
    /* pthread_mutex_unlock(&c->thread->stats.mutex); */

    proxy_td *ptd = c->extra;
    cb_assert(ptd != NULL);

    if (header->request.opcode == PROTOCOL_BINARY_CMD_SASL_AUTH) {
        item *it = c->item;
        cb_assert(it);

        cproxy_sasl_plain_auth(c, (char *) ITEM_data(it));
        return;
    }

    if (header->request.opcode == PROTOCOL_BINARY_CMD_SASL_STEP) {
        write_bin_error(c, PROTOCOL_BINARY_RESPONSE_AUTH_ERROR, 0);
        return;
    }

    if (c->binary_header.request.opcode == PROTOCOL_BINARY_CMD_STAT) {
        char *subcommand = binary_get_key(c);
        size_t nkey = c->binary_header.request.keylen;
        if (nkey == 13 && memcmp(subcommand, "proxy buckets", 13) == 0) {
            process_bin_proxy_stats(c);
            return;
        }
    }

    if (c->noreply) {
        if (settings.verbose > 2) {
            moxi_log_write("<%d cproxy_process_upstream_binary_nread "
                    "corking quiet command %x %d\n",
                    c->sfd, c->cmd, (c->corked != NULL));
        }

        /* TODO: We currently don't support binary FLUSHQ. */

        /* Rather than having the downstream connections get */
        /* into a wonky state, prevent it. */

        if (header->request.opcode == PROTOCOL_BINARY_CMD_FLUSHQ) {
            /* Note: don't use cproxy_close_conn(c), as it goes */
            /* through the drive_machine() loop again. */

            /* cproxy_close_conn(c); */

            conn_set_state(c, conn_closing);

            return;
        }

        /* Hold onto or 'cork' all the binary quiet commands */
        /* until there's a later non-quiet command. */

        if (cproxy_binary_cork_cmd(c)) {
            conn_set_state(c, conn_new_cmd);
        } else {
            ptd->stats.stats.err_oom++;
            cproxy_close_conn(c);
        }

        return;
    }

    cb_assert(c->item == NULL || ((item *) c->item)->refcount == 1);

    cproxy_pause_upstream_for_downstream(ptd, c);
}
Beispiel #16
0
/* Called when we receive a binary response header from
 * a downstream server, via try_read_command()/drive_machine().
 */
void cproxy_process_b2b_downstream(conn *c) {
    char *ikey;
    int ikeylen;
    downstream *d;
    int extlen;
    int keylen;
    uint32_t bodylen;

    cb_assert(c != NULL);
    cb_assert(c->cmd >= 0);
    cb_assert(c->next == NULL);
    cb_assert(c->item == NULL);
    cb_assert(IS_BINARY(c->protocol));
    cb_assert(IS_PROXY(c->protocol));
    cb_assert(c->substate == bin_no_state);

    d = c->extra;
    cb_assert(d);

    c->cmd_curr       = -1;
    c->cmd_start      = NULL;
    c->cmd_start_time = msec_current_time;
    c->cmd_retries    = 0;

    extlen  = c->binary_header.request.extlen;
    keylen  = c->binary_header.request.keylen;
    bodylen = c->binary_header.request.bodylen;

    if (settings.verbose > 2) {
        moxi_log_write("<%d cproxy_process_b2b_downstream %x %d %d %u\n",
                c->sfd, c->cmd, extlen, keylen, bodylen);
    }

    cb_assert(bodylen >= (uint32_t) keylen + extlen);

    process_bin_noreply(c); /* Map quiet c->cmd values into non-quiet. */

    /* Our approach is to read everything we can before */
    /* getting into big switch/case statements for the */
    /* actual processing. */

    /* Alloc an item and continue with an rest-of-body nread if */
    /* necessary.  The item will hold the entire response message */
    /* (the header + body). */

    ikey = "q";
    ikeylen = 1;

    c->item = item_alloc(ikey, ikeylen, 0, 0,
                         sizeof(c->binary_header) + bodylen);
    if (c->item != NULL) {
        item *it = c->item;
        void *rb = c->rcurr;

        cb_assert(it->refcount == 1);

        memcpy(ITEM_data(it), rb, sizeof(c->binary_header));

        if (bodylen > 0) {
            c->ritem = ITEM_data(it) + sizeof(c->binary_header);
            c->rlbytes = bodylen;
            c->substate = bin_read_set_value;

            conn_set_state(c, conn_nread);
        } else {
            /* Since we have no body bytes, we can go immediately to */
            /* the nread completed processing step. */

            cproxy_process_b2b_downstream_nread(c);
        }
    } else {
        d->ptd->stats.stats.err_oom++;
        cproxy_close_conn(c);
    }
}
Beispiel #17
0
/* Used for broadcast commands, like no-op, flush_all or stats.
 */
bool cproxy_broadcast_b2b_downstream(downstream *d, conn *uc) {
    int nwrite = 0;
    int nconns;
    int i;

    cb_assert(d != NULL);
    cb_assert(d->ptd != NULL);
    cb_assert(d->ptd->proxy != NULL);
    cb_assert(d->downstream_conns != NULL);
    cb_assert(uc != NULL);
    cb_assert(uc->next == NULL);
    cb_assert(uc->noreply == false);

    nconns = mcs_server_count(&d->mst);

    for (i = 0; i < nconns; i++) {
        conn *c = d->downstream_conns[i];
        if (c != NULL &&
            c != NULL_CONN &&
            b2b_forward_item_vbucket(uc, d, uc->item, c, -1) == true) {
            nwrite++;
        }
    }

    if (settings.verbose > 2) {
        moxi_log_write("%d: b2b broadcast nwrite %d out of %d\n",
                uc->sfd, nwrite, nconns);
    }

    if (nwrite > 0) {
        /* TODO: Handle binary 'stats reset' sub-command. */
        item *it;

        if (uc->cmd == PROTOCOL_BINARY_CMD_STAT &&
            d->merger == NULL) {
            d->merger = genhash_init(128, skeyhash_ops);
        }

        it = item_alloc("h", 1, 0, 0,
                              sizeof(protocol_binary_response_header));
        if (it != NULL) {
            protocol_binary_response_header *header =
                (protocol_binary_response_header *) ITEM_data(it);

            memset(ITEM_data(it), 0, it->nbytes);

            header->response.magic  = (uint8_t) PROTOCOL_BINARY_RES;
            header->response.opcode = uc->binary_header.request.opcode;
            header->response.opaque = uc->opaque;

            if (add_conn_item(uc, it)) {
                d->upstream_suffix     = ITEM_data(it);
                d->upstream_suffix_len = it->nbytes;
                d->upstream_status = PROTOCOL_BINARY_RESPONSE_SUCCESS;
                d->target_host_ident = NULL;

                if (settings.verbose > 2) {
                    moxi_log_write("%d: b2b broadcast upstream_suffix", uc->sfd);
                    cproxy_dump_header(uc->sfd, ITEM_data(it));
                }

                /* TODO: Handle FLUSHQ (quiet binary flush_all). */

                d->downstream_used_start = nwrite;
                d->downstream_used       = nwrite;

                cproxy_start_downstream_timeout(d, NULL);

                return true;
            }

            item_remove(it);
        }
    }

    return false;
}
Beispiel #18
0
void process_command(conn *c, char *command) {

    int comm = 0;
    int incr = 0;

    /*
     * for commands set/add/replace, we build an item and read the data
     * directly into it, then continue in nread_complete().
     */

    if (settings.verbose > 1)
        fprintf(stderr, "<%d %s\n", c->sfd, command);

    /* All incoming commands will require a response, so we cork at the beginning,
       and uncork at the very end (usually by means of out_string)  */
    set_cork(c, 1);

    if ((strncmp(command, "add ", 4) == 0 && (comm = NREAD_ADD)) ||
            (strncmp(command, "set ", 4) == 0 && (comm = NREAD_SET)) ||
            (strncmp(command, "replace ", 8) == 0 && (comm = NREAD_REPLACE))) {

        char key[251];
        int flags;
        time_t expire;
        int len, res;
        item *it;

        res = sscanf(command, "%*s %250s %u %ld %d\n", key, &flags, &expire, &len);
        if (res!=4 || strlen(key)==0 ) {
            out_string(c, "CLIENT_ERROR bad command line format");
            return;
        }
        expire = realtime(expire);
        it = item_alloc(key, flags, expire, len+2);
        if (it == 0) {
            out_string(c, "SERVER_ERROR out of memory");
            /* swallow the data line */
            c->write_and_go = conn_swallow;
            c->sbytes = len+2;
            return;
        }

        c->item_comm = comm;
        c->item = it;
        c->rcurr = ITEM_data(it);
        c->rlbytes = it->nbytes;
        c->state = conn_nread;
        return;
    }

    if ((strncmp(command, "incr ", 5) == 0 && (incr = 1)) ||
            (strncmp(command, "decr ", 5) == 0)) {
        char temp[32];
        unsigned int value;
        item *it;
        unsigned int delta;
        char key[251];
        int res;
        char *ptr;
        time_t now = time(0);

        res = sscanf(command, "%*s %250s %u\n", key, &delta);
        if (res!=2 || strlen(key)==0 ) {
            out_string(c, "CLIENT_ERROR bad command line format");
            return;
        }

        it = assoc_find(key);
        if (it && (it->it_flags & ITEM_DELETED)) {
            it = 0;
        }
        if (it && it->exptime && it->exptime < now) {
            item_unlink(it);
            it = 0;
        }

        if (!it) {
            out_string(c, "NOT_FOUND");
            return;
        }

        ptr = ITEM_data(it);
        while (*ptr && (*ptr<'0' && *ptr>'9')) ptr++;

        value = atoi(ptr);

        if (incr)
            value+=delta;
        else {
            if (delta >= value) value = 0;
            else value-=delta;
        }

        sprintf(temp, "%u", value);
        res = strlen(temp);
        if (res + 2 > it->nbytes) { /* need to realloc */
            item *new_it;
            new_it = item_alloc(ITEM_key(it), it->flags, it->exptime, res + 2 );
            if (new_it == 0) {
                out_string(c, "SERVER_ERROR out of memory");
                return;
            }
            memcpy(ITEM_data(new_it), temp, res);
            memcpy(ITEM_data(new_it) + res, "\r\n", 2);
            item_replace(it, new_it);
        } else { /* replace in-place */
            memcpy(ITEM_data(it), temp, res);
            memset(ITEM_data(it) + res, ' ', it->nbytes-res-2);
        }
        out_string(c, temp);
        return;
    }

    if (strncmp(command, "get ", 4) == 0) {

        char *start = command + 4;
        char key[251];
        int next;
        int i = 0;
        item *it;
        time_t now = time(0);

        while(sscanf(start, " %250s%n", key, &next) >= 1) {
            start+=next;
            stats.get_cmds++;
            it = assoc_find(key);
            if (it && (it->it_flags & ITEM_DELETED)) {
                it = 0;
            }
            if (settings.oldest_live && it &&
                    it->time <= settings.oldest_live) {
                item_unlink(it);
                it = 0;
            }
            if (it && it->exptime && it->exptime < now) {
                item_unlink(it);
                it = 0;
            }

            if (it) {
                if (i >= c->isize) {
                    item **new_list = realloc(c->ilist, sizeof(item *)*c->isize*2);
                    if (new_list) {
                        c->isize *= 2;
                        c->ilist = new_list;
                    } else break;
                }
                stats.get_hits++;
                it->refcount++;
                item_update(it);
                *(c->ilist + i) = it;
                i++;
            } else stats.get_misses++;
        }
        c->icurr = c->ilist;
        c->ileft = i;
        if (c->ileft) {
            c->ipart = 0;
            c->state = conn_mwrite;
            c->ibytes = 0;
            return;
        } else {
            out_string(c, "END");
            return;
        }
    }

    if (strncmp(command, "delete ", 7) == 0) {
        char key[251];
        item *it;
        int res;
        time_t exptime = 0;

        res = sscanf(command, "%*s %250s %ld", key, &exptime);
        it = assoc_find(key);
        if (!it) {
            out_string(c, "NOT_FOUND");
            return;
        }

        if (exptime == 0) {
            item_unlink(it);
            out_string(c, "DELETED");
            return;
        }

        if (delcurr >= deltotal) {
            item **new_delete = realloc(todelete, sizeof(item *) * deltotal * 2);
            if (new_delete) {
                todelete = new_delete;
                deltotal *= 2;
            } else {
                /*
                 * can't delete it immediately, user wants a delay,
                 * but we ran out of memory for the delete queue
                 */
                out_string(c, "SERVER_ERROR out of memory");
                return;
            }
        }

        exptime = realtime(exptime);

        it->refcount++;
        /* use its expiration time as its deletion time now */
        it->exptime = exptime;
        it->it_flags |= ITEM_DELETED;
        todelete[delcurr++] = it;
        out_string(c, "DELETED");
        return;
    }

    if (strncmp(command, "stats", 5) == 0) {
        process_stat(c, command);
        return;
    }

    if (strcmp(command, "flush_all") == 0) {
        settings.oldest_live = time(0);
        out_string(c, "OK");
        return;
    }

    if (strcmp(command, "version") == 0) {
        out_string(c, "VERSION " VERSION);
        return;
    }

    if (strcmp(command, "quit") == 0) {
        c->state = conn_closing;
        return;
    }

    if (strncmp(command, "slabs reassign ", 15) == 0) {
        int src, dst;
        char *start = command+15;
        if (sscanf(start, "%u %u\r\n", &src, &dst) == 2) {
            int rv = slabs_reassign(src, dst);
            if (rv == 1) {
                out_string(c, "DONE");
                return;
            }
            if (rv == 0) {
                out_string(c, "CANT");
                return;
            }
            if (rv == -1) {
                out_string(c, "BUSY");
                return;
            }
        }
        out_string(c, "CLIENT_ERROR bogus command");
        return;
    }

    out_string(c, "ERROR");
    return;
}
Beispiel #19
0
/* refcount == 0 is safe since nobody can incr while item_lock is held.
 * refcount != 0 is impossible since flags/etc can be modified in other
 * threads. instead, note we found a busy one and bail. logic in do_item_get
 * will prevent busy items from continuing to be busy
 * NOTE: This is checking it_flags outside of an item lock. I believe this
 * works since it_flags is 8 bits, and we're only ever comparing a single bit
 * regardless. ITEM_SLABBED bit will always be correct since we're holding the
 * lock which modifies that bit. ITEM_LINKED won't exist if we're between an
 * item having ITEM_SLABBED removed, and the key hasn't been added to the item
 * yet. The memory barrier from the slabs lock should order the key write and the
 * flags to the item?
 * If ITEM_LINKED did exist and was just removed, but we still see it, that's
 * still safe since it will have a valid key, which we then lock, and then
 * recheck everything.
 * This may not be safe on all platforms; If not, slabs_alloc() will need to
 * seed the item key while holding slabs_lock.
 */
static int slab_rebalance_move(void) {
    slabclass_t *s_cls;
    int x;
    int was_busy = 0;
    int refcount = 0;
    uint32_t hv;
    void *hold_lock;
    enum move_status status = MOVE_PASS;

    pthread_mutex_lock(&slabs_lock);

    s_cls = &slabclass[slab_rebal.s_clsid];

    for (x = 0; x < slab_bulk_check; x++) {
        hv = 0;
        hold_lock = NULL;
        item *it = slab_rebal.slab_pos;
        item_chunk *ch = NULL;
        status = MOVE_PASS;
        if (it->it_flags & ITEM_CHUNK) {
            /* This chunk is a chained part of a larger item. */
            ch = (item_chunk *) it;
            /* Instead, we use the head chunk to find the item and effectively
             * lock the entire structure. If a chunk has ITEM_CHUNK flag, its
             * head cannot be slabbed, so the normal routine is safe. */
            it = ch->head;
            assert(it->it_flags & ITEM_CHUNKED);
        }

        /* ITEM_FETCHED when ITEM_SLABBED is overloaded to mean we've cleared
         * the chunk for move. Only these two flags should exist.
         */
        if (it->it_flags != (ITEM_SLABBED|ITEM_FETCHED)) {
            /* ITEM_SLABBED can only be added/removed under the slabs_lock */
            if (it->it_flags & ITEM_SLABBED) {
                assert(ch == NULL);
                slab_rebalance_cut_free(s_cls, it);
                status = MOVE_FROM_SLAB;
            } else if ((it->it_flags & ITEM_LINKED) != 0) {
                /* If it doesn't have ITEM_SLABBED, the item could be in any
                 * state on its way to being freed or written to. If no
                 * ITEM_SLABBED, but it's had ITEM_LINKED, it must be active
                 * and have the key written to it already.
                 */
                hv = hash(ITEM_key(it), it->nkey);
                if ((hold_lock = item_trylock(hv)) == NULL) {
                    status = MOVE_LOCKED;
                } else {
                    refcount = refcount_incr(it);
                    if (refcount == 2) { /* item is linked but not busy */
                        /* Double check ITEM_LINKED flag here, since we're
                         * past a memory barrier from the mutex. */
                        if ((it->it_flags & ITEM_LINKED) != 0) {
                            status = MOVE_FROM_LRU;
                        } else {
                            /* refcount == 1 + !ITEM_LINKED means the item is being
                             * uploaded to, or was just unlinked but hasn't been freed
                             * yet. Let it bleed off on its own and try again later */
                            status = MOVE_BUSY;
                        }
                    } else {
                        if (settings.verbose > 2) {
                            fprintf(stderr, "Slab reassign hit a busy item: refcount: %d (%d -> %d)\n",
                                it->refcount, slab_rebal.s_clsid, slab_rebal.d_clsid);
                        }
                        status = MOVE_BUSY;
                    }
                    /* Item lock must be held while modifying refcount */
                    if (status == MOVE_BUSY) {
                        refcount_decr(it);
                        item_trylock_unlock(hold_lock);
                    }
                }
            } else {
                /* See above comment. No ITEM_SLABBED or ITEM_LINKED. Mark
                 * busy and wait for item to complete its upload. */
                status = MOVE_BUSY;
            }
        }

        int save_item = 0;
        item *new_it = NULL;
        size_t ntotal = 0;
        switch (status) {
            case MOVE_FROM_LRU:
                /* Lock order is LRU locks -> slabs_lock. unlink uses LRU lock.
                 * We only need to hold the slabs_lock while initially looking
                 * at an item, and at this point we have an exclusive refcount
                 * (2) + the item is locked. Drop slabs lock, drop item to
                 * refcount 1 (just our own, then fall through and wipe it
                 */
                /* Check if expired or flushed */
                ntotal = ITEM_ntotal(it);
                /* REQUIRES slabs_lock: CHECK FOR cls->sl_curr > 0 */
                if (ch == NULL && (it->it_flags & ITEM_CHUNKED)) {
                    /* Chunked should be identical to non-chunked, except we need
                     * to swap out ntotal for the head-chunk-total. */
                    ntotal = s_cls->size;
                }
                if ((it->exptime != 0 && it->exptime < current_time)
                    || item_is_flushed(it)) {
                    /* Expired, don't save. */
                    save_item = 0;
                } else if (ch == NULL &&
                        (new_it = slab_rebalance_alloc(ntotal, slab_rebal.s_clsid)) == NULL) {
                    /* Not a chunk of an item, and nomem. */
                    save_item = 0;
                    slab_rebal.evictions_nomem++;
                } else if (ch != NULL &&
                        (new_it = slab_rebalance_alloc(s_cls->size, slab_rebal.s_clsid)) == NULL) {
                    /* Is a chunk of an item, and nomem. */
                    save_item = 0;
                    slab_rebal.evictions_nomem++;
                } else {
                    /* Was whatever it was, and we have memory for it. */
                    save_item = 1;
                }
                pthread_mutex_unlock(&slabs_lock);
                unsigned int requested_adjust = 0;
                if (save_item) {
                    if (ch == NULL) {
                        assert((new_it->it_flags & ITEM_CHUNKED) == 0);
                        /* if free memory, memcpy. clear prev/next/h_bucket */
                        memcpy(new_it, it, ntotal);
                        new_it->prev = 0;
                        new_it->next = 0;
                        new_it->h_next = 0;
                        /* These are definitely required. else fails assert */
                        new_it->it_flags &= ~ITEM_LINKED;
                        new_it->refcount = 0;
                        do_item_replace(it, new_it, hv);
                        /* Need to walk the chunks and repoint head  */
                        if (new_it->it_flags & ITEM_CHUNKED) {
                            item_chunk *fch = (item_chunk *) ITEM_data(new_it);
                            fch->next->prev = fch;
                            while (fch) {
                                fch->head = new_it;
                                fch = fch->next;
                            }
                        }
                        it->refcount = 0;
                        it->it_flags = ITEM_SLABBED|ITEM_FETCHED;
#ifdef DEBUG_SLAB_MOVER
                        memcpy(ITEM_key(it), "deadbeef", 8);
#endif
                        slab_rebal.rescues++;
                        requested_adjust = ntotal;
                    } else {
                        item_chunk *nch = (item_chunk *) new_it;
                        /* Chunks always have head chunk (the main it) */
                        ch->prev->next = nch;
                        if (ch->next)
                            ch->next->prev = nch;
                        memcpy(nch, ch, ch->used + sizeof(item_chunk));
                        ch->refcount = 0;
                        ch->it_flags = ITEM_SLABBED|ITEM_FETCHED;
                        slab_rebal.chunk_rescues++;
#ifdef DEBUG_SLAB_MOVER
                        memcpy(ITEM_key((item *)ch), "deadbeef", 8);
#endif
                        refcount_decr(it);
                        requested_adjust = s_cls->size;
                    }
                } else {
                    /* restore ntotal in case we tried saving a head chunk. */
                    ntotal = ITEM_ntotal(it);
                    do_item_unlink(it, hv);
                    slabs_free(it, ntotal, slab_rebal.s_clsid);
                    /* Swing around again later to remove it from the freelist. */
                    slab_rebal.busy_items++;
                    was_busy++;
                }
                item_trylock_unlock(hold_lock);
                pthread_mutex_lock(&slabs_lock);
                /* Always remove the ntotal, as we added it in during
                 * do_slabs_alloc() when copying the item.
                 */
                s_cls->requested -= requested_adjust;
                break;
            case MOVE_FROM_SLAB:
                it->refcount = 0;
                it->it_flags = ITEM_SLABBED|ITEM_FETCHED;
#ifdef DEBUG_SLAB_MOVER
                memcpy(ITEM_key(it), "deadbeef", 8);
#endif
                break;
            case MOVE_BUSY:
            case MOVE_LOCKED:
                slab_rebal.busy_items++;
                was_busy++;
                break;
            case MOVE_PASS:
                break;
        }

        slab_rebal.slab_pos = (char *)slab_rebal.slab_pos + s_cls->size;
        if (slab_rebal.slab_pos >= slab_rebal.slab_end)
            break;
    }

    if (slab_rebal.slab_pos >= slab_rebal.slab_end) {
        /* Some items were busy, start again from the top */
        if (slab_rebal.busy_items) {
            slab_rebal.slab_pos = slab_rebal.slab_start;
            STATS_LOCK();
            stats.slab_reassign_busy_items += slab_rebal.busy_items;
            STATS_UNLOCK();
            slab_rebal.busy_items = 0;
        } else {
            slab_rebal.done++;
        }
    }

    pthread_mutex_unlock(&slabs_lock);

    return was_busy;
}
Beispiel #20
0
/* Do the actual work of forwarding the command from an
 * upstream binary conn to its assigned binary downstream.
 */
bool cproxy_forward_b2b_downstream(downstream *d) {
    int nc;
    int server_index;
    conn *uc;

    cb_assert(d != NULL);
    cb_assert(d->ptd != NULL);
    cb_assert(d->ptd->proxy != NULL);
    cb_assert(d->downstream_conns != NULL);
    cb_assert(d->downstream_used == 0);
    cb_assert(d->multiget == NULL);
    cb_assert(d->merger == NULL);

    d->downstream_used_start = 0;

    uc = d->upstream_conn;
    if (settings.verbose > 2) {
        moxi_log_write("%d: cproxy_forward_b2b_downstream %x\n",
                uc->sfd, uc->cmd);
    }

    cb_assert(uc != NULL);
    cb_assert(uc->state == conn_pause);
    cb_assert(uc->cmd >= 0);
    cb_assert(uc->cmd_start == NULL);
    cb_assert(uc->thread != NULL);
    cb_assert(uc->thread->base != NULL);
    cb_assert(uc->noreply == false);
    cb_assert(IS_BINARY(uc->protocol));
    cb_assert(IS_PROXY(uc->protocol));

    server_index = -1;

    if (cproxy_is_broadcast_cmd(uc->cmd) == false && uc->corked == NULL) {
        item *it = uc->item;
        protocol_binary_request_header *req;
        char *key;
        int key_len;

        cb_assert(it != NULL);

        req = (protocol_binary_request_header *) ITEM_data(it);
        key = ((char *) req) + sizeof(*req) + req->request.extlen;
        key_len = ntohs(req->request.keylen);

        if (key_len > 0) {
            server_index = cproxy_server_index(d, key, key_len, NULL);
            if (server_index < 0) {
                return false;
            }
        }
    }

    nc = cproxy_connect_downstream(d, uc->thread, server_index);
    if (nc == -1) {
        return true;
    }

    if (nc > 0) {
        int i;
        int nconns;

        cb_assert(d->downstream_conns != NULL);

        if (d->usec_start == 0 &&
            d->ptd->behavior_pool.base.time_stats) {
            d->usec_start = usec_now();
        }

        nconns = mcs_server_count(&d->mst);
        for (i = 0; i < nconns; i++) {
            conn *c = d->downstream_conns[i];
            if (c != NULL &&
                c != NULL_CONN) {
                cb_assert(c->state == conn_pause);
                cb_assert(c->item == NULL);

                if (cproxy_prep_conn_for_write(c) == false) {
                    d->ptd->stats.stats.err_downstream_write_prep++;
                    cproxy_close_conn(c);

                    return false;
                }
            }
        }

        /* Uncork the saved-up quiet binary commands. */

        cproxy_binary_uncork_cmds(d, uc);

        if (uc->cmd == PROTOCOL_BINARY_CMD_FLUSH ||
            uc->cmd == PROTOCOL_BINARY_CMD_NOOP ||
            uc->cmd == PROTOCOL_BINARY_CMD_STAT) {
            return cproxy_broadcast_b2b_downstream(d, uc);
        }

        return cproxy_forward_b2b_simple_downstream(d, uc);
    }

    if (settings.verbose > 2) {
        moxi_log_write("%d: cproxy_forward_b2b_downstream connect failed\n",
                uc->sfd);
    }

    return false;
}
Beispiel #21
0
static int storage_write(void *storage, const int clsid, const int item_age) {
    int did_moves = 0;
    struct lru_pull_tail_return it_info;

    it_info.it = NULL;
    lru_pull_tail(clsid, COLD_LRU, 0, LRU_PULL_RETURN_ITEM, 0, &it_info);
    /* Item is locked, and we have a reference to it. */
    if (it_info.it == NULL) {
        return did_moves;
    }

    obj_io io;
    item *it = it_info.it;
    /* First, storage for the header object */
    size_t orig_ntotal = ITEM_ntotal(it);
    uint32_t flags;
    if ((it->it_flags & ITEM_HDR) == 0 &&
            (item_age == 0 || current_time - it->time > item_age)) {
        FLAGS_CONV(it, flags);
        item *hdr_it = do_item_alloc(ITEM_key(it), it->nkey, flags, it->exptime, sizeof(item_hdr));
        /* Run the storage write understanding the start of the item is dirty.
         * We will fill it (time/exptime/etc) from the header item on read.
         */
        if (hdr_it != NULL) {
            int bucket = (it->it_flags & ITEM_CHUNKED) ?
                PAGE_BUCKET_CHUNKED : PAGE_BUCKET_DEFAULT;
            // Compress soon to expire items into similar pages.
            if (it->exptime - current_time < settings.ext_low_ttl) {
                bucket = PAGE_BUCKET_LOWTTL;
            }
            hdr_it->it_flags |= ITEM_HDR;
            io.len = orig_ntotal;
            io.mode = OBJ_IO_WRITE;
            // NOTE: when the item is read back in, the slab mover
            // may see it. Important to have refcount>=2 or ~ITEM_LINKED
            assert(it->refcount >= 2);
            // NOTE: write bucket vs free page bucket will disambiguate once
            // lowttl feature is better understood.
            if (extstore_write_request(storage, bucket, bucket, &io) == 0) {
                // cuddle the hash value into the time field so we don't have
                // to recalculate it.
                item *buf_it = (item *) io.buf;
                buf_it->time = it_info.hv;
                // copy from past the headers + time headers.
                // TODO: should be in items.c
                if (it->it_flags & ITEM_CHUNKED) {
                    // Need to loop through the item and copy
                    item_chunk *sch = (item_chunk *) ITEM_schunk(it);
                    int remain = orig_ntotal;
                    int copied = 0;
                    // copy original header
                    int hdrtotal = ITEM_ntotal(it) - it->nbytes;
                    memcpy((char *)io.buf+STORE_OFFSET, (char *)it+STORE_OFFSET, hdrtotal - STORE_OFFSET);
                    copied = hdrtotal;
                    // copy data in like it were one large object.
                    while (sch && remain) {
                        assert(remain >= sch->used);
                        memcpy((char *)io.buf+copied, sch->data, sch->used);
                        // FIXME: use one variable?
                        remain -= sch->used;
                        copied += sch->used;
                        sch = sch->next;
                    }
                } else {
                    memcpy((char *)io.buf+STORE_OFFSET, (char *)it+STORE_OFFSET, io.len-STORE_OFFSET);
                }
                // crc what we copied so we can do it sequentially.
                buf_it->it_flags &= ~ITEM_LINKED;
                buf_it->exptime = crc32c(0, (char*)io.buf+STORE_OFFSET, orig_ntotal-STORE_OFFSET);
                extstore_write(storage, &io);
                item_hdr *hdr = (item_hdr *) ITEM_data(hdr_it);
                hdr->page_version = io.page_version;
                hdr->page_id = io.page_id;
                hdr->offset  = io.offset;
                // overload nbytes for the header it
                hdr_it->nbytes = it->nbytes;
                /* success! Now we need to fill relevant data into the new
                 * header and replace. Most of this requires the item lock
                 */
                /* CAS gets set while linking. Copy post-replace */
                item_replace(it, hdr_it, it_info.hv);
                ITEM_set_cas(hdr_it, ITEM_get_cas(it));
                do_item_remove(hdr_it);
                did_moves = 1;
                LOGGER_LOG(NULL, LOG_EVICTIONS, LOGGER_EXTSTORE_WRITE, it, bucket);
            } else {
                /* Failed to write for some reason, can't continue. */
                slabs_free(hdr_it, ITEM_ntotal(hdr_it), ITEM_clsid(hdr_it));
            }
        }
    }
    do_item_remove(it);
    item_unlock(it_info.hv);
    return did_moves;
}
Beispiel #22
0
/* We reach here after nread'ing a header+body into an item.
 */
void cproxy_process_b2b_downstream_nread(conn *c) {
    conn *uc;
    item *it;
    downstream *d;
    protocol_binary_response_header *header;
    int extlen;
    int keylen;
    uint32_t bodylen;
    int status;
    int opcode;

    cb_assert(c != NULL);
    cb_assert(c->cmd >= 0);
    cb_assert(c->next == NULL);
    cb_assert(c->cmd_start == NULL);
    cb_assert(IS_BINARY(c->protocol));
    cb_assert(IS_PROXY(c->protocol));

    header = (protocol_binary_response_header *) &c->binary_header;
    extlen = header->response.extlen;
    keylen = header->response.keylen;
    bodylen = header->response.bodylen;
    status = ntohs(header->response.status);
    opcode = header->response.opcode;

    if (settings.verbose > 2) {
        moxi_log_write("<%d cproxy_process_b2b_downstream_nread %x %x %d %d %u %d %x\n",
                c->sfd, c->cmd, opcode, extlen, keylen, bodylen, c->noreply, status);
    }

    d = c->extra;
    cb_assert(d != NULL);
    cb_assert(d->ptd != NULL);
    cb_assert(d->ptd->proxy != NULL);

    /* TODO: Need to handle quiet binary command error response, */
    /*       in the right order. */
    /* TODO: Need to handle not-my-vbucket error during a quiet cmd. */

    uc = d->upstream_conn;
    it = c->item;

    /* Clear c->item because we either move it to the upstream or */
    /* item_remove() it on error. */

    c->item = NULL;

    cb_assert(it != NULL);
    cb_assert(it->refcount == 1);

    if (cproxy_binary_ignore_reply(c, header, it)) {
        return;
    }

    if (c->noreply) {
        conn_set_state(c, conn_new_cmd);
    } else {
        conn_set_state(c, conn_pause);

        if (opcode == PROTOCOL_BINARY_CMD_NOOP ||
            opcode == PROTOCOL_BINARY_CMD_FLUSH) {
            goto done;
        }

        if (opcode == PROTOCOL_BINARY_CMD_STAT) {
            if (status == PROTOCOL_BINARY_RESPONSE_SUCCESS) {
                if (keylen > 0) {
                    if (d->merger != NULL) {
                        char *key = (ITEM_data(it)) + sizeof(*header) + extlen;
                        char *val = key + keylen;

                        protocol_stats_merge_name_val(d->merger, "STAT", 4,
                                                      key, keylen,
                                                      val, bodylen - keylen - extlen);
                    }

                    conn_set_state(c, conn_new_cmd); /* Get next STATS response. */
                }
            }

            goto done;
        }

        /* If the client is still there, we should handle */
        /* a not-my-vbucket error with a possible retry. */

        if (uc != NULL &&
            status == PROTOCOL_BINARY_RESPONSE_NOT_MY_VBUCKET) {

            int max_retries;
            protocol_binary_request_header *req;
            int vbucket;
            int sindex;

            if (settings.verbose > 2) {
                moxi_log_write("<%d cproxy_process_b2b_downstream_nread not-my-vbucket, "
                        "cmd: %x %d\n",
                        c->sfd, header->response.opcode, uc->item != NULL);
            }

            cb_assert(uc->item != NULL);

            req = (protocol_binary_request_header *)ITEM_data((item*)uc->item);

            vbucket = ntohs(req->request.reserved);
            sindex = downstream_conn_index(d, c);

            if (settings.verbose > 2) {
                moxi_log_write("<%d cproxy_process_b2b_downstream_nread not-my-vbucket, "
                        "cmd: %x not multi-key get, sindex %d, vbucket %d, retries %d\n",
                        c->sfd, header->response.opcode,
                        sindex, vbucket, uc->cmd_retries);
            }

            mcs_server_invalid_vbucket(&d->mst, sindex, vbucket);

            /* As long as the upstream is still open and we haven't */
            /* retried too many times already. */

            max_retries = cproxy_max_retries(d);

            if (uc->cmd_retries < max_retries) {
                uc->cmd_retries++;

                d->upstream_retry++;
                d->ptd->stats.stats.tot_retry_vbucket++;

                goto done;
            }

            if (settings.verbose > 2) {
                moxi_log_write("%d: cproxy_process_b2b_downstream_nread not-my-vbucket, "
                        "cmd: %x skipping retry %d >= %d\n",
                        c->sfd, header->response.opcode, uc->cmd_retries,
                        max_retries);
            }
        }
    }

    /* Write the response to the upstream connection. */

    if (uc != NULL) {
        if (settings.verbose > 2) {
            moxi_log_write("<%d cproxy_process_b2b_downstream_nread got %u\n",
                           c->sfd, it->nbytes);

            cproxy_dump_header(c->sfd, ITEM_data(it));
        }

        if (add_conn_item(uc, it) == true) {
            it->refcount++;

            if (add_iov(uc, ITEM_data(it), it->nbytes) == 0) {
                /* If we got a quiet response, however, don't change the */
                /* upstream connection's state (should be in paused state), */
                /* as we expect the downstream server to provide a */
                /* verbal/non-quiet response that moves the downstream */
                /* conn through the conn_pause countdown codepath. */

                if (c->noreply == false) {
                    cproxy_update_event_write(d, uc);

                    conn_set_state(uc, conn_mwrite);
                }

                goto done;
            }
        }

        d->ptd->stats.stats.err_oom++;
        cproxy_close_conn(uc);
    }

 done:
    if (it != NULL) {
        item_remove(it);
    }
}
Beispiel #23
0
static void storage_compact_readback(void *storage, logger *l,
        bool drop_unread, char *readback_buf,
        uint32_t page_id, uint64_t page_version, uint64_t read_size) {
    uint64_t offset = 0;
    unsigned int rescues = 0;
    unsigned int lost = 0;
    unsigned int skipped = 0;

    while (offset < read_size) {
        item *hdr_it = NULL;
        item_hdr *hdr = NULL;
        item *it = (item *)(readback_buf+offset);
        unsigned int ntotal;
        // probably zeroed out junk at the end of the wbuf
        if (it->nkey == 0) {
            break;
        }

        ntotal = ITEM_ntotal(it);
        uint32_t hv = (uint32_t)it->time;
        item_lock(hv);
        // We don't have a conn and don't need to do most of do_item_get
        hdr_it = assoc_find(ITEM_key(it), it->nkey, hv);
        if (hdr_it != NULL) {
            bool do_write = false;
            refcount_incr(hdr_it);

            // Check validity but don't bother removing it.
            if ((hdr_it->it_flags & ITEM_HDR) && !item_is_flushed(hdr_it) &&
                   (hdr_it->exptime == 0 || hdr_it->exptime > current_time)) {
                hdr = (item_hdr *)ITEM_data(hdr_it);
                if (hdr->page_id == page_id && hdr->page_version == page_version) {
                    // Item header is still completely valid.
                    extstore_delete(storage, page_id, page_version, 1, ntotal);
                    // drop inactive items.
                    if (drop_unread && GET_LRU(hdr_it->slabs_clsid) == COLD_LRU) {
                        do_write = false;
                        skipped++;
                    } else {
                        do_write = true;
                    }
                }
            }

            if (do_write) {
                bool do_update = false;
                int tries;
                obj_io io;
                io.len = ntotal;
                io.mode = OBJ_IO_WRITE;
                for (tries = 10; tries > 0; tries--) {
                    if (extstore_write_request(storage, PAGE_BUCKET_COMPACT, PAGE_BUCKET_COMPACT, &io) == 0) {
                        memcpy(io.buf, it, io.len);
                        extstore_write(storage, &io);
                        do_update = true;
                        break;
                    } else {
                        usleep(1000);
                    }
                }

                if (do_update) {
                    if (it->refcount == 2) {
                        hdr->page_version = io.page_version;
                        hdr->page_id = io.page_id;
                        hdr->offset = io.offset;
                        rescues++;
                    } else {
                        lost++;
                        // TODO: re-alloc and replace header.
                    }
                } else {
                    lost++;
                }
            }

            do_item_remove(hdr_it);
        }

        item_unlock(hv);
        offset += ntotal;
        if (read_size - offset < sizeof(struct _stritem))
            break;
    }

    STATS_LOCK();
    stats.extstore_compact_lost += lost;
    stats.extstore_compact_rescues += rescues;
    stats.extstore_compact_skipped += skipped;
    STATS_UNLOCK();
    LOGGER_LOG(l, LOG_SYSEVENTS, LOGGER_COMPACT_READ_END,
            NULL, page_id, offset, rescues, lost, skipped);
}
Beispiel #24
0
void cproxy_process_upstream_binary(conn *c) {
    cb_assert(c != NULL);
    cb_assert(c->cmd >= 0);
    cb_assert(c->next == NULL);
    cb_assert(c->item == NULL);
    cb_assert(IS_BINARY(c->protocol));
    cb_assert(IS_PROXY(c->protocol));

    proxy_td *ptd = c->extra;
    cb_assert(ptd != NULL);

    if (!cproxy_prep_conn_for_write(c)) {
        ptd->stats.stats.err_upstream_write_prep++;
        conn_set_state(c, conn_closing);
        return;
    }

    c->cmd_curr       = -1;
    c->cmd_start      = NULL;
    c->cmd_start_time = msec_current_time;
    c->cmd_retries    = 0;

    int      extlen  = c->binary_header.request.extlen;
    int      keylen  = c->binary_header.request.keylen;
    uint32_t bodylen = c->binary_header.request.bodylen;

    cb_assert(bodylen >= (uint32_t) keylen + extlen);

    if (settings.verbose > 2) {
        moxi_log_write("<%d cproxy_process_upstream_binary %x %d %d %u\n",
                c->sfd, c->cmd, extlen, keylen, bodylen);
    }

    process_bin_noreply(c); /* Map quiet c->cmd values into non-quiet. */

    if (c->cmd == PROTOCOL_BINARY_CMD_VERSION ||
        c->cmd == PROTOCOL_BINARY_CMD_QUIT) {
        dispatch_bin_command(c);
        return;
    }

    /* Alloc an item and continue with an rest-of-body nread if */
    /* necessary.  The item will hold the entire request message */
    /* (the header + body). */

    char *ikey    = "u";
    int   ikeylen = 1;

    c->item = item_alloc(ikey, ikeylen, 0, 0,
                         sizeof(c->binary_header) + bodylen);
    if (c->item != NULL) {
        item *it = c->item;
        void *rb = c->rcurr;

        cb_assert(it->refcount == 1);

        memcpy(ITEM_data(it), rb, sizeof(c->binary_header));

        if (bodylen > 0) {
            c->ritem = ITEM_data(it) + sizeof(c->binary_header);
            c->rlbytes = bodylen;
            c->substate = bin_read_set_value;

            conn_set_state(c, conn_nread);
        } else {
            /* Since we have no body bytes, we can go immediately to */
            /* the nread completed processing step. */

            if (c->binary_header.request.opcode == PROTOCOL_BINARY_CMD_SASL_LIST_MECHS) {
                /* TODO: One day handle more than just PLAIN sasl auth. */

                write_bin_response(c, "PLAIN", 0, 0, strlen("PLAIN"));
                return;
            }

            cproxy_pause_upstream_for_downstream(ptd, c);
        }
    } else {
        if (settings.verbose > 2) {
            moxi_log_write("<%d cproxy_process_upstream_binary OOM\n",
                           c->sfd);
        }
        ptd->stats.stats.err_oom++;
        cproxy_close_conn(c);
    }
}
static int replication_rep(conn *c, item *it)
{
    int   r = 0;
    int exp = 0;
    int len = 0;
    char *s = "rep ";
    char *n = "\r\n";
    char *p = NULL;
    char flag[40];

    if(it->exptime)
        exp = it->exptime + stats.started;
    flag[0]=0;
    if(p=ITEM_suffix(it)){
        int i;
        memcpy(flag, p, it->nsuffix - 2);
        flag[it->nsuffix - 2] = 0;
        for(i=0;i<strlen(flag);i++){
            if(flag[i] > ' ')
                break;
        }
        memmove(flag,&flag[i],strlen(flag)-i);
        for(p=flag;*p>' ';p++);
        *p=0;
    }
    len += strlen(s);
    len += it->nkey;
    len += 1;
    len += strlen(flag);
    len += 1;
    len += replication_get_num(NULL, exp);
    len += 1;
    len += replication_get_num(NULL, it->nbytes - 2);
    len += 1;
    len += replication_get_num(NULL, it->cas_id);
    len += strlen(n);
    len += it->nbytes;
    len += strlen(n);
    if(replication_alloc(c,len) == -1){
        fprintf(stderr, "replication: rep alloc error\n");
        return(-1);
    }
    p = c->wcurr + c->wbytes;
    memcpy(p, s, strlen(s));
    p += strlen(s);
    memcpy(p, ITEM_key(it), it->nkey);
    p += it->nkey;
    *(p++) = ' ';
    memcpy(p, flag, strlen(flag));
    p += strlen(flag);
    *(p++) = ' ';
    p += replication_get_num(p, exp);
    *(p++) = ' ';
    p += replication_get_num(p, it->nbytes - 2);
    *(p++) = ' ';
    p += replication_get_num(p, it->cas_id);
    memcpy(p, n, strlen(n));
    p += strlen(n);
    memcpy(p, ITEM_data(it), it->nbytes);
    p += it->nbytes;
    c->wbytes = p - c->wcurr;
    return(0);
}
Beispiel #26
0
/* Forward an upstream command that came with item data,
 * like set/add/replace/etc.
 */
bool cproxy_forward_a2a_item_downstream(downstream *d, short cmd,
                                        item *it, conn *uc) {
    assert(d != NULL);
    assert(d->ptd != NULL);
    assert(d->ptd->proxy != NULL);
    assert(d->downstream_conns != NULL);
    assert(it != NULL);
    assert(uc != NULL);
    assert(uc->next == NULL);

    // Assuming we're already connected to downstream.
    //
    bool self = false;

    conn *c = cproxy_find_downstream_conn(d, ITEM_key(it), it->nkey,
                                          &self);
    if (c != NULL) {
        if (self) {
            cproxy_optimize_to_self(d, uc, uc->cmd_start);
            complete_nread_ascii(uc);
            return true;
        }

        if (cproxy_prep_conn_for_write(c)) {
            assert(c->state == conn_pause);

            char *verb = nread_text(cmd);
            assert(verb != NULL);

            char *str_flags   = ITEM_suffix(it);
            char *str_length  = strchr(str_flags + 1, ' ');
            int   len_flags   = str_length - str_flags;
            int   len_length  = it->nsuffix - len_flags - 2;
            char *str_exptime = add_conn_suffix(c);
            char *str_cas     = (cmd == NREAD_CAS ? add_conn_suffix(c) : NULL);

            if (str_flags != NULL &&
                str_length != NULL &&
                len_flags > 1 &&
                len_length > 1 &&
                str_exptime != NULL &&
                (cmd != NREAD_CAS ||
                 str_cas != NULL)) {
                sprintf(str_exptime, " %u", it->exptime);

                if (str_cas != NULL)
                    sprintf(str_cas, " %llu",
                            (unsigned long long) ITEM_get_cas(it));

                if (add_iov(c, verb, strlen(verb)) == 0 &&
                    add_iov(c, ITEM_key(it), it->nkey) == 0 &&
                    add_iov(c, str_flags, len_flags) == 0 &&
                    add_iov(c, str_exptime, strlen(str_exptime)) == 0 &&
                    add_iov(c, str_length, len_length) == 0 &&
                    (str_cas == NULL ||
                     add_iov(c, str_cas, strlen(str_cas)) == 0) &&
                    (uc->noreply == false ||
                     add_iov(c, " noreply", 8) == 0) &&
                    add_iov(c, ITEM_data(it) - 2, it->nbytes + 2) == 0) {
                    conn_set_state(c, conn_mwrite);
                    c->write_and_go = conn_new_cmd;

                    if (update_event(c, EV_WRITE | EV_PERSIST)) {
                        d->downstream_used_start = 1;
                        d->downstream_used       = 1;

                        if (cproxy_dettach_if_noreply(d, uc) == false) {
                            cproxy_start_downstream_timeout(d, c);

                            // During a synchronous (with-reply) SET,
                            // handle fire-&-forget SET optimization.
                            //
                            if (cmd == NREAD_SET &&
                                cproxy_optimize_set_ascii(d, uc,
                                                          ITEM_key(it),
                                                          it->nkey)) {
                                d->ptd->stats.stats.tot_optimize_sets++;
                            }
                        } else {
                            c->write_and_go = conn_pause;

                            mcache_delete(&d->ptd->proxy->front_cache,
                                          ITEM_key(it), it->nkey);
                        }

                        return true;
                    }
                }

                d->ptd->stats.stats.err_oom++;
                cproxy_close_conn(c);
            } else {
                // TODO: Handle this weird error case.
            }
        } else {
            d->ptd->stats.stats.err_downstream_write_prep++;
            cproxy_close_conn(c);
        }

        if (settings.verbose > 1)
            fprintf(stderr, "Proxy item write out of memory");
    }

    return false;
}
Beispiel #27
0
item *do_item_alloc(char *key, const size_t nkey, const unsigned int flags,
                    const rel_time_t exptime, const int nbytes) {
    int i;
    uint8_t nsuffix;
    item *it = NULL;
    char suffix[40];
    size_t ntotal = item_make_header(nkey + 1, flags, nbytes, suffix, &nsuffix);
    if (settings.use_cas) {
        ntotal += sizeof(uint64_t);
    }

    unsigned int id = slabs_clsid(ntotal);
    if (id == 0)
        return 0;

    /* If no memory is available, attempt a direct LRU juggle/eviction */
    /* This is a race in order to simplify lru_pull_tail; in cases where
     * locked items are on the tail, you want them to fall out and cause
     * occasional OOM's, rather than internally work around them.
     * This also gives one fewer code path for slab alloc/free
     */
    /* TODO: if power_largest, try a lot more times? or a number of times
     * based on how many chunks the new object should take up?
     * or based on the size of an object lru_pull_tail() says it evicted?
     * This is a classical GC problem if "large items" are of too varying of
     * sizes. This is actually okay here since the larger the data, the more
     * bandwidth it takes, the more time we can loop in comparison to serving
     * and replacing small items.
     */
    for (i = 0; i < 10; i++) {
        uint64_t total_bytes;
        /* Try to reclaim memory first */
        if (!settings.lru_maintainer_thread) {
            lru_pull_tail(id, COLD_LRU, 0, 0);
        }
        it = slabs_alloc(ntotal, id, &total_bytes, 0);

        if (settings.expirezero_does_not_evict)
            total_bytes -= noexp_lru_size(id);

        if (it == NULL) {
            if (settings.lru_maintainer_thread) {
                lru_pull_tail(id, HOT_LRU, total_bytes, 0);
                lru_pull_tail(id, WARM_LRU, total_bytes, 0);
                if (lru_pull_tail(id, COLD_LRU, total_bytes, LRU_PULL_EVICT) <= 0)
                    break;
            } else {
                if (lru_pull_tail(id, COLD_LRU, 0, LRU_PULL_EVICT) <= 0)
                    break;
            }
        } else {
            break;
        }
    }

    if (i > 0) {
        pthread_mutex_lock(&lru_locks[id]);
        itemstats[id].direct_reclaims += i;
        pthread_mutex_unlock(&lru_locks[id]);
    }

    if (it == NULL) {
        pthread_mutex_lock(&lru_locks[id]);
        itemstats[id].outofmemory++;
        pthread_mutex_unlock(&lru_locks[id]);
        return NULL;
    }

    assert(it->slabs_clsid == 0);
    //assert(it != heads[id]);

    /* Refcount is seeded to 1 by slabs_alloc() */
    it->next = it->prev = 0;

    /* Items are initially loaded into the HOT_LRU. This is '0' but I want at
     * least a note here. Compiler (hopefully?) optimizes this out.
     */
    if (settings.lru_maintainer_thread) {
        if (exptime == 0 && settings.expirezero_does_not_evict) {
            id |= NOEXP_LRU;
        } else {
            id |= HOT_LRU;
        }
    } else {
        /* There is only COLD in compat-mode */
        id |= COLD_LRU;
    }
    it->slabs_clsid = id;

    DEBUG_REFCNT(it, '*');
    it->it_flags |= settings.use_cas ? ITEM_CAS : 0;
    it->nkey = nkey;
    it->nbytes = nbytes;
    memcpy(ITEM_key(it), key, nkey);
    it->exptime = exptime;
    memcpy(ITEM_suffix(it), suffix, (size_t)nsuffix);
    it->nsuffix = nsuffix;

    /* Need to shuffle the pointer stored in h_next into it->data. */
    if (it->it_flags & ITEM_CHUNKED) {
        item_chunk *chunk = (item_chunk *) ITEM_data(it);

        chunk->next = (item_chunk *) it->h_next;
        chunk->prev = 0;
        chunk->head = it;
        /* Need to chain back into the head's chunk */
        chunk->next->prev = chunk;
        chunk->size = chunk->next->size - ((char *)chunk - (char *)it);
        chunk->used = 0;
        assert(chunk->size > 0);
    }
    it->h_next = 0;

    return it;
}