Exemplo n.º 1
0
/*
 * Begin a new LMDB transaction on db and hand it back through tidptr.
 *
 * db       - database handle; db->env is the LMDB environment and db->tid
 *            the current master transaction (NULL if there is none).
 * tidptr   - receives the new transaction on success; left untouched on
 *            failure.
 * readonly - request a read-only transaction. Ignored (forced to 0) when a
 *            master transaction already exists.
 *
 * Returns CYRUSDB_OK on success, otherwise the LMDB error translated by
 * my_mdberror().
 */
static int begin_txn(struct dbengine *db, struct txn **tidptr, int readonly)
{
    struct txn *tid;
    int mflags, mr, r;
    struct MDB_txn *parent = NULL;

    assert(db && tidptr);

    PDEBUG("cyrusdb_lmdb(%s)[begin_txn] readonly=%d tidptr=%p *tidptr=%p",
            db->fname, readonly, tidptr, tidptr ? *tidptr : NULL);

    /* Zero-filled, so tid->mtxn is NULL until mdb_txn_begin succeeds */
    tid = xzmalloc(sizeof(struct txn));

    /* Read-only transactions may only be master transactions and do
     * not allow nested transactions. */
    readonly = !db->tid ? readonly : 0;

    /*
     * Hacky workaround, similar to skiplist
     *
     * If no transaction was passed, but we're in a transaction,
     * then create a nested transaction within the current main
     * transaction.
     *
     * Note that transactions are always either the main transaction,
     * or a direct descendant of it. There are no deeper transaction
     * levels supported (although LMDB supports them).
     */
    if (db->tid) {
        parent = db->tid->mtxn;
    }

    /* Begin a new LMDB transaction */
    mr = mdb_txn_begin(db->env, parent, readonly ? MDB_RDONLY : 0, &tid->mtxn);
    if (mr) goto fail;

    /* Open the database */
    mflags = db->flags & CYRUSDB_CREATE ? MDB_CREATE : 0;
    mr = mdb_dbi_open(tid->mtxn, NULL /*name*/, mflags, &tid->dbi);
    if (mr) goto fail;

    if (db->flags & CYRUSDB_MBOXSORT) {
        /* Set mboxsort order */
        mr = mdb_set_compare(tid->mtxn, tid->dbi, mboxcmp);
        if (mr) goto fail;
    }

    if (!db->tid) {
        /* Set the master transaction */
        db->tid = tid;
    }
    *tidptr = tid;
    return CYRUSDB_OK;

fail:
    r = my_mdberror(mr);
    if (tid->mtxn) {
        /* NOTE(review): abort_txn is presumed to release tid as well as
         * the LMDB transaction - confirm against its definition. */
        abort_txn(db, tid);
    } else {
        /* mdb_txn_begin failed, so there is no LMDB transaction to abort.
         * The handle was never stored in db->tid or *tidptr; free it here
         * so it does not leak. */
        free(tid);
    }
    syslog(LOG_ERR, "cyrusdb_lmdb(%s): %s", db->fname, mdb_strerror(mr));
    return r;
}
Exemplo n.º 2
0
/*
 * Look up key in db and return its value.
 *
 * db          - database handle.
 * key, keylen - key bytes and their length.
 * data, datalen - optional outputs. On a hit they are filled through
 *               bufferval() only when BOTH are non-NULL. On a miss each
 *               non-NULL one is reset (NULL / 0).
 * tidptr      - NULL: run in a private read-only transaction that is
 *               committed before returning. Non-NULL: reuse *tidptr or
 *               start a transaction, and store it in *tidptr on return.
 *
 * Returns CYRUSDB_OK on a hit, CYRUSDB_NOTFOUND on a miss,
 * CYRUSDB_INTERNAL on an LMDB lookup error, or the error reported by
 * getorset_txn(), bufferval() or commit_txn().
 */
static int fetch(struct dbengine *db, const char *key, size_t keylen,
                 const char **data, size_t *datalen, struct txn **tidptr)
{
    MDB_val mkey, mval;
    /* Must start out NULL: the fail path inspects tid even when
     * getorset_txn() failed before assigning it. */
    struct txn *tid = NULL;
    int r, r2 = 0, mr;

    /* Validate before the debug trace dereferences db */
    assert(db && key);
    PDEBUG("cyrusdb_lmdb(%s)[fetch] tidptr=%p *tidptr=%p key='%.*s'",
            db->fname, tidptr, tidptr ? *tidptr : NULL, (int) keylen, key);

    mkey.mv_data = (void*) key;
    mkey.mv_size = keylen;

    /* Open or reuse transaction */
    r = getorset_txn(db, tidptr, &tid, !tidptr /*readonly*/);
    if (r) goto fail;

    mr = mdb_get(tid->mtxn, tid->dbi, &mkey, &mval);
    if (mr == MDB_NOTFOUND) {
        /* That's not an error */
        r = CYRUSDB_NOTFOUND;
        if (datalen) *datalen = 0;
        if (data) *data = NULL;
    } else if (mr) {
        /* That's an error */
        syslog(LOG_ERR, "cyrusdb_lmdb(%s): %s", db->fname, mdb_strerror(mr));
        r = CYRUSDB_INTERNAL;
        goto fail;
    } else if (data && datalen) {
        /* Cache the fetched data from LMDB memory in own buffer */
        r = bufferval(db, mval, data, datalen);
        if (r) goto fail;
    }

    /* Commit or export the transaction */
    if (!tidptr) {
        r2 = commit_txn(db, tid);
        /* NOTE(review): if commit_txn releases tid even when it fails,
         * the abort in the fail path would touch freed memory - confirm. */
        if (r2) goto fail;
    } else {
        *tidptr = tid;
        r2 = CYRUSDB_OK;
    }

    /* A NOTFOUND result takes precedence over the commit status */
    return r ? r : r2;

fail:
    /* Only abort a transaction this call created; one supplied by the
     * caller through *tidptr stays under the caller's control. */
    if (tid && (!tidptr || !*tidptr)) abort_txn(db, tid);
    return r ? r : r2;
}
Exemplo n.º 3
0
/*
 * Store data under key in db.
 *
 * db            - database handle; any open cursor in db->mcur is closed
 *                 first because the write invalidates it.
 * key, keylen   - key bytes and their length.
 * data, datalen - value bytes and their length.
 * tidptr        - NULL: run in a private transaction that is committed
 *                 before returning. Non-NULL: reuse *tidptr or start a
 *                 transaction, and store it in *tidptr on success.
 * mflags        - flags passed straight through to mdb_put().
 *
 * Returns CYRUSDB_OK on success, CYRUSDB_EXISTS when mdb_put() reports
 * MDB_KEYEXIST and MDB_NOOVERWRITE was requested, CYRUSDB_INTERNAL on any
 * other LMDB error, or the error from getorset_txn() / commit_txn().
 */
static int put(struct dbengine *db, const char *key, size_t keylen,
               const char *data, size_t datalen, struct txn **tidptr, int mflags)
{
    MDB_val mkey, mval;
    /* Must start out NULL: the fail path inspects tid even when
     * getorset_txn() failed before assigning it. */
    struct txn *tid = NULL;
    int r, mr;

    mkey.mv_data = (void*) key;
    mkey.mv_size = keylen;
    mval.mv_data = (void*) data;
    mval.mv_size = datalen;

    /* Invalidate cursor */
    if (db->mcur) {
        mdb_cursor_close(db->mcur);
        db->mcur = NULL;
    }

    /* Open or reuse transaction */
    r = getorset_txn(db, tidptr, &tid, 0);
    if (r) goto fail;

    mr = mdb_put(tid->mtxn, tid->dbi, &mkey, &mval, mflags);
    if (mr) {
        /* Return the appropriate error code for existing key overwrites */
        syslog(LOG_ERR, "cyrusdb_lmdb(%s): %s", db->fname, mdb_strerror(mr));
        r = (mr == MDB_KEYEXIST && (mflags & MDB_NOOVERWRITE)) ?
            CYRUSDB_EXISTS : CYRUSDB_INTERNAL;
        goto fail;
    }

    /* Commit or export the transaction */
    if (!tidptr) {
        r = commit_txn(db, tid);
        /* NOTE(review): if commit_txn releases tid even when it fails,
         * the abort in the fail path would touch freed memory - confirm. */
        if (r) goto fail;
    } else {
        *tidptr = tid;
    }

    return CYRUSDB_OK;

fail:
    /* Only abort a transaction this call created; one supplied by the
     * caller through *tidptr stays under the caller's control. */
    if (tid && (!tidptr || !*tidptr)) abort_txn(db, tid);
    return r;
}
Exemplo n.º 4
0
/*
 * Tear down a database handle: abort any transaction still attached to it,
 * close the LMDB environment if one is open, and release the handle's
 * buffers and the handle itself. db must not be used afterwards.
 */
static void close_db(struct dbengine *db)
{
    struct txn *stray;

    assert(db);

    /* A transaction still attached at this point was never finished;
     * report it and abort it before the environment goes away. */
    stray = db->tid;
    if (stray != NULL) {
        syslog(LOG_ERR, "cyrusdb_lmdb(%s): stray transaction %p",
                db->fname, stray);
        abort_txn(db, stray);
    }

    if (db->env != NULL)
        mdb_env_close(db->env);

    /* free(NULL) is a no-op, so no guard is needed for db->data */
    free(db->data);
    free(db->fname);
    free(db);
}
Exemplo n.º 5
0
/*
 * Walk the records of db whose key starts with prefix and invoke cb on
 * each one accepted by the optional filter p.
 *
 * db                - database handle; the cursor lives in db->mcur so
 *                     that a write during the walk can invalidate it.
 * prefix, prefixlen - key prefix to match. A NULL prefix or a zero length
 *                     means "all records".
 * p                 - optional filter; when non-NULL, cb is only invoked
 *                     for records where p returns non-zero.
 * cb                - callback; a non-zero return stops the walk and
 *                     becomes this function's return value.
 * rock              - opaque pointer passed to p and cb.
 * tidptr            - NULL: run in a private transaction that is committed
 *                     before returning. Non-NULL: reuse *tidptr or start a
 *                     transaction, and store it in *tidptr on return.
 *
 * Returns the callback's non-zero result if it stopped the walk, the
 * commit status otherwise, or a translated error if LMDB or
 * getorset_txn() failed.
 */
static int foreach(struct dbengine *db, const char *prefix, size_t prefixlen,
                   foreach_p *p, foreach_cb *cb, void *rock, struct txn **tidptr)
{
    int r, r2, mr = 0;
    struct txn *tid = NULL;
    MDB_val mkey, mval;
    enum MDB_cursor_op op;
    struct buf cur = BUF_INITIALIZER;

    /* Validate before the debug trace dereferences db */
    assert(db);
    PDEBUG("cyrusdb_lmdb(%s)[foreach] tidptr=%p *tidptr=%p prefix='%.*s'",
            db->fname, tidptr, tidptr ? *tidptr : NULL, (int) prefixlen, prefix);

    /* Open or reuse transaction */
    r = getorset_txn(db, tidptr, &tid, 0);
    if (r) goto fail;

    mr = mdb_cursor_open(tid->mtxn, tid->dbi, &db->mcur);
    if (mr) goto fail;

    /* Normalize prefix for search: keep pointer and length consistent so
     * the range checks below never run with only one of them set. */
    if (!prefix || !prefixlen) {
        prefix = NULL;
        prefixlen = 0;
    }

    /* Initialize cursor */
    mkey.mv_data = (void*) prefix;
    mkey.mv_size = prefixlen;
    op = prefix ? MDB_SET_RANGE : MDB_FIRST;
    mr = mdb_cursor_get(db->mcur, &mkey, &mval, op);

    /* Iterate cursor until no records or out of range */
    while (!mr) {
        if (prefixlen && (mkey.mv_size < prefixlen))
            break;

        if (prefix && memcmp(mkey.mv_data, prefix, prefixlen))
            break;

        if (!p || p(rock, mkey.mv_data, mkey.mv_size, mval.mv_data, mval.mv_size)) {
            /* Cache the current position in local memory */
            buf_setmap(&cur, mkey.mv_data, mkey.mv_size);

            r = cb(rock, cur.s, cur.len, mval.mv_data, mval.mv_size);
            if (r) break;

            if (db->mcur == NULL) {
                /* An update has invalidated the cursor. Reseek cursor. */
                mr = mdb_cursor_open(tid->mtxn, tid->dbi, &db->mcur);
                if (mr) break;

                mkey.mv_data = cur.s;
                mkey.mv_size = cur.len;
                mr = mdb_cursor_get(db->mcur, &mkey, &mval, MDB_SET_RANGE);
                if (mr) break;

                if (mkey.mv_size != cur.len || memcmp(mkey.mv_data, cur.s, cur.len)) {
                    /* The current position has been deleted. The cursor
                     * already sits on the following record, so process
                     * it without advancing. */
                    continue;
                }
            }
        }

        /* Advance cursor */
        mr = mdb_cursor_get(db->mcur, &mkey, &mval, MDB_NEXT);
    }

    /* Running off the end of the data is the normal way to finish */
    if (mr && mr != MDB_NOTFOUND)
        goto fail;

    if (db->mcur) {
        mdb_cursor_close(db->mcur);
        db->mcur = NULL;
    }
    buf_free(&cur);

    /* Export or commit transaction */
    if (tidptr) {
        /* Hand a transaction started here back to the caller, as fetch
         * and put do; otherwise it would stay open with no owner. */
        *tidptr = tid;
        r2 = CYRUSDB_OK;
    } else {
        r2 = commit_txn(db, tid);
    }

    return r ? r : r2;

fail:
    if (db->mcur) {
        mdb_cursor_close(db->mcur);
        db->mcur = NULL;
    }
    buf_free(&cur);

    /* Only abort a transaction this call created */
    if (tid && (!tidptr || !*tidptr))
        abort_txn(db, tid);
    if (mr) {
        syslog(LOG_ERR, "cyrusdb_lmdb(%s): %s", db->fname, mdb_strerror(mr));
        r = my_mdberror(mr);
    }
    return r;
}
Exemplo n.º 6
0
/*
 * Store, replace or delete one entry of the flat-file database by writing
 * a complete new copy of the file.
 *
 * db            - database handle (db->fd / db->base / db->size describe
 *                 the currently mapped file).
 * key, keylen   - key to store; it is passed through encode() first.
 * data, datalen - value to store. When data is NULL no new entry is
 *                 written, so an existing entry for key is dropped.
 * mytid         - NULL: lock, write, rename over db->fname and unlock
 *                 within this call. Non-NULL: the new file becomes the
 *                 transaction's working copy (fnamenew / fd) and further
 *                 accesses go against it; a transaction is created when
 *                 *mytid is NULL.
 * overwrite     - when zero, an existing entry for key is an error.
 *
 * Returns 0 on success, CYRUSDB_EXISTS when the key exists and overwrite
 * is zero, CYRUSDB_IOERROR on any locking or file I/O failure.
 *
 * On failure the transaction (if any) is aborted; without a transaction
 * the lock taken at entry is released before returning.
 */
static int mystore(struct dbengine *db,
                   const char *key, size_t keylen,
                   const char *data, size_t datalen,
                   struct txn **mytid, int overwrite)
{
    int r = 0;
    char fnamebuf[1024];
    int offset;
    unsigned long len;
    const char *lockfailaction;
    int writefd = -1;
    struct iovec iov[10];
    int niov;
    struct stat sbuf;
    struct buf keybuf = BUF_INITIALIZER;
    struct buf databuf = BUF_INITIALIZER;

    /* lock file, if needed */
    if (!mytid || !*mytid) {
        r = lock_reopen(db->fd, db->fname, &sbuf, &lockfailaction);
        if (r < 0) {
            syslog(LOG_ERR, "IOERROR: %s %s: %m", lockfailaction, db->fname);
            return CYRUSDB_IOERROR;
        }

        /* the file was replaced underneath us: map the new one */
        if (sbuf.st_ino != db->ino) {
            db->ino = sbuf.st_ino;
            map_free(&db->base, &db->len);
            map_refresh(db->fd, 0, &db->base, &db->len,
                        sbuf.st_size, db->fname, 0);
            db->size = sbuf.st_size;
        }

        if (mytid) {
            *mytid = new_txn();
        }
    }

    encode(key, keylen, &keybuf);

    /* find entry, if it exists */
    offset = bsearch_mem_mbox(keybuf.s, db->base, db->size, 0, &len);

    /* overwrite? */
    if (len && !overwrite) {
        r = CYRUSDB_EXISTS;
        goto fail;
    }

    /* write new file */
    if (mytid && (*mytid)->fnamenew) {
        strlcpy(fnamebuf, (*mytid)->fnamenew, sizeof(fnamebuf));
    } else {
        strlcpy(fnamebuf, db->fname, sizeof(fnamebuf));
        strlcat(fnamebuf, ".NEW", sizeof(fnamebuf));
    }

    unlink(fnamebuf);
    writefd = open(fnamebuf, O_RDWR | O_CREAT, 0666);
    if (writefd < 0) {
        syslog(LOG_ERR, "opening %s for writing failed: %m", fnamebuf);
        r = CYRUSDB_IOERROR;
        goto fail;
    }

    /* everything before the entry */
    niov = 0;
    if (offset) {
        WRITEV_ADD_TO_IOVEC(iov, niov, (char *) db->base, offset);
    }

    if (data) {
        /* new entry */
        encode(data, datalen, &databuf);
        WRITEV_ADD_TO_IOVEC(iov, niov, keybuf.s, keybuf.len);
        WRITEV_ADD_TO_IOVEC(iov, niov, "\t", 1);
        WRITEV_ADD_TO_IOVEC(iov, niov, databuf.s, databuf.len);
        WRITEV_ADD_TO_IOVEC(iov, niov, "\n", 1);
    }

    /* everything after the (old) entry */
    if (db->size - (offset + len) > 0) {
        WRITEV_ADD_TO_IOVEC(iov, niov, (char *) db->base + offset + len,
                            db->size - (offset + len));
    }

    /* do the write */
    if (retry_writev(writefd, iov, niov) == -1) {
        syslog(LOG_ERR, "IOERROR: writing %s: %m", fnamebuf);
        r = CYRUSDB_IOERROR;
        goto fail;
    }
    r = 0;

    if (mytid) {
        /* setup so further accesses will be against fname.NEW */
        if (fstat(writefd, &sbuf) == -1) {
            /* Without a valid size the remap below would use a stale or
             * uninitialized sbuf, so treat this as a write failure. */
            syslog(LOG_ERR, "IOERROR: fstat on %s: %m", fnamebuf);
            r = CYRUSDB_IOERROR;
            goto fail;
        }

        if (!(*mytid)->fnamenew) (*mytid)->fnamenew = xstrdup(fnamebuf);
        if ((*mytid)->fd) close((*mytid)->fd);
        (*mytid)->fd = writefd;
        map_free(&db->base, &db->len);
        map_refresh(writefd, 0, &db->base, &db->len, sbuf.st_size,
                    fnamebuf, 0);
        db->size = sbuf.st_size;
    } else {
        /* commit immediately */
        if (fsync(writefd) ||
            fstat(writefd, &sbuf) == -1 ||
            rename(fnamebuf, db->fname) == -1) {
            syslog(LOG_ERR, "IOERROR: writing %s: %m", fnamebuf);
            r = CYRUSDB_IOERROR;
            goto fail;
        }

        close(db->fd);
        db->fd = writefd;

        /* release lock */
        r = lock_unlock(db->fd, db->fname);
        if (r == -1) {
            syslog(LOG_ERR, "IOERROR: unlocking db %s: %m", db->fname);
            r = CYRUSDB_IOERROR;
        }

        db->ino = sbuf.st_ino;
        map_free(&db->base, &db->len);
        map_refresh(writefd, 0, &db->base, &db->len, sbuf.st_size,
            db->fname, 0);
        db->size = sbuf.st_size;
    }

    buf_free(&keybuf);
    buf_free(&databuf);

    return r;

fail:
    /* The new file's descriptor has not been handed to anyone yet */
    if (writefd >= 0) close(writefd);

    if (mytid) {
        abort_txn(db, *mytid);
    } else if (lock_unlock(db->fd, db->fname) == -1) {
        /* No transaction exists to release the lock taken at entry, so
         * it has to be dropped here or it stays held on db->fd. */
        syslog(LOG_ERR, "IOERROR: unlocking db %s: %m", db->fname);
    }

    buf_free(&keybuf);
    buf_free(&databuf);
    return r;
}