static int begin_txn(struct dbengine *db, struct txn **tidptr, int readonly) { struct txn *tid = xzmalloc(sizeof(struct txn)); int mflags, mr, r; struct MDB_txn *parent = NULL; assert(db && tidptr); PDEBUG("cyrusdb_lmdb(%s)[begin_txn] readonly=%d tidptr=%p *tidptr=%p", db->fname, readonly, tidptr, tidptr ? *tidptr : NULL); /* Read-only transactions may only be master transactions and do * not allow nested transactions. */ readonly = !db->tid ? readonly : 0; /* * Hacky workaround, similar to skiplist * * If no transaction was passed, but we're in a transaction, * then create a nested exception within the current main * transaction. * * Note that transactions are always either the main transaction, * or a direct descendant of it. There are no deeper transaction * levels supported (although LMDB supports them). */ if (db->tid) { parent = db->tid->mtxn; } /* Begin a new LMDB transaction */ mr = mdb_txn_begin(db->env, parent, readonly ? MDB_RDONLY : 0, &tid->mtxn); if (mr) goto fail; /* Open the database */ mflags = db->flags & CYRUSDB_CREATE ? MDB_CREATE : 0; mr = mdb_dbi_open(tid->mtxn, NULL /*name*/, mflags, &tid->dbi); if (mr) goto fail; if (db->flags & CYRUSDB_MBOXSORT) { /* Set mboxsort order */ mr = mdb_set_compare(tid->mtxn, tid->dbi, mboxcmp); if (mr) goto fail; } if (!db->tid) { /* Set the master transaction */ db->tid = tid; } *tidptr = tid; return CYRUSDB_OK; fail: r = my_mdberror(mr); if (tid->mtxn) abort_txn(db, tid); syslog(LOG_ERR, "cryusdb_lmdb(%s): %s", db->fname, mdb_strerror(mr)); return r; }
static int fetch(struct dbengine *db, const char *key, size_t keylen, const char **data, size_t *datalen, struct txn **tidptr) { MDB_val mkey, mval; struct txn *tid; int r, r2 = 0, mr; PDEBUG("cyrusdb_lmdb(%s)[fetch] tidptr=%p *tidptr=%p key='%.*s'", db->fname, tidptr, tidptr ? *tidptr : NULL, (int) keylen, key); assert(db && key); mkey.mv_data = (void*) key; mkey.mv_size = keylen; /* Open or reuse transaction */ r = getorset_txn(db, tidptr, &tid, !tidptr /*readonly*/); if (r) goto fail; mr = mdb_get(tid->mtxn, tid->dbi, &mkey, &mval); if (mr == MDB_NOTFOUND) { /* That's not an error */ r = CYRUSDB_NOTFOUND; if (datalen) *datalen = 0; if (data) *data = NULL; } else if (mr) { /* That's an error */ syslog(LOG_ERR, "cryusdb_lmdb(%s): %s", db->fname, mdb_strerror(mr)); r = CYRUSDB_INTERNAL; goto fail; } else if (data && datalen) { /* Cache the fetched data from LMDB memory in own buffer */ r = bufferval(db, mval, data, datalen); if (r) goto fail; } /* Commit or export the transaction */ if (!tidptr) { r2 = commit_txn(db, tid); if (r2) goto fail; } else { *tidptr = tid; r2 = CYRUSDB_OK; } return r ? r : r2; fail: if (tid && (!tidptr || !*tidptr)) abort_txn(db, tid); return r ? r : r2; }
static int put(struct dbengine *db, const char *key, size_t keylen, const char *data, size_t datalen, struct txn **tidptr, int mflags) { MDB_val mkey, mval; struct txn *tid; int r, mr; mkey.mv_data = (void*) key; mkey.mv_size = keylen; mval.mv_data = (void*) data; mval.mv_size = datalen; /* Invalidate cursor */ if (db->mcur) { mdb_cursor_close(db->mcur); db->mcur = NULL; } /* Open or reuse transaction */ r = getorset_txn(db, tidptr, &tid, 0); if (r) goto fail; mr = mdb_put(tid->mtxn, tid->dbi, &mkey, &mval, mflags); if (mr) { /* Return the appropriate error code for existing key overwrites */ syslog(LOG_ERR, "cryusdb_lmdb(%s): %s", db->fname, mdb_strerror(mr)); r = (mr == MDB_KEYEXIST && (mflags & MDB_NOOVERWRITE)) ? \ CYRUSDB_EXISTS : CYRUSDB_INTERNAL; goto fail; } /* Commit or export the transaction */ if (!tidptr) { r = commit_txn(db, tid); if (r) goto fail; } else { *tidptr = tid; } return CYRUSDB_OK; fail: if (tid && (!tidptr || !*tidptr)) abort_txn(db, tid); return r; }
/*
 * Tear down a database handle: abort any transaction still attached to it,
 * close the LMDB environment and release the handle's memory.
 *
 * db must not be NULL and must not be used after this call.
 */
static void close_db(struct dbengine *db)
{
    assert(db);

    /* A master transaction still attached at this point was never
     * committed or aborted; report it and roll it back. */
    if (db->tid != NULL) {
        syslog(LOG_ERR, "cyrusdb_lmdb(%s): stray transaction %p",
               db->fname, db->tid);
        abort_txn(db, db->tid);
    }

    if (db->env != NULL)
        mdb_env_close(db->env);

    /* free(NULL) is a no-op, so no guard is needed for db->data */
    free(db->data);
    free(db->fname);
    free(db);
}
/*
 * Iterate over the records of db in cursor order, optionally restricted to
 * keys that start with prefix, and invoke cb on each selected record.
 *
 * db        - open database handle; must not be NULL.
 * prefix    - key prefix to restrict the iteration to, or NULL for all
 *             records. An empty prefix (prefixlen == 0) is treated as NULL.
 * prefixlen - number of bytes in prefix.
 * p         - optional filter; when non-NULL, cb is only called for records
 *             for which p returns non-zero.
 * cb        - callback invoked with rock, a private copy of the key, and
 *             the value as returned by the cursor. A non-zero return stops
 *             the iteration and becomes the result of foreach.
 * rock      - opaque pointer passed to p and cb.
 * tidptr    - NULL to run in a transaction that is committed before
 *             returning; otherwise the transaction to use, or to receive
 *             the newly created one.
 *
 * The cursor is kept in db->mcur. If it is found to be NULL after a
 * callback (put() closes it on every write), the cursor is reopened and
 * repositioned at the key that was just processed.
 *
 * Returns the non-zero value from cb if the iteration was stopped, the
 * result of commit_txn() when tidptr is NULL, the cyrusdb code mapped from
 * an LMDB error by my_mdberror(), or CYRUSDB_OK.
 */
static int foreach(struct dbengine *db, const char *prefix, size_t prefixlen,
                   foreach_p *p, foreach_cb *cb, void *rock,
                   struct txn **tidptr)
{
    int r, r2, mr = 0;
    struct txn *tid = NULL;
    MDB_val mkey, mval;
    enum MDB_cursor_op op;
    struct buf cur = BUF_INITIALIZER;

    /* Validate before the debug trace dereferences db */
    assert(db);

    PDEBUG("cyrusdb_lmdb(%s)[foreach] tidptr=%p *tidptr=%p prefix='%.*s'",
            db->fname, tidptr, tidptr ? *tidptr : NULL, (int) prefixlen, prefix);

    /* Open or reuse transaction */
    r = getorset_txn(db, tidptr, &tid, 0);
    if (r) goto fail;

    mr = mdb_cursor_open(tid->mtxn, tid->dbi, &db->mcur);
    if (mr) goto fail;

    /* Normalize prefix for search: "no prefix" is always (NULL, 0), so
     * the length check in the loop cannot filter an unprefixed scan. */
    if (!prefix || !prefixlen) {
        prefix = NULL;
        prefixlen = 0;
    }

    /* Initialize cursor */
    mkey.mv_data = (void*) prefix;
    mkey.mv_size = prefixlen;
    op = prefix ? MDB_SET_RANGE : MDB_FIRST;
    mr = mdb_cursor_get(db->mcur, &mkey, &mval, op);

    /* Iterate cursor until no records or out of range */
    while (!mr) {
        if (prefixlen && (mkey.mv_size < prefixlen)) break;
        if (prefix && memcmp(mkey.mv_data, prefix, prefixlen)) break;

        if (!p || p(rock, mkey.mv_data, mkey.mv_size, mval.mv_data, mval.mv_size)) {
            /* Cache the current position in local memory */
            buf_setmap(&cur, mkey.mv_data, mkey.mv_size);

            r = cb(rock, cur.s, cur.len, mval.mv_data, mval.mv_size);
            if (r) break;

            if (db->mcur == NULL) {
                /* An update has invalidated the cursor. Reseek cursor. */
                mr = mdb_cursor_open(tid->mtxn, tid->dbi, &db->mcur);
                if (mr) break;

                mkey.mv_data = cur.s;
                mkey.mv_size = cur.len;
                mr = mdb_cursor_get(db->mcur, &mkey, &mval, MDB_SET_RANGE);
                if (mr) break;

                if (mkey.mv_size != cur.len ||
                    memcmp(mkey.mv_data, cur.s, cur.len)) {
                    /* The current position has been deleted. The cursor
                     * already sits on the following record, so process
                     * it without advancing. */
                    continue;
                }
            }
        }

        /* Advance cursor */
        mr = mdb_cursor_get(db->mcur, &mkey, &mval, MDB_NEXT);
    }

    /* Running off the end of the data is the normal way to finish */
    if (mr && mr != MDB_NOTFOUND) goto fail;

    if (db->mcur) {
        mdb_cursor_close(db->mcur);
        db->mcur = NULL;
    }
    buf_free(&cur);

    /* Export or commit transaction */
    if (tidptr) {
        /* Hand a newly created transaction to the caller, as fetch() and
         * put() do; otherwise it would be left neither committed nor
         * reachable by the caller. */
        *tidptr = tid;
        r2 = CYRUSDB_OK;
    } else {
        r2 = commit_txn(db, tid);
    }

    return r ? r : r2;

fail:
    if (db->mcur) {
        mdb_cursor_close(db->mcur);
        db->mcur = NULL;
    }
    buf_free(&cur);

    /* Only abort a transaction that this call created itself */
    if (tid && (!tidptr || !*tidptr)) abort_txn(db, tid);

    if (mr) {
        syslog(LOG_ERR, "cyrusdb_lmdb(%s): %s", db->fname, mdb_strerror(mr));
        r = my_mdberror(mr);
    }
    return r;
}
static int mystore(struct dbengine *db, const char *key, size_t keylen, const char *data, size_t datalen, struct txn **mytid, int overwrite) { int r = 0; char fnamebuf[1024]; int offset; unsigned long len; const char *lockfailaction; int writefd; struct iovec iov[10]; int niov; struct stat sbuf; struct buf keybuf = BUF_INITIALIZER; struct buf databuf = BUF_INITIALIZER; /* lock file, if needed */ if (!mytid || !*mytid) { r = lock_reopen(db->fd, db->fname, &sbuf, &lockfailaction); if (r < 0) { syslog(LOG_ERR, "IOERROR: %s %s: %m", lockfailaction, db->fname); return CYRUSDB_IOERROR; } if (sbuf.st_ino != db->ino) { db->ino = sbuf.st_ino; map_free(&db->base, &db->len); map_refresh(db->fd, 0, &db->base, &db->len, sbuf.st_size, db->fname, 0); db->size = sbuf.st_size; } if (mytid) { *mytid = new_txn(); } } encode(key, keylen, &keybuf); /* find entry, if it exists */ offset = bsearch_mem_mbox(keybuf.s, db->base, db->size, 0, &len); /* overwrite? */ if (len && !overwrite) { if (mytid) abort_txn(db, *mytid); buf_free(&keybuf); buf_free(&databuf); return CYRUSDB_EXISTS; } /* write new file */ if (mytid && (*mytid)->fnamenew) { strlcpy(fnamebuf, (*mytid)->fnamenew, sizeof(fnamebuf)); } else { strlcpy(fnamebuf, db->fname, sizeof(fnamebuf)); strlcat(fnamebuf, ".NEW", sizeof(fnamebuf)); } unlink(fnamebuf); r = writefd = open(fnamebuf, O_RDWR | O_CREAT, 0666); if (r < 0) { syslog(LOG_ERR, "opening %s for writing failed: %m", fnamebuf); if (mytid) abort_txn(db, *mytid); buf_free(&keybuf); buf_free(&databuf); return CYRUSDB_IOERROR; } niov = 0; if (offset) { WRITEV_ADD_TO_IOVEC(iov, niov, (char *) db->base, offset); } if (data) { /* new entry */ encode(data, datalen, &databuf); WRITEV_ADD_TO_IOVEC(iov, niov, keybuf.s, keybuf.len); WRITEV_ADD_TO_IOVEC(iov, niov, "\t", 1); WRITEV_ADD_TO_IOVEC(iov, niov, databuf.s, databuf.len); WRITEV_ADD_TO_IOVEC(iov, niov, "\n", 1); } if (db->size - (offset + len) > 0) { WRITEV_ADD_TO_IOVEC(iov, niov, (char *) db->base + offset + len, db->size - 
(offset + len)); } /* do the write */ r = retry_writev(writefd, iov, niov); if (r == -1) { syslog(LOG_ERR, "IOERROR: writing %s: %m", fnamebuf); close(writefd); if (mytid) abort_txn(db, *mytid); buf_free(&keybuf); buf_free(&databuf); return CYRUSDB_IOERROR; } r = 0; if (mytid) { /* setup so further accesses will be against fname.NEW */ if (fstat(writefd, &sbuf) == -1) { /* xxx ? */ } if (!(*mytid)->fnamenew) (*mytid)->fnamenew = xstrdup(fnamebuf); if ((*mytid)->fd) close((*mytid)->fd); (*mytid)->fd = writefd; map_free(&db->base, &db->len); map_refresh(writefd, 0, &db->base, &db->len, sbuf.st_size, fnamebuf, 0); db->size = sbuf.st_size; } else { /* commit immediately */ if (fsync(writefd) || fstat(writefd, &sbuf) == -1 || rename(fnamebuf, db->fname) == -1) { syslog(LOG_ERR, "IOERROR: writing %s: %m", fnamebuf); close(writefd); buf_free(&keybuf); buf_free(&databuf); return CYRUSDB_IOERROR; } close(db->fd); db->fd = writefd; /* release lock */ r = lock_unlock(db->fd, db->fname); if (r == -1) { syslog(LOG_ERR, "IOERROR: unlocking db %s: %m", db->fname); r = CYRUSDB_IOERROR; } db->ino = sbuf.st_ino; map_free(&db->base, &db->len); map_refresh(writefd, 0, &db->base, &db->len, sbuf.st_size, db->fname, 0); db->size = sbuf.st_size; } buf_free(&keybuf); buf_free(&databuf); return r; }