/* * __REC_SYNC -- sync the recno tree to disk. * * Parameters: * dbp: pointer to access method * * Returns: * RET_SUCCESS, RET_ERROR. */ int __rec_sync(const DB *dbp, u_int flags) { struct iovec iov[2]; BTREE *t; DBT data, key; off_t off; recno_t scursor, trec; int status; t = dbp->internal; /* Toss any page pinned across calls. */ if (t->bt_pinned != NULL) { mpool_put(t->bt_mp, t->bt_pinned, 0); t->bt_pinned = NULL; } if (flags == R_RECNOSYNC) return (__bt_sync(dbp, 0)); if (F_ISSET(t, R_RDONLY | R_INMEM) || !F_ISSET(t, R_MODIFIED)) return (RET_SUCCESS); /* Read any remaining records into the tree. */ if (!F_ISSET(t, R_EOF) && t->bt_irec(t, MAX_REC_NUMBER) == RET_ERROR) return (RET_ERROR); /* Rewind the file descriptor. */ if (lseek(t->bt_rfd, (off_t)0, SEEK_SET) != 0) return (RET_ERROR); /* Save the cursor. */ scursor = t->bt_cursor.rcursor; key.size = sizeof(recno_t); key.data = &trec; if (F_ISSET(t, R_FIXLEN)) { /* * We assume that fixed length records are all fixed length. * Any that aren't are either EINVAL'd or corrected by the * record put code. */ status = (dbp->seq)(dbp, &key, &data, R_FIRST); while (status == RET_SUCCESS) { if (write(t->bt_rfd, data.data, data.size) != (ssize_t) data.size) return (RET_ERROR); status = (dbp->seq)(dbp, &key, &data, R_NEXT); } } else { iov[1].iov_base = &t->bt_bval; iov[1].iov_len = 1; status = (dbp->seq)(dbp, &key, &data, R_FIRST); while (status == RET_SUCCESS) { iov[0].iov_base = data.data; iov[0].iov_len = data.size; if (writev(t->bt_rfd, iov, 2) != (ssize_t) (data.size + 1)) return (RET_ERROR); status = (dbp->seq)(dbp, &key, &data, R_NEXT); } } /* Restore the cursor. */ t->bt_cursor.rcursor = scursor; if (status == RET_ERROR) return (RET_ERROR); if ((off = lseek(t->bt_rfd, (off_t)0, SEEK_CUR)) == -1) return (RET_ERROR); if (ftruncate(t->bt_rfd, off)) return (RET_ERROR); F_CLR(t, R_MODIFIED); return (RET_SUCCESS); }
DB * __rec_open(const char *fname, int flags, mode_t mode, const RECNOINFO *openinfo, int dflags) { BTREE *t; BTREEINFO btopeninfo; DB *dbp; PAGE *h; struct stat sb; int rfd = -1; /* pacify gcc */ int sverrno; dbp = NULL; /* Open the user's file -- if this fails, we're done. */ if (fname != NULL) { #ifndef O_CLOEXEC #define O_CLOEXEC 0 #endif if ((rfd = open(fname, flags | O_CLOEXEC, mode)) == -1) return NULL; #if O_CLOEXEC == 0 if (fcntl(rfd, F_SETFD, FD_CLOEXEC) == -1) goto err; #endif } /* Create a btree in memory (backed by disk). */ if (openinfo) { if (openinfo->flags & ~(R_FIXEDLEN | R_NOKEY | R_SNAPSHOT)) goto einval; btopeninfo.flags = 0; btopeninfo.cachesize = openinfo->cachesize; btopeninfo.maxkeypage = 0; btopeninfo.minkeypage = 0; btopeninfo.psize = openinfo->psize; btopeninfo.compare = NULL; btopeninfo.prefix = NULL; btopeninfo.lorder = openinfo->lorder; dbp = __bt_open(openinfo->bfname, O_RDWR, S_IRUSR | S_IWUSR, &btopeninfo, dflags); } else dbp = __bt_open(NULL, O_RDWR, S_IRUSR | S_IWUSR, NULL, dflags); if (dbp == NULL) goto err; /* * Some fields in the tree structure are recno specific. Fill them * in and make the btree structure look like a recno structure. We * don't change the bt_ovflsize value, it's close enough and slightly * bigger. */ t = dbp->internal; if (openinfo) { if (openinfo->flags & R_FIXEDLEN) { F_SET(t, R_FIXLEN); t->bt_reclen = openinfo->reclen; if (t->bt_reclen == 0) goto einval; } t->bt_bval = openinfo->bval; } else t->bt_bval = '\n'; F_SET(t, R_RECNO); if (fname == NULL) F_SET(t, R_EOF | R_INMEM); else t->bt_rfd = rfd; if (fname != NULL) { /* * In 4.4BSD, stat(2) returns true for ISSOCK on pipes. * Unfortunately, that's not portable, so we use lseek * and check the errno values. */ errno = 0; if (lseek(rfd, (off_t)0, SEEK_CUR) == -1 && errno == ESPIPE) { switch (flags & O_ACCMODE) { case O_RDONLY: F_SET(t, R_RDONLY); break; default: goto einval; } slow: if ((t->bt_rfp = fdopen(rfd, "r")) == NULL) goto err; F_SET(t, R_CLOSEFP); t->bt_irec = F_ISSET(t, R_FIXLEN) ? __rec_fpipe : __rec_vpipe; } else { switch (flags & O_ACCMODE) { case O_RDONLY: F_SET(t, R_RDONLY); break; case O_RDWR: break; default: goto einval; } if (fstat(rfd, &sb)) goto err; /* * Kluge -- we'd like to test to see if the file is too * big to mmap. Since, we don't know what size or type * off_t's or size_t's are, what the largest unsigned * integral type is, or what random insanity the local * C compiler will perpetrate, doing the comparison in * a portable way is flatly impossible. Hope that mmap * fails if the file is too large. */ if (sb.st_size == 0) F_SET(t, R_EOF); else { #ifdef MMAP_NOT_AVAILABLE /* * XXX * Mmap doesn't work correctly on many current * systems. In particular, it can fail subtly, * with cache coherency problems. Don't use it * for now. */ t->bt_msize = sb.st_size; if ((t->bt_smap = mmap(NULL, t->bt_msize, PROT_READ, MAP_FILE | MAP_PRIVATE, rfd, (off_t)0)) == (caddr_t)-1) goto slow; t->bt_cmap = t->bt_smap; t->bt_emap = t->bt_smap + sb.st_size; t->bt_irec = F_ISSET(t, R_FIXLEN) ? __rec_fmap : __rec_vmap; F_SET(t, R_MEMMAPPED); #else goto slow; #endif } } } /* Use the recno routines. */ dbp->close = __rec_close; dbp->del = __rec_delete; dbp->fd = __rec_fd; dbp->get = __rec_get; dbp->put = __rec_put; dbp->seq = __rec_seq; dbp->sync = __rec_sync; /* If the root page was created, reset the flags. */ if ((h = mpool_get(t->bt_mp, P_ROOT, 0)) == NULL) goto err; if ((h->flags & P_TYPE) == P_BLEAF) { F_CLR(h, P_TYPE); F_SET(h, P_RLEAF); mpool_put(t->bt_mp, h, MPOOL_DIRTY); } else mpool_put(t->bt_mp, h, 0); if (openinfo && openinfo->flags & R_SNAPSHOT && !F_ISSET(t, R_EOF | R_INMEM) && t->bt_irec(t, MAX_REC_NUMBER) == RET_ERROR) goto err; return (dbp); einval: errno = EINVAL; err: sverrno = errno; if (dbp != NULL) (void)__bt_close(dbp); if (fname != NULL) (void)close(rfd); errno = sverrno; return (NULL); }
/* * __REC_PUT -- Add a recno item to the tree. * * Parameters: * dbp: pointer to access method * key: key * data: data * flag: R_CURSOR, R_IAFTER, R_IBEFORE, R_NOOVERWRITE * * Returns: * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key is * already in the tree and R_NOOVERWRITE specified. */ int __rec_put(const DB *dbp, DBT *key, const DBT *data, u_int flags) { BTREE *t; DBT fdata, tdata; recno_t nrec; int status; t = dbp->internal; /* Toss any page pinned across calls. */ if (t->bt_pinned != NULL) { mpool_put(t->bt_mp, t->bt_pinned, 0); t->bt_pinned = NULL; } /* * If using fixed-length records, and the record is long, return * EINVAL. If it's short, pad it out. Use the record data return * memory, it's only short-term. */ if (F_ISSET(t, R_FIXLEN) && data->size != t->bt_reclen) { if (data->size > t->bt_reclen) goto einval; if (t->bt_rdata.size < t->bt_reclen) { t->bt_rdata.data = reallocf(t->bt_rdata.data, t->bt_reclen); if (t->bt_rdata.data == NULL) return (RET_ERROR); t->bt_rdata.size = t->bt_reclen; } memmove(t->bt_rdata.data, data->data, data->size); memset((char *)t->bt_rdata.data + data->size, t->bt_bval, t->bt_reclen - data->size); fdata.data = t->bt_rdata.data; fdata.size = t->bt_reclen; } else { fdata.data = data->data; fdata.size = data->size; } switch (flags) { case R_CURSOR: if (!F_ISSET(&t->bt_cursor, CURS_INIT)) goto einval; nrec = t->bt_cursor.rcursor; break; case R_SETCURSOR: if ((nrec = *(recno_t *)key->data) == 0) goto einval; break; case R_IAFTER: if ((nrec = *(recno_t *)key->data) == 0) { nrec = 1; flags = R_IBEFORE; } break; case 0: case R_IBEFORE: if ((nrec = *(recno_t *)key->data) == 0) goto einval; break; case R_NOOVERWRITE: if ((nrec = *(recno_t *)key->data) == 0) goto einval; if (nrec <= t->bt_nrecs) return (RET_SPECIAL); break; default: einval: errno = EINVAL; return (RET_ERROR); } /* * Make sure that records up to and including the put record are * already in the database. If skipping records, create empty ones. */ if (nrec > t->bt_nrecs) { if (!F_ISSET(t, R_EOF | R_INMEM) && t->bt_irec(t, nrec) == RET_ERROR) return (RET_ERROR); if (nrec > t->bt_nrecs + 1) { if (F_ISSET(t, R_FIXLEN)) { if ((tdata.data = (void *)malloc(t->bt_reclen)) == NULL) return (RET_ERROR); tdata.size = t->bt_reclen; memset(tdata.data, t->bt_bval, tdata.size); } else { tdata.data = NULL; tdata.size = 0; } while (nrec > t->bt_nrecs + 1) if (__rec_iput(t, t->bt_nrecs, &tdata, 0) != RET_SUCCESS) return (RET_ERROR); if (F_ISSET(t, R_FIXLEN)) free(tdata.data); } } if ((status = __rec_iput(t, nrec - 1, &fdata, flags)) != RET_SUCCESS) return (status); switch (flags) { case R_IAFTER: nrec++; break; case R_SETCURSOR: t->bt_cursor.rcursor = nrec; break; } F_SET(t, R_MODIFIED); return (__rec_ret(t, NULL, nrec, key, NULL)); }
/* * __REC_SEQ -- Recno sequential scan interface. * * Parameters: * dbp: pointer to access method * key: key for positioning and return value * data: data return value * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV. * * Returns: * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. */ int __rec_seq(const DB *dbp, DBT *key, DBT *data, u_int flags) { BTREE *t; EPG *e; recno_t nrec; int status; t = dbp->internal; /* Toss any page pinned across calls. */ if (t->bt_pinned != NULL) { mpool_put(t->bt_mp, t->bt_pinned, 0); t->bt_pinned = NULL; } switch(flags) { case R_CURSOR: if ((nrec = *(recno_t *)key->data) == 0) goto einval; break; case R_NEXT: if (F_ISSET(&t->bt_cursor, CURS_INIT)) { nrec = t->bt_cursor.rcursor + 1; break; } /* FALLTHROUGH */ case R_FIRST: nrec = 1; break; case R_PREV: if (F_ISSET(&t->bt_cursor, CURS_INIT)) { if ((nrec = t->bt_cursor.rcursor - 1) == 0) return (RET_SPECIAL); break; } /* FALLTHROUGH */ case R_LAST: if (!F_ISSET(t, R_EOF | R_INMEM) && t->bt_irec(t, MAX_REC_NUMBER) == RET_ERROR) return (RET_ERROR); nrec = t->bt_nrecs; break; default: einval: errno = EINVAL; return (RET_ERROR); } if (t->bt_nrecs == 0 || nrec > t->bt_nrecs) { if (!F_ISSET(t, R_EOF | R_INMEM) && (status = t->bt_irec(t, nrec)) != RET_SUCCESS) return (status); if (t->bt_nrecs == 0 || nrec > t->bt_nrecs) return (RET_SPECIAL); } if ((e = __rec_search(t, nrec - 1, SEARCH)) == NULL) return (RET_ERROR); F_SET(&t->bt_cursor, CURS_INIT); t->bt_cursor.rcursor = nrec; status = __rec_ret(t, e, nrec, key, data); if (F_ISSET(t, B_DB_LOCK)) mpool_put(t->bt_mp, e->page, 0); else t->bt_pinned = e->page; return (status); }