/*
 * __rec_ret --
 *	Fill in the caller's key and/or data structures for a recno record.
 *
 * Parameters:
 *	t:	tree
 *	e:	key/data pair to be returned
 *	nrec:	record number
 *	key:	user's key structure; skipped when NULL
 *	data:	user's data structure; skipped when NULL
 *
 * Returns:
 *	RET_SUCCESS, RET_ERROR.
 */
int
__rec_ret(BTREE *t, EPG *e, recno_t nrec, DBT *key, DBT *data)
{
	RLEAF *leaf;
	void *grown;

	if (key != NULL) {
		/*
		 * The record number is not stored on the page, so it is
		 * copied into the tree's key buffer, growing that buffer
		 * first if it cannot hold a recno_t.
		 */
		if (t->bt_rkey.size < sizeof(recno_t)) {
			grown = realloc(t->bt_rkey.data, sizeof(recno_t));
			if (grown == NULL)
				return (RET_ERROR);
			t->bt_rkey.data = grown;
			t->bt_rkey.size = sizeof(recno_t);
		}
		memmove(t->bt_rkey.data, &nrec, sizeof(recno_t));
		key->data = t->bt_rkey.data;
		key->size = sizeof(recno_t);
	}

	if (data == NULL)
		return (RET_SUCCESS);

	leaf = GETRLEAF(e->page, e->index);

	/*
	 * Big data is always gathered into the tree's data buffer so that
	 * the caller sees it as one contiguous region.
	 */
	if (leaf->flags & P_BIGDATA) {
		if (__ovfl_get(t, leaf->bytes,
		    &data->size, &t->bt_rdata.data, &t->bt_rdata.size))
			return (RET_ERROR);
		data->data = t->bt_rdata.data;
		return (RET_SUCCESS);
	}

	/*
	 * Without B_DB_LOCK the data is not copied: the caller is handed
	 * a pointer straight into the page.
	 */
	if (!F_ISSET(t, B_DB_LOCK)) {
		data->data = leaf->bytes;
		data->size = leaf->dsize;
		return (RET_SUCCESS);
	}

	/*
	 * Copy the on-page data into the tree's data buffer.  One extra
	 * byte is requested in case the first record retrieved is 0 length.
	 */
	if (t->bt_rdata.size < leaf->dsize + 1) {
		grown = realloc(t->bt_rdata.data, leaf->dsize + 1);
		if (grown == NULL)
			return (RET_ERROR);
		t->bt_rdata.data = grown;
		t->bt_rdata.size = leaf->dsize + 1;
	}
	memmove(t->bt_rdata.data, leaf->bytes, leaf->dsize);
	data->data = t->bt_rdata.data;
	data->size = leaf->dsize;
	return (RET_SUCCESS);
}
/*
 * __REC_DLEAF -- Delete a single record from a recno leaf page.
 *
 * Parameters:
 *	t:	tree
 *	h:	page holding the record to delete
 *	idx:	index on current page to delete
 *
 * Returns:
 *	RET_SUCCESS, RET_ERROR.  RET_ERROR is returned only when the record
 *	is flagged P_BIGDATA and __ovfl_delete fails; in that case the page
 *	has not been modified by this function.
 */
int
__rec_dleaf(BTREE *t, PAGE *h, u_int32_t idx)
{
	RLEAF *rl;
	indx_t *ip, cnt, offset;
	u_int32_t nbytes;
	char *from;
	void *to;

	/*
	 * Delete a record from a recno leaf page.  Internal records are never
	 * deleted from internal pages, regardless of the records that caused
	 * them to be added being deleted.  Pages made empty by deletion are
	 * not reclaimed.  They are, however, made available for reuse.
	 *
	 * Pack the remaining entries at the end of the page, shift the indices
	 * down, overwriting the deleted record and its index.  If the record
	 * uses overflow pages, make them available for reuse.
	 */
	to = rl = GETRLEAF(h, idx);
	if (rl->flags & P_BIGDATA && __ovfl_delete(t, rl->bytes) == RET_ERROR)
		return (RET_ERROR);
	nbytes = NRLEAF(rl);

	/*
	 * Compress the key/data pairs.  Compress and adjust the [BR]LEAF
	 * offsets.  Reset the headers.
	 *
	 * Everything stored between h->upper and the start of the deleted
	 * record is slid up by nbytes, overwriting the deleted record.
	 */
	from = (char *)h + h->upper;
	memmove(from + nbytes, from, (char *)to - from);
	h->upper += nbytes;

	/*
	 * Entries before idx keep their slot; only those whose offset was
	 * below the deleted record's offset were moved by the memmove above,
	 * so only those offsets are bumped by nbytes.
	 */
	offset = h->linp[idx];
	for (cnt = &h->linp[idx] - (ip = &h->linp[0]); cnt--; ++ip)
		if (ip[0] < offset)
			ip[0] += nbytes;
	/*
	 * Entries after idx are each shifted down one slot (ip now points at
	 * slot idx), with the same offset adjustment applied.  The
	 * pre-decrement makes the loop run one time fewer than the number of
	 * slots from idx to NEXTINDEX(h), since the last slot has no
	 * successor to copy from.
	 */
	for (cnt = &h->linp[NEXTINDEX(h)] - ip; --cnt; ++ip)
		ip[0] = ip[1] < offset ? ip[1] + nbytes : ip[1];
	/* One index slot fewer is in use; one record fewer is in the tree. */
	h->lower -= sizeof(indx_t);
	--t->bt_nrecs;
	return (RET_SUCCESS);
}
/*
 * BT_PSPLIT -- Do the real work of splitting the page.
 *
 * Parameters:
 *	t:	tree
 *	h:	page to be split
 *	l:	page to put lower half of data
 *	r:	page to put upper half of data
 *	pskip:	pointer to index to leave open; on return, if the open slot
 *		ended up on the right page, *pskip has been reduced by the
 *		number of entries placed on the left page
 *	ilen:	insert length
 *
 * Returns:
 *	Pointer to page in which to insert.
 */
static PAGE *
bt_psplit(BTREE *t, PAGE *h, PAGE *l, PAGE *r, indx_t *pskip, size_t ilen)
{
	BINTERNAL *bi;
	BLEAF *bl;
	CURSOR *c;
	RLEAF *rl;
	PAGE *rval;
	void *src;
	indx_t full, half, nxt, off, skip, top, used;
	u_int32_t nbytes;
	int bigkeycnt, isbigkey;

	/*
	 * Split the data to the left and right pages.  Leave the skip index
	 * open.  Additionally, make some effort not to split on an overflow
	 * key.  This makes internal page processing faster and can save
	 * space as overflow keys used by internal pages are never deleted.
	 *
	 * In the loop below, nxt indexes entries of the source page h and
	 * off indexes slots of the left page l; they differ by one once the
	 * skipped slot has been passed.
	 */
	bigkeycnt = 0;
	skip = *pskip;
	full = t->bt_psize - BTDATAOFF;
	half = full / 2;
	used = 0;
	for (nxt = off = 0, top = NEXTINDEX(h); nxt < top; ++off) {
		if (skip == off) {
			/* The open slot is charged the size of the insert. */
			nbytes = ilen;
			isbigkey = 0;		/* XXX: not really known. */
		} else
			switch (h->flags & P_TYPE) {
			case P_BINTERNAL:
				src = bi = GETBINTERNAL(h, nxt);
				nbytes = NBINTERNAL(bi->ksize);
				isbigkey = bi->flags & P_BIGKEY;
				break;
			case P_BLEAF:
				src = bl = GETBLEAF(h, nxt);
				nbytes = NBLEAF(bl);
				isbigkey = bl->flags & P_BIGKEY;
				break;
			case P_RINTERNAL:
				src = GETRINTERNAL(h, nxt);
				nbytes = NRINTERNAL;
				isbigkey = 0;
				break;
			case P_RLEAF:
				src = rl = GETRLEAF(h, nxt);
				nbytes = NRLEAF(rl);
				isbigkey = 0;
				break;
			default:
				abort();
			}

		/*
		 * If the key/data pairs are substantial fractions of the max
		 * possible size for the page, it's possible to get situations
		 * where we decide to try and copy too much onto the left page.
		 * Make sure that doesn't happen.
		 *
		 * The loop also stops at the last source entry (nxt == top - 1),
		 * so that entry is left for the right page.  off is backed up
		 * because the current slot was not filled.
		 */
		if ((skip <= off && used + nbytes + sizeof(indx_t) >= full) || nxt == top - 1) {
			--off;
			break;
		}

		/* Copy the key/data pair, if not the skipped index. */
		if (skip != off) {
			++nxt;

			/* Data grows downward from l->upper; record its new offset. */
			l->linp[off] = l->upper -= nbytes;
			memmove((char *)l + l->upper, src, nbytes);
		}

		/*
		 * Once at least half the page is accounted for, stop -- unless
		 * the entry just handled had a big key, in which case keep
		 * going for up to three more such entries.
		 */
		used += nbytes + sizeof(indx_t);
		if (used >= half) {
			if (!isbigkey || bigkeycnt == 3)
				break;
			else
				++bigkeycnt;
		}
	}

	/*
	 * Off is the last offset that's valid for the left page.
	 * Nxt is the first offset to be placed on the right page.
	 */
	l->lower += (off + 1) * sizeof(indx_t);

	/*
	 * If splitting the page that the cursor was on, the cursor has to be
	 * adjusted to point to the same record as before the split.  If the
	 * cursor is at or past the skipped slot, the cursor is incremented by
	 * one.  If the cursor is on the right page, it is decremented by the
	 * number of records split to the left page.
	 */
	c = &t->bt_cursor;
	if (F_ISSET(c, CURS_INIT) && c->pg.pgno == h->pgno) {
		if (c->pg.index >= skip)
			++c->pg.index;
		if (c->pg.index < nxt)			/* Left page. */
			c->pg.pgno = l->pgno;
		else {					/* Right page. */
			c->pg.pgno = r->pgno;
			c->pg.index -= nxt;
		}
	}

	/*
	 * If the skipped index was on the left page, just return that page.
	 * Otherwise, adjust the skip index to reflect the new position on
	 * the right page.
	 *
	 * NOTE(review): setting skip to MAX_PAGE_OFFSET presumably makes the
	 * "skip == nxt" test in the loop below never match, so no slot is
	 * left open on the right page -- confirm against the definition of
	 * MAX_PAGE_OFFSET.
	 */
	if (skip <= off) {
		skip = MAX_PAGE_OFFSET;
		rval = l;
	} else {
		rval = r;
		*pskip -= nxt;
	}

	/*
	 * Copy the remaining source entries to the right page.  Here off
	 * indexes slots of r; it is bumped an extra time when the skipped
	 * index is reached so that slot is left unfilled.
	 */
	for (off = 0; nxt < top; ++off) {
		if (skip == nxt) {
			++off;
			skip = MAX_PAGE_OFFSET;
		}
		switch (h->flags & P_TYPE) {
		case P_BINTERNAL:
			src = bi = GETBINTERNAL(h, nxt);
			nbytes = NBINTERNAL(bi->ksize);
			break;
		case P_BLEAF:
			src = bl = GETBLEAF(h, nxt);
			nbytes = NBLEAF(bl);
			break;
		case P_RINTERNAL:
			src = GETRINTERNAL(h, nxt);
			nbytes = NRINTERNAL;
			break;
		case P_RLEAF:
			src = rl = GETRLEAF(h, nxt);
			nbytes = NRLEAF(rl);
			break;
		default:
			abort();
		}
		++nxt;
		r->linp[off] = r->upper -= nbytes;
		memmove((char *)r + r->upper, src, nbytes);
	}
	r->lower += off * sizeof(indx_t);

	/* If the key is being appended to the page, adjust the index. */
	if (skip == top)
		r->lower += sizeof(indx_t);

	return (rval);
}
/*
 * BT_DPAGE -- Dump the page
 *
 * Writes a human-readable description of the page header and, for
 * non-overflow pages, of every entry on the page to stderr.
 *
 * Parameters:
 *	h:	pointer to the PAGE
 */
void
__bt_dpage(PAGE *h)
{
	BINTERNAL *bi;
	BLEAF *bl;
	RINTERNAL *ri;
	RLEAF *rl;
	indx_t cur, top;
	const char *sep;

	(void)fprintf(stderr, " page %u: (", h->pgno);

	/* Print the name of each flag set on the page, comma separated. */
#undef X
#define	X(flag, name)							\
	do {								\
		if (h->flags & flag) {					\
			(void)fprintf(stderr, "%s%s", sep, name);	\
			sep = ", ";					\
		}							\
	} while (0)
	sep = "";
	X(P_BINTERNAL, "BINTERNAL");		/* types */
	X(P_BLEAF, "BLEAF");
	X(P_RINTERNAL, "RINTERNAL");		/* types */
	X(P_RLEAF, "RLEAF");
	X(P_OVERFLOW, "OVERFLOW");
	X(P_PRESERVE, "PRESERVE");
	(void)fprintf(stderr, ")\n");
#undef X

	(void)fprintf(stderr, "\tprev %2u next %2u", h->prevpg, h->nextpg);

	/* Overflow pages get no index dump; only the header is printed. */
	if (h->flags & P_OVERFLOW)
		return;

	top = NEXTINDEX(h);
	(void)fprintf(stderr, " lower %3d upper %3d nextind %d\n",
	    h->lower, h->upper, top);
	for (cur = 0; cur < top; cur++) {
		(void)fprintf(stderr, "\t[%03d] %4d ", cur, h->linp[cur]);
		switch (h->flags & P_TYPE) {
		case P_BINTERNAL:
			bi = GETBINTERNAL(h, cur);
			(void)fprintf(stderr,
			    "size %03d pgno %03d", bi->ksize, bi->pgno);
			if (bi->flags & P_BIGKEY)
				(void)fprintf(stderr, " (indirect)");
			else if (bi->ksize)
				(void)fprintf(stderr,
				    " {%.*s}", (int)bi->ksize, bi->bytes);
			break;
		case P_RINTERNAL:
			ri = GETRINTERNAL(h, cur);
			(void)fprintf(stderr, "entries %03d pgno %03d",
			    ri->nrecs, ri->pgno);
			break;
		case P_BLEAF:
			bl = GETBLEAF(h, cur);
			if (bl->flags & P_BIGKEY)
				(void)fprintf(stderr,
				    "big key page %u size %u/",
				    *(pgno_t *)bl->bytes,
				    *(u_int32_t *)(bl->bytes + sizeof(pgno_t)));
			else if (bl->ksize)
				/*
				 * The data bytes start at bl->bytes +
				 * bl->ksize, so the key is not NUL
				 * terminated; bound the print by ksize
				 * rather than using a bare %s.
				 */
				(void)fprintf(stderr,
				    "%.*s/", (int)bl->ksize, bl->bytes);
			if (bl->flags & P_BIGDATA)
				(void)fprintf(stderr,
				    "big data page %u size %u",
				    *(pgno_t *)(bl->bytes + bl->ksize),
				    *(u_int32_t *)(bl->bytes + bl->ksize +
				    sizeof(pgno_t)));
			else if (bl->dsize)
				(void)fprintf(stderr, "%.*s",
				    (int)bl->dsize, bl->bytes + bl->ksize);
			break;
		case P_RLEAF:
			rl = GETRLEAF(h, cur);
			if (rl->flags & P_BIGDATA)
				(void)fprintf(stderr,
				    "big data page %u size %u",
				    *(pgno_t *)rl->bytes,
				    *(u_int32_t *)(rl->bytes + sizeof(pgno_t)));
			else if (rl->dsize)
				(void)fprintf(stderr,
				    "%.*s", (int)rl->dsize, rl->bytes);
			break;
		}
		(void)fprintf(stderr, "\n");
	}
}