/* * REC_TOTAL -- Return the number of recno entries below a page. * * Parameters: * h: page * * Returns: * The number of recno entries below a page. * * XXX * These values could be set by the bt_psplit routine. The problem is that the * entry has to be popped off of the stack etc. or the values have to be passed * all the way back to bt_split/bt_rroot and it's not very clean. */ static recno_t rec_total(PAGE *h) { recno_t recs; indx_t nxt, top; for (recs = 0, nxt = 0, top = NEXTINDEX(h); nxt < top; ++nxt) recs += GETRINTERNAL(h, nxt)->nrecs; return (recs); }
/* * __REC_SEARCH -- Search a btree for a key. * * Parameters: * t: tree to search * recno: key to find * op: search operation * * Returns: * EPG for matching record, if any, or the EPG for the location of the * key, if it were inserted into the tree. * * Returns: * The EPG for matching record, if any, or the EPG for the location * of the key, if it were inserted into the tree, is entered into * the bt_cur field of the tree. A pointer to the field is returned. */ EPG * __rec_search(BTREE *t, recno_t recno, enum SRCHOP op) { indx_t idx; PAGE *h; EPGNO *parent; RINTERNAL *r; pgno_t pg; indx_t top; recno_t total; int sverrno; BT_CLR(t); for (pg = P_ROOT, total = 0;;) { if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) goto err; if (h->flags & P_RLEAF) { t->bt_cur.page = h; t->bt_cur.index = recno - total; return (&t->bt_cur); } for (idx = 0, top = NEXTINDEX(h);;) { r = GETRINTERNAL(h, idx); if (++idx == top || total + r->nrecs > recno) break; total += r->nrecs; } BT_PUSH(t, pg, idx - 1); pg = r->pgno; switch (op) { case SDELETE: --GETRINTERNAL(h, (idx - 1))->nrecs; mpool_put(t->bt_mp, h, MPOOL_DIRTY); break; case SINSERT: ++GETRINTERNAL(h, (idx - 1))->nrecs; mpool_put(t->bt_mp, h, MPOOL_DIRTY); break; case SEARCH: mpool_put(t->bt_mp, h, 0); break; } } /* Try and recover the tree. */ err: sverrno = errno; if (op != SEARCH) while ((parent = BT_POP(t)) != NULL) { if ((h = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL) break; if (op == SINSERT) --GETRINTERNAL(h, parent->index)->nrecs; else ++GETRINTERNAL(h, parent->index)->nrecs; mpool_put(t->bt_mp, h, MPOOL_DIRTY); } errno = sverrno; return (NULL); }
/* * BT_STAT -- Gather/print the tree statistics * * Parameters: * dbp: pointer to the DB */ void __bt_stat(DB *dbp) { extern u_long bt_cache_hit, bt_cache_miss, bt_pfxsaved, bt_rootsplit; extern u_long bt_sortsplit, bt_split; BTREE *t; PAGE *h; pgno_t i, pcont, pinternal, pleaf; u_long ifree, lfree, nkeys; int levels; t = dbp->internal; pcont = pinternal = pleaf = 0; nkeys = ifree = lfree = 0; for (i = P_ROOT; (h = mpool_get(t->bt_mp, i, MPOOL_IGNOREPIN)) != NULL; ++i) switch (h->flags & P_TYPE) { case P_BINTERNAL: case P_RINTERNAL: ++pinternal; ifree += h->upper - h->lower; break; case P_BLEAF: case P_RLEAF: ++pleaf; lfree += h->upper - h->lower; nkeys += NEXTINDEX(h); break; case P_OVERFLOW: ++pcont; break; } /* Count the levels of the tree. */ for (i = P_ROOT, levels = 0 ;; ++levels) { h = mpool_get(t->bt_mp, i, MPOOL_IGNOREPIN); if (h->flags & (P_BLEAF|P_RLEAF)) { if (levels == 0) levels = 1; break; } i = F_ISSET(t, R_RECNO) ? GETRINTERNAL(h, 0)->pgno : GETBINTERNAL(h, 0)->pgno; } (void)fprintf(stderr, "%d level%s with %lu keys", levels, levels == 1 ? "" : "s", nkeys); if (F_ISSET(t, R_RECNO)) (void)fprintf(stderr, " (%u header count)", t->bt_nrecs); (void)fprintf(stderr, "\n%u pages (leaf %u, internal %u, overflow %u)\n", pinternal + pleaf + pcont, pleaf, pinternal, pcont); (void)fprintf(stderr, "%lu cache hits, %lu cache misses\n", bt_cache_hit, bt_cache_miss); (void)fprintf(stderr, "%lu splits (%lu root splits, %lu sort splits)\n", bt_split, bt_rootsplit, bt_sortsplit); pleaf *= t->bt_psize - BTDATAOFF; if (pleaf) (void)fprintf(stderr, "%.0f%% leaf fill (%lu bytes used, %lu bytes free)\n", ((double)(pleaf - lfree) / pleaf) * 100, pleaf - lfree, lfree); pinternal *= t->bt_psize - BTDATAOFF; if (pinternal) (void)fprintf(stderr, "%.0f%% internal fill (%lu bytes used, %lu bytes free\n", ((double)(pinternal - ifree) / pinternal) * 100, pinternal - ifree, ifree); if (bt_pfxsaved) (void)fprintf(stderr, "prefix checking removed %lu bytes.\n", bt_pfxsaved); }
/* * BT_PSPLIT -- Do the real work of splitting the page. * * Parameters: * t: tree * h: page to be split * l: page to put lower half of data * r: page to put upper half of data * pskip: pointer to index to leave open * ilen: insert length * * Returns: * Pointer to page in which to insert. */ static PAGE * bt_psplit(BTREE *t, PAGE *h, PAGE *l, PAGE *r, indx_t *pskip, size_t ilen) { BINTERNAL *bi; BLEAF *bl; CURSOR *c; RLEAF *rl; PAGE *rval; void *src; indx_t full, half, nxt, off, skip, top, used; u_int32_t nbytes; int bigkeycnt, isbigkey; /* * Split the data to the left and right pages. Leave the skip index * open. Additionally, make some effort not to split on an overflow * key. This makes internal page processing faster and can save * space as overflow keys used by internal pages are never deleted. */ bigkeycnt = 0; skip = *pskip; full = t->bt_psize - BTDATAOFF; half = full / 2; used = 0; for (nxt = off = 0, top = NEXTINDEX(h); nxt < top; ++off) { if (skip == off) { nbytes = ilen; isbigkey = 0; /* XXX: not really known. */ } else switch (h->flags & P_TYPE) { case P_BINTERNAL: src = bi = GETBINTERNAL(h, nxt); nbytes = NBINTERNAL(bi->ksize); isbigkey = bi->flags & P_BIGKEY; break; case P_BLEAF: src = bl = GETBLEAF(h, nxt); nbytes = NBLEAF(bl); isbigkey = bl->flags & P_BIGKEY; break; case P_RINTERNAL: src = GETRINTERNAL(h, nxt); nbytes = NRINTERNAL; isbigkey = 0; break; case P_RLEAF: src = rl = GETRLEAF(h, nxt); nbytes = NRLEAF(rl); isbigkey = 0; break; default: abort(); } /* * If the key/data pairs are substantial fractions of the max * possible size for the page, it's possible to get situations * where we decide to try and copy too much onto the left page. * Make sure that doesn't happen. */ if ((skip <= off && used + nbytes + sizeof(indx_t) >= full) || nxt == top - 1) { --off; break; } /* Copy the key/data pair, if not the skipped index. */ if (skip != off) { ++nxt; l->linp[off] = l->upper -= nbytes; memmove((char *)l + l->upper, src, nbytes); } used += nbytes + sizeof(indx_t); if (used >= half) { if (!isbigkey || bigkeycnt == 3) break; else ++bigkeycnt; } } /* * Off is the last offset that's valid for the left page. * Nxt is the first offset to be placed on the right page. */ l->lower += (off + 1) * sizeof(indx_t); /* * If splitting the page that the cursor was on, the cursor has to be * adjusted to point to the same record as before the split. If the * cursor is at or past the skipped slot, the cursor is incremented by * one. If the cursor is on the right page, it is decremented by the * number of records split to the left page. */ c = &t->bt_cursor; if (F_ISSET(c, CURS_INIT) && c->pg.pgno == h->pgno) { if (c->pg.index >= skip) ++c->pg.index; if (c->pg.index < nxt) /* Left page. */ c->pg.pgno = l->pgno; else { /* Right page. */ c->pg.pgno = r->pgno; c->pg.index -= nxt; } } /* * If the skipped index was on the left page, just return that page. * Otherwise, adjust the skip index to reflect the new position on * the right page. */ if (skip <= off) { skip = MAX_PAGE_OFFSET; rval = l; } else { rval = r; *pskip -= nxt; } for (off = 0; nxt < top; ++off) { if (skip == nxt) { ++off; skip = MAX_PAGE_OFFSET; } switch (h->flags & P_TYPE) { case P_BINTERNAL: src = bi = GETBINTERNAL(h, nxt); nbytes = NBINTERNAL(bi->ksize); break; case P_BLEAF: src = bl = GETBLEAF(h, nxt); nbytes = NBLEAF(bl); break; case P_RINTERNAL: src = GETRINTERNAL(h, nxt); nbytes = NRINTERNAL; break; case P_RLEAF: src = rl = GETRLEAF(h, nxt); nbytes = NRLEAF(rl); break; default: abort(); } ++nxt; r->linp[off] = r->upper -= nbytes; memmove((char *)r + r->upper, src, nbytes); } r->lower += off * sizeof(indx_t); /* If the key is being appended to the page, adjust the index. */ if (skip == top) r->lower += sizeof(indx_t); return (rval); }
/* * BT_DPAGE -- Dump the page * * Parameters: * h: pointer to the PAGE */ void __bt_dpage(PAGE *h) { BINTERNAL *bi; BLEAF *bl; RINTERNAL *ri; RLEAF *rl; indx_t cur, top; char *sep; (void)fprintf(stderr, " page %u: (", h->pgno); #undef X #define X(flag, name) \ if (h->flags & flag) { \ (void)fprintf(stderr, "%s%s", sep, name); \ sep = ", "; \ } sep = ""; X(P_BINTERNAL, "BINTERNAL") /* types */ X(P_BLEAF, "BLEAF") X(P_RINTERNAL, "RINTERNAL") /* types */ X(P_RLEAF, "RLEAF") X(P_OVERFLOW, "OVERFLOW") X(P_PRESERVE, "PRESERVE"); (void)fprintf(stderr, ")\n"); #undef X (void)fprintf(stderr, "\tprev %2u next %2u", h->prevpg, h->nextpg); if (h->flags & P_OVERFLOW) return; top = NEXTINDEX(h); (void)fprintf(stderr, " lower %3d upper %3d nextind %d\n", h->lower, h->upper, top); for (cur = 0; cur < top; cur++) { (void)fprintf(stderr, "\t[%03d] %4d ", cur, h->linp[cur]); switch (h->flags & P_TYPE) { case P_BINTERNAL: bi = GETBINTERNAL(h, cur); (void)fprintf(stderr, "size %03d pgno %03d", bi->ksize, bi->pgno); if (bi->flags & P_BIGKEY) (void)fprintf(stderr, " (indirect)"); else if (bi->ksize) (void)fprintf(stderr, " {%.*s}", (int)bi->ksize, bi->bytes); break; case P_RINTERNAL: ri = GETRINTERNAL(h, cur); (void)fprintf(stderr, "entries %03d pgno %03d", ri->nrecs, ri->pgno); break; case P_BLEAF: bl = GETBLEAF(h, cur); if (bl->flags & P_BIGKEY) (void)fprintf(stderr, "big key page %u size %u/", *(pgno_t *)bl->bytes, *(u_int32_t *)(bl->bytes + sizeof(pgno_t))); else if (bl->ksize) (void)fprintf(stderr, "%s/", bl->bytes); if (bl->flags & P_BIGDATA) (void)fprintf(stderr, "big data page %u size %u", *(pgno_t *)(bl->bytes + bl->ksize), *(u_int32_t *)(bl->bytes + bl->ksize + sizeof(pgno_t))); else if (bl->dsize) (void)fprintf(stderr, "%.*s", (int)bl->dsize, bl->bytes + bl->ksize); break; case P_RLEAF: rl = GETRLEAF(h, cur); if (rl->flags & P_BIGDATA) (void)fprintf(stderr, "big data page %u size %u", *(pgno_t *)rl->bytes, *(u_int32_t *)(rl->bytes + sizeof(pgno_t))); else if (rl->dsize) (void)fprintf(stderr, "%.*s", (int)rl->dsize, rl->bytes); break; } (void)fprintf(stderr, "\n"); } }