/*
 * __wt_ovfl_txnc_add --
 *     Add a new entry to the page's list of transaction-cached overflow
 *     records.
 */
int
__wt_ovfl_txnc_add(WT_SESSION_IMPL *session, WT_PAGE *page,
    const uint8_t *addr, size_t addr_size,
    const void *value, size_t value_size)
{
    WT_OVFL_TXNC **head, **stack[WT_SKIP_MAXDEPTH], *txnc;
    size_t size;
    u_int i, skipdepth;
    uint8_t *p;

    /* Lazily create the page's overflow-tracking structure on first use. */
    if (page->modify->ovfl_track == NULL)
        WT_RET(__ovfl_track_init(session, page));

    head = page->modify->ovfl_track->ovfl_txnc;

    /* Choose a skiplist depth for this insert. */
    skipdepth = __wt_skip_choose_depth(session);

    /*
     * Allocate the WT_OVFL_TXNC structure, next pointers for the skip
     * list, room for the address and value, then copy everything into
     * place.
     *
     * To minimize the WT_OVFL_TXNC structure size, the address offset
     * and size are single bytes: that's safe because the address follows
     * the structure (which can't be more than about 100B), and address
     * cookies are limited to 255B.
     */
    size = sizeof(WT_OVFL_TXNC) +
        skipdepth * sizeof(WT_OVFL_TXNC *) + addr_size + value_size;
    WT_RET(__wt_calloc(session, 1, size, &txnc));

    /* The address bytes land immediately after the skiplist pointers. */
    p = (uint8_t *)txnc +
        sizeof(WT_OVFL_TXNC) + skipdepth * sizeof(WT_OVFL_TXNC *);
    txnc->addr_offset = (uint8_t)WT_PTRDIFF(p, txnc);
    txnc->addr_size = (uint8_t)addr_size;
    memcpy(p, addr, addr_size);
    p += addr_size;

    /*
     * The value follows the address; its offset can exceed 255B, so it
     * uses a 32-bit offset rather than the single-byte address offset.
     */
    txnc->value_offset = WT_PTRDIFF32(p, txnc);
    txnc->value_size = WT_STORE_SIZE(value_size);
    memcpy(p, value, value_size);

    /*
     * Stamp the entry with a new transaction ID: once that ID is no
     * longer needed by any running transaction, the entry can be
     * discarded (see the wrapup code).
     */
    txnc->current = __wt_txn_new_id(session);

    /* Account for the new entry in the page's in-memory footprint. */
    __wt_cache_page_inmem_incr(
        session, page, WT_OVFL_SIZE(txnc, WT_OVFL_TXNC));

    /* Insert the new entry into the skiplist. */
    __ovfl_txnc_skip_search_stack(head, stack, addr, addr_size);
    for (i = 0; i < skipdepth; ++i) {
        txnc->next[i] = *stack[i];
        *stack[i] = txnc;
    }

    if (WT_VERBOSE_ISSET(session, WT_VERB_OVERFLOW))
        WT_RET(__ovfl_txnc_verbose(session, page, txnc, "add"));

    return (0);
}
/*
 * __ovfl_txnc_wrapup --
 *     Resolve the page's transaction-cache list.
 */
static int
__ovfl_txnc_wrapup(WT_SESSION_IMPL *session, WT_PAGE *page)
{
    WT_OVFL_TXNC **head, **linkp, *entry;
    uint64_t oldest_id;
    size_t bytes_freed;
    int level;

    head = page->modify->ovfl_track->ovfl_txnc;

    /*
     * Take a single snapshot of the oldest transaction ID we must keep
     * alive: the structure is walked twice, and a per-entry visibility
     * check could give different answers between the two passes as the
     * global oldest ID moves forward.
     */
    oldest_id = __wt_txn_oldest_id(session);

    /*
     * Remove transaction-cache records whose transaction IDs are earlier
     * than any in the system.
     *
     * Pass one: unlink dead entries from every skiplist level above the
     * lowest, fixing up links as we go.
     */
    for (level = WT_SKIP_MAXDEPTH - 1; level > 0; --level)
        for (linkp = &head[level]; (entry = *linkp) != NULL;)
            if (WT_TXNID_LE(oldest_id, entry->current))
                linkp = &entry->next[level];
            else
                *linkp = entry->next[level];

    /* Pass two: unlink dead entries from level zero and free them. */
    bytes_freed = 0;
    for (linkp = &head[0]; (entry = *linkp) != NULL;) {
        if (WT_TXNID_LE(oldest_id, entry->current)) {
            linkp = &entry->next[0];
            continue;
        }

        *linkp = entry->next[0];
        if (WT_VERBOSE_ISSET(session, WT_VERB_OVERFLOW))
            WT_RET(
                __ovfl_txnc_verbose(session, page, entry, "free"));
        bytes_freed += WT_OVFL_SIZE(entry, WT_OVFL_TXNC);
        __wt_free(session, entry);
    }

    /* Give back the freed bytes from the page's in-memory footprint. */
    if (bytes_freed != 0)
        __wt_cache_page_inmem_decr(session, page, bytes_freed);
    return (0);
}
/*
 * __ovfl_reuse_wrapup_err --
 *     Resolve the page's overflow reuse list after an error occurs.
 */
static int
__ovfl_reuse_wrapup_err(WT_SESSION_IMPL *session, WT_PAGE *page)
{
    WT_BM *bm;
    WT_DECL_RET;
    WT_OVFL_REUSE **e, **head, *reuse;
    size_t decr;
    int i;

    bm = S2BT(session)->bm;
    head = page->modify->ovfl_track->ovfl_reuse;

    /*
     * Discard any overflow records that were just added, freeing underlying
     * blocks.
     *
     * First, walk the overflow reuse lists (except for the lowest one),
     * fixing up skiplist links.
     */
    for (i = WT_SKIP_MAXDEPTH - 1; i > 0; --i)
        for (e = &head[i]; (reuse = *e) != NULL;) {
            if (!F_ISSET(reuse, WT_OVFL_REUSE_JUST_ADDED)) {
                e = &reuse->next[i];
                continue;
            }
            *e = reuse->next[i];
        }

    /*
     * Second, discard any overflow record with a just-added flag, clear the
     * flags for the next run.
     */
    decr = 0;
    for (e = &head[0]; (reuse = *e) != NULL;) {
        if (!F_ISSET(reuse, WT_OVFL_REUSE_JUST_ADDED)) {
            F_CLR(reuse, WT_OVFL_REUSE_INUSE);
            e = &reuse->next[0];
            continue;
        }
        *e = reuse->next[0];

        if (WT_VERBOSE_ISSET(session, WT_VERB_OVERFLOW))
            WT_RET(
                __ovfl_reuse_verbose(session, page, reuse, "free"));

        /*
         * Accumulate (rather than immediately return) block-manager
         * errors so we keep discarding the remaining just-added blocks
         * even after one free fails.
         */
        WT_TRET(bm->free(
            bm, session, WT_OVFL_REUSE_ADDR(reuse), reuse->addr_size));
        decr += WT_OVFL_SIZE(reuse, WT_OVFL_REUSE);
        __wt_free(session, reuse);
    }

    if (decr != 0)
        __wt_cache_page_inmem_decr(session, page, decr);

    /*
     * BUG FIX: return the error accumulated via WT_TRET.  The previous
     * code returned 0 unconditionally, silently discarding any
     * block-manager free failure it had carefully collected into "ret".
     */
    return (ret);
}
/*
 * __ovfl_txnc_wrapup --
 *     Resolve the page's transaction-cache list.
 */
static int
__ovfl_txnc_wrapup(WT_SESSION_IMPL *session, WT_PAGE *page)
{
    WT_OVFL_TXNC **e, **head, *txnc;
    uint64_t oldest_txn;
    size_t decr;
    int i;

    head = page->modify->ovfl_track->ovfl_txnc;

    /*
     * BUG FIX: snapshot the oldest transaction ID we need to keep alive
     * once, up front, instead of calling the visibility check separately
     * in each pass.  We make two passes over the skiplist, and the
     * per-entry check can change its answer as the global oldest ID
     * advances between passes: an entry could then be unlinked from
     * level 0 and freed while still linked into the upper levels,
     * leaving dangling pointers in the skiplist.
     */
    oldest_txn = __wt_txn_oldest_id(session);

    /*
     * Discard any transaction-cache records with transaction IDs earlier
     * than any in the system.
     *
     * First, walk the overflow transaction-cache skip lists (except for
     * the lowest level), fixing up links.
     */
    for (i = WT_SKIP_MAXDEPTH - 1; i > 0; --i)
        for (e = &head[i]; (txnc = *e) != NULL;) {
            if (WT_TXNID_LE(oldest_txn, txnc->current)) {
                e = &txnc->next[i];
                continue;
            }
            *e = txnc->next[i];
        }

    /* Second, discard any no longer needed transaction-cache records. */
    decr = 0;
    for (e = &head[0]; (txnc = *e) != NULL;) {
        if (WT_TXNID_LE(oldest_txn, txnc->current)) {
            e = &txnc->next[0];
            continue;
        }
        *e = txnc->next[0];

        if (WT_VERBOSE_ISSET(session, WT_VERB_OVERFLOW))
            WT_RET(
                __ovfl_txnc_verbose(session, page, txnc, "free"));

        /*
         * BUG FIX: decrement the page footprint with the same
         * WT_OVFL_SIZE(txnc, WT_OVFL_TXNC) calculation the add path
         * used to increment it; the previous hand-rolled
         * "WT_OVFL_SIZE(WT_OVFL_TXNC) + addr_size + value_size" did not
         * match, so the in-memory accounting drifted over time.
         */
        decr += WT_OVFL_SIZE(txnc, WT_OVFL_TXNC);
        __wt_free(session, txnc);
    }

    if (decr != 0)
        __wt_cache_page_inmem_decr(session, page, decr);
    return (0);
}
/*
 * __ovfl_reuse_wrapup --
 *     Resolve the page's overflow reuse list after a page is written.
 */
static int
__ovfl_reuse_wrapup(WT_SESSION_IMPL *session, WT_PAGE *page)
{
    WT_BM *bm;
    WT_OVFL_REUSE **e, **head, *reuse;
    size_t decr;
    int i;

    bm = S2BT(session)->bm;
    head = page->modify->ovfl_track->ovfl_reuse;

    /*
     * Discard any overflow records that aren't in-use, freeing underlying
     * blocks.
     *
     * First, walk the overflow reuse lists (except for the lowest one),
     * fixing up skiplist links.
     */
    for (i = WT_SKIP_MAXDEPTH - 1; i > 0; --i)
        for (e = &head[i]; (reuse = *e) != NULL;) {
            /* In-use entries survive this run; step over them. */
            if (F_ISSET(reuse, WT_OVFL_REUSE_INUSE)) {
                e = &reuse->next[i];
                continue;
            }
            /* Unlink unused entries from this upper level. */
            *e = reuse->next[i];
        }

    /*
     * Second, discard any overflow record without an in-use flag, clear
     * the flags for the next run.
     *
     * As part of the pass through the lowest level, figure out how much
     * space we added/subtracted from the page, and update its footprint.
     * We don't get it exactly correct because we don't know the depth of
     * the skiplist here, but it's close enough, and figuring out the
     * memory footprint change in the reconciliation wrapup code means
     * fewer atomic updates and less code overall.
     */
    decr = 0;
    for (e = &head[0]; (reuse = *e) != NULL;) {
        if (F_ISSET(reuse, WT_OVFL_REUSE_INUSE)) {
            /* Surviving entry: reset both flags for the next run. */
            F_CLR(reuse,
                WT_OVFL_REUSE_INUSE | WT_OVFL_REUSE_JUST_ADDED);
            e = &reuse->next[0];
            continue;
        }
        *e = reuse->next[0];

        /*
         * NOTE(review): the assertion encodes the invariant that a
         * just-added record is still flagged in-use when wrapup runs,
         * i.e. we never free a record added during this reconciliation.
         */
        WT_ASSERT(session,
            !F_ISSET(reuse, WT_OVFL_REUSE_JUST_ADDED));

        if (WT_VERBOSE_ISSET(session, WT_VERB_OVERFLOW))
            WT_RET(
                __ovfl_reuse_verbose(session, page, reuse, "free"));

        /* Free the underlying block, then the tracking entry itself. */
        WT_RET(bm->free(
            bm, session, WT_OVFL_REUSE_ADDR(reuse), reuse->addr_size));
        decr += WT_OVFL_SIZE(reuse, WT_OVFL_REUSE);
        __wt_free(session, reuse);
    }

    if (decr != 0)
        __wt_cache_page_inmem_decr(session, page, decr);
    return (0);
}