/*
 * __wt_btcur_next_random --
 *	Position the cursor on a randomly chosen record.
 *
 *	Returns the result of __wt_kv_return when the random search leaves
 *	cbt->compare at zero, WT_NOTFOUND when it does not, ENOTSUP for any
 *	tree type other than row-store, or the error from a failing helper.
 *	A WT_RESTART result restarts the whole operation.
 */
int
__wt_btcur_next_random(WT_CURSOR_BTREE *cbt)
{
	WT_DECL_RET;
	WT_BTREE *btree;
	WT_SESSION_IMPL *session;

	btree = cbt->btree;
	session = (WT_SESSION_IMPL *)cbt->iface.session;

	WT_DSTAT_INCR(session, cursor_next);

retry:
	WT_RET(__cursor_func_init(cbt, 1));
	__cursor_position_clear(cbt);

	/*
	 * Random selection is implemented for row-store only: picking a
	 * random value from a column-store is trivial for the application,
	 * should it ever want to.
	 */
	if (btree->type == BTREE_ROW) {
		WT_ERR(__wt_row_random(session, cbt));
	} else {
		WT_ERR(ENOTSUP);
	}

	/* Only an exact match yields a key/value pair to return. */
	if (cbt->compare == 0)
		ret = __wt_kv_return(session, cbt);
	else
		ret = WT_NOTFOUND;

err:
	if (ret == WT_RESTART)
		goto retry;

	WT_TRET(__cursor_func_resolve(cbt, ret));
	return (ret);
}
/*
 * __wt_btcur_update --
 *	Update a record in the tree.
 *
 *	Returns 0 on success; WT_NOTFOUND when the search does not land on an
 *	exact, valid match (and, for column-stores, the record is not one
 *	that exists implicitly); EINVAL when a fixed-length column-store is
 *	given a value that is not exactly one byte; otherwise the error from
 *	the size check, search or modify call.  A WT_RESTART result from the
 *	modify call restarts the whole operation.
 */
int
__wt_btcur_update(WT_CURSOR_BTREE *cbt)
{
	WT_BTREE *btree;
	WT_CURSOR *cursor;
	WT_SESSION_IMPL *session;
	int ret;

	btree = cbt->btree;
	cursor = &cbt->iface;
	session = (WT_SESSION_IMPL *)cursor->session;
	WT_BSTAT_INCR(session, cursor_updates);

	/*
	 * Validate the application's key and value up front.  The fixed-length
	 * size requirement is checked here, alongside the other argument
	 * checks, so that a bad value is rejected before __cursor_func_init
	 * runs: this failure must not return between __cursor_func_init and
	 * the matching __cursor_func_resolve at the err label.
	 */
	if (btree->type == BTREE_ROW)
		WT_RET(__cursor_size_chk(session, &cursor->key));
	WT_RET(__cursor_size_chk(session, &cursor->value));
	if (btree->type == BTREE_COL_FIX && cursor->value.size != 1)
		WT_RET_MSG(session, EINVAL,
		    "item size of %" PRIu32 " does not match "
		    "fixed-length file requirement of 1 byte",
		    cursor->value.size);

retry:	__cursor_func_init(cbt, 1);

	switch (btree->type) {
	case BTREE_COL_FIX:
	case BTREE_COL_VAR:
		WT_ERR(__wt_col_search(session, cbt, 1));

		/*
		 * Update the record if it exists.  Creating a record past the
		 * end of the tree in a fixed-length column-store implicitly
		 * fills the gap with empty records.  Update the record in that
		 * case, the record exists.
		 */
		if ((cbt->compare != 0 || __cursor_invalid(cbt)) &&
		    !__cursor_fix_implicit(btree, cbt))
			ret = WT_NOTFOUND;
		else if ((ret = __wt_col_modify(session, cbt, 3)) == WT_RESTART)
			goto retry;
		break;
	case BTREE_ROW:
		/* Update the record if it exists. */
		WT_ERR(__wt_row_search(session, cbt, 1));
		if (cbt->compare != 0 || __cursor_invalid(cbt))
			ret = WT_NOTFOUND;
		else if ((ret = __wt_row_modify(session, cbt, 0)) == WT_RESTART)
			goto retry;
		break;
	WT_ILLEGAL_VALUE(session);
	}

err:	__cursor_func_resolve(cbt, ret);

	return (ret);
}
/*
 * __wt_btcur_remove --
 *	Delete the record identified by the cursor's key.
 *
 *	Returns 0 on success, WT_NOTFOUND when there is no matching record to
 *	delete, or the error from the size check, search or modify call.  A
 *	WT_RESTART result from the modify call restarts the whole operation.
 */
int
__wt_btcur_remove(WT_CURSOR_BTREE *cbt)
{
	WT_SESSION_IMPL *session;
	WT_CURSOR *cursor;
	WT_BTREE *btree;
	int ret;

	cursor = &cbt->iface;
	session = (WT_SESSION_IMPL *)cursor->session;
	btree = cbt->btree;

	WT_BSTAT_INCR(session, cursor_removes);

	/* Only row-store keys are size-checked. */
	if (btree->type == BTREE_ROW)
		WT_RET(__cursor_size_chk(session, &cursor->key));

retry:
	__cursor_func_init(cbt, 1);

	switch (btree->type) {
	case BTREE_COL_FIX:
	case BTREE_COL_VAR:
		WT_ERR(__wt_col_search(session, cbt, 1));

		if (cbt->compare == 0 && !__cursor_invalid(cbt)) {
			/* Exact, valid match: remove it. */
			ret = __wt_col_modify(session, cbt, 2);
			if (ret == WT_RESTART)
				goto retry;
		} else if (__cursor_fix_implicit(btree, cbt)) {
			/*
			 * A record created past the end of a fixed-length
			 * column-store implicitly fills the gap with empty
			 * records; removing one of those is reported as a
			 * successful delete.
			 */
			ret = 0;
		} else
			ret = WT_NOTFOUND;
		break;
	case BTREE_ROW:
		WT_ERR(__wt_row_search(session, cbt, 1));

		if (cbt->compare == 0 && !__cursor_invalid(cbt)) {
			/* Exact, valid match: remove it. */
			ret = __wt_row_modify(session, cbt, 1);
			if (ret == WT_RESTART)
				goto retry;
		} else
			ret = WT_NOTFOUND;
		break;
	WT_ILLEGAL_VALUE(session);
	}

err:
	__cursor_func_resolve(cbt, ret);
	return (ret);
}
/*
 * __wt_btcur_search --
 *	Look up the record that exactly matches the cursor's key.
 *
 *	Returns the result of __wt_kv_return on an exact, valid match, 0 with
 *	a one-byte zero value for an implicit fixed-length column-store
 *	record, WT_NOTFOUND when nothing matches, or the error from the size
 *	check or search call.
 */
int
__wt_btcur_search(WT_CURSOR_BTREE *cbt)
{
	WT_SESSION_IMPL *session;
	WT_CURSOR *cursor;
	WT_BTREE *btree;
	WT_ITEM *value;
	int ret;

	cursor = &cbt->iface;
	session = (WT_SESSION_IMPL *)cursor->session;
	btree = cbt->btree;

	WT_BSTAT_INCR(session, cursor_read);

	/* Only row-store keys are size-checked. */
	if (btree->type == BTREE_ROW)
		WT_RET(__cursor_size_chk(session, &cursor->key));

	__cursor_func_init(cbt, 1);

	if (btree->type == BTREE_ROW) {
		WT_ERR(__wt_row_search(session, cbt, 0));
	} else {
		WT_ERR(__wt_col_search(session, cbt, 0));
	}

	if (cbt->compare == 0 && !__cursor_invalid(cbt))
		ret = __wt_kv_return(session, cbt, 0);
	else if (__cursor_fix_implicit(btree, cbt)) {
		/*
		 * Creating a record past the end of the tree in a fixed-length
		 * column-store implicitly fills the gap with empty records:
		 * hand back a single zero byte stored in the cursor.  The
		 * return value stays at the 0 left by the successful search.
		 */
		cbt->v = 0;
		value = &cbt->iface.value;
		value->data = &cbt->v;
		value->size = 1;
	} else
		ret = WT_NOTFOUND;

err:
	__cursor_func_resolve(cbt, ret);
	return (ret);
}
/*
 * __wt_btcur_prev --
 *	Step the cursor back to the preceding record in the tree.
 *
 *	A non-zero "discard" adds WT_TREE_DISCARD to the tree-walk flags and
 *	records each visited page's write generation in the cursor.  Returns
 *	WT_NOTFOUND when the tree walk runs out of pages; a WT_RESTART result
 *	restarts the whole operation.
 */
int
__wt_btcur_prev(WT_CURSOR_BTREE *cbt, int discard)
{
	WT_DECL_RET;
	WT_SESSION_IMPL *session;
	WT_PAGE *page;
	uint32_t flags;
	int new_page;

	session = (WT_SESSION_IMPL *)cbt->iface.session;
	WT_DSTAT_INCR(session, cursor_prev);

	/* Tree walk flags. */
	flags = WT_TREE_SKIP_INTL | WT_TREE_PREV;
	if (discard)
		LF_SET(WT_TREE_DISCARD);

retry:
	WT_RET(__cursor_func_init(cbt, 0));
	__cursor_position_clear(cbt);

	/* Switching iteration direction requires some setup first. */
	if (!F_ISSET(cbt, WT_CBT_ITERATE_PREV))
		__wt_btcur_iterate_setup(cbt, 0);

	/*
	 * When this is a modification we are about to read information from
	 * the page: save its write generation beforehand.
	 */
	page = cbt->page;
	if (discard && page != NULL) {
		WT_ERR(__wt_page_modify_init(session, page));
		WT_ORDERED_READ(cbt->write_gen, page->modify->write_gen);
	}

	/*
	 * Consume whatever page we hold until the per-page helper reports
	 * not-found, then walk to the previous page; repeat until the start
	 * of the file is reached.
	 */
	for (new_page = 0;; new_page = 1) {
		/* Appended column-store entries are walked first. */
		if (F_ISSET(cbt, WT_CBT_ITERATE_APPEND)) {
			switch (page->type) {
			case WT_PAGE_COL_FIX:
				ret = __cursor_fix_append_prev(cbt, new_page);
				break;
			case WT_PAGE_COL_VAR:
				ret = __cursor_var_append_prev(cbt, new_page);
				break;
			WT_ILLEGAL_VALUE_ERR(session);
			}
			if (ret == 0)
				break;

			/* Done with the append list, whatever the outcome. */
			F_CLR(cbt, WT_CBT_ITERATE_APPEND);
			if (ret != WT_NOTFOUND)
				break;

			/* The page's regular entries are entered afresh. */
			new_page = 1;
		}

		/* Then the entries of the page itself. */
		if (page != NULL) {
			switch (page->type) {
			case WT_PAGE_COL_FIX:
				ret = __cursor_fix_prev(cbt, new_page);
				break;
			case WT_PAGE_COL_VAR:
				ret = __cursor_var_prev(cbt, new_page);
				break;
			case WT_PAGE_ROW_LEAF:
				ret = __cursor_row_prev(cbt, new_page);
				break;
			WT_ILLEGAL_VALUE_ERR(session);
			}
			if (ret != WT_NOTFOUND)
				break;
		}

		/* Page exhausted (or none held): move to the previous one. */
		cbt->page = NULL;
		WT_ERR(__wt_tree_walk(session, &page, flags));
		WT_ERR_TEST(page == NULL, WT_NOTFOUND);
		WT_ASSERT(session,
		    page->type != WT_PAGE_COL_INT &&
		    page->type != WT_PAGE_ROW_INT);
		cbt->page = page;

		/* Initialize the new page's modification information. */
		if (discard) {
			WT_ERR(__wt_page_modify_init(session, page));
			WT_ORDERED_READ(
			    cbt->write_gen, page->modify->write_gen);
		}

		/*
		 * The last page in a column-store has appended entries.  It
		 * is handled apart from the usual cursor code: it is only
		 * that one page and it is in a simple format.
		 */
		if (page->type != WT_PAGE_ROW_LEAF &&
		    (cbt->ins_head = WT_COL_APPEND(page)) != NULL)
			F_SET(cbt, WT_CBT_ITERATE_APPEND);
	}

err:
	if (ret == WT_RESTART)
		goto retry;

	WT_TRET(__cursor_func_resolve(cbt, ret));
	return (ret);
}
/*
 * __wt_btcur_prev --
 *	Step the cursor back to the preceding record in the tree.
 *
 *	Returns 0 once a record is found, WT_NOTFOUND when the tree walk runs
 *	out of pages, or the error from a failing helper.
 */
int
__wt_btcur_prev(WT_CURSOR_BTREE *cbt)
{
	WT_DECL_RET;
	WT_SESSION_IMPL *session;
	int new_page;

	session = (WT_SESSION_IMPL *)cbt->iface.session;
	WT_BSTAT_INCR(session, cursor_read_prev);

	__cursor_func_init(cbt, 0);

	/* Switching iteration direction requires some setup first. */
	if (!F_ISSET(cbt, WT_CBT_ITERATE_PREV))
		__wt_btcur_iterate_setup(cbt, 0);

	/*
	 * Consume whatever page we hold until the per-page helper reports
	 * not-found, then step to the previous page; repeat until the start
	 * of the file is reached.
	 */
	for (new_page = 0;; new_page = 1) {
		/* Appended column-store entries are walked first. */
		if (F_ISSET(cbt, WT_CBT_ITERATE_APPEND)) {
			switch (cbt->page->type) {
			case WT_PAGE_COL_FIX:
				ret = __cursor_fix_append_prev(cbt, new_page);
				break;
			case WT_PAGE_COL_VAR:
				ret = __cursor_var_append_prev(cbt, new_page);
				break;
			WT_ILLEGAL_VALUE_ERR(session);
			}
			if (ret == 0)
				break;

			/* Done with the append list, whatever the outcome. */
			F_CLR(cbt, WT_CBT_ITERATE_APPEND);
			if (ret != WT_NOTFOUND)
				break;

			/* The page's regular entries are entered afresh. */
			new_page = 1;
		}

		/* Then the entries of the page itself. */
		if (cbt->page != NULL) {
			switch (cbt->page->type) {
			case WT_PAGE_COL_FIX:
				ret = __cursor_fix_prev(cbt, new_page);
				break;
			case WT_PAGE_COL_VAR:
				ret = __cursor_var_prev(cbt, new_page);
				break;
			case WT_PAGE_ROW_LEAF:
				ret = __cursor_row_prev(cbt, new_page);
				break;
			WT_ILLEGAL_VALUE_ERR(session);
			}
			if (ret != WT_NOTFOUND)
				break;
		}

		/* Keep stepping until we land on a non-internal page. */
		for (;;) {
			WT_ERR(__wt_tree_np(session, &cbt->page, 0, 0));
			WT_ERR_TEST(cbt->page == NULL, WT_NOTFOUND);
			if (cbt->page->type != WT_PAGE_COL_INT &&
			    cbt->page->type != WT_PAGE_ROW_INT)
				break;
		}

		/*
		 * The last page in a column-store has appended entries.  It
		 * is handled apart from the usual cursor code: it is only
		 * that one page and it is in a simple format.
		 */
		if (cbt->page->type != WT_PAGE_ROW_LEAF &&
		    (cbt->ins_head = WT_COL_APPEND(cbt->page)) != NULL)
			F_SET(cbt, WT_CBT_ITERATE_APPEND);
	}

err:
	__cursor_func_resolve(cbt, ret);
	return (ret);
}
/*
 * __wt_btcur_insert --
 *	Add a record to the tree.
 *
 *	Returns 0 on success, WT_DUPLICATE_KEY when the record already exists
 *	and WT_CURSTD_OVERWRITE is not set, or the error from the size check,
 *	search or modify call.  A WT_RESTART result from the modify call
 *	restarts the whole operation.
 */
int
__wt_btcur_insert(WT_CURSOR_BTREE *cbt)
{
	WT_SESSION_IMPL *session;
	WT_CURSOR *cursor;
	WT_BTREE *btree;
	int exists, ret;

	cursor = &cbt->iface;
	session = (WT_SESSION_IMPL *)cursor->session;
	btree = cbt->btree;

	WT_BSTAT_INCR(session, cursor_inserts);

	/* Only row-store keys are size-checked; values always are. */
	if (btree->type == BTREE_ROW)
		WT_RET(__cursor_size_chk(session, &cursor->key));
	WT_RET(__cursor_size_chk(session, &cursor->value));

retry:
	__cursor_func_init(cbt, 1);

	switch (btree->type) {
	case BTREE_COL_FIX:
	case BTREE_COL_VAR:
		/*
		 * With WT_CURSTD_APPEND set a new record is inserted and the
		 * application's record number is ignored.  Search for the
		 * largest possible record number so the search ends on the
		 * last page; the real record number is assigned by the
		 * serialized append operation (__wt_col_append_serial_func).
		 */
		if (F_ISSET(cursor, WT_CURSTD_APPEND))
			cursor->recno = UINT64_MAX;
		WT_ERR(__wt_col_search(session, cbt, 1));
		if (F_ISSET(cursor, WT_CURSTD_APPEND))
			cursor->recno = 0;

		/*
		 * With WT_CURSTD_OVERWRITE set, insert or update the pair
		 * unconditionally.
		 *
		 * Without it, fail when the key exists.  Creating a record
		 * past the end of the tree in a fixed-length column-store
		 * implicitly fills the gap with empty records, and those
		 * count as existing.
		 */
		if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE)) {
			if (cbt->compare == 0)
				exists = !__cursor_invalid(cbt);
			else
				exists = __cursor_fix_implicit(btree, cbt);
			if (exists) {
				ret = WT_DUPLICATE_KEY;
				break;
			}
		}

		ret = __wt_col_modify(session, cbt, 3);
		if (ret == WT_RESTART)
			goto retry;

		/* Report the record number the append ended up with. */
		if (ret == 0 && F_ISSET(cursor, WT_CURSTD_APPEND))
			cursor->recno = cbt->recno;
		break;
	case BTREE_ROW:
		WT_ERR(__wt_row_search(session, cbt, 1));

		/*
		 * Without WT_CURSTD_OVERWRITE an existing key is an error;
		 * with it, the pair is inserted or updated.
		 */
		if (cbt->compare == 0 && !__cursor_invalid(cbt) &&
		    !F_ISSET(cursor, WT_CURSTD_OVERWRITE)) {
			ret = WT_DUPLICATE_KEY;
			break;
		}

		ret = __wt_row_modify(session, cbt, 0);
		if (ret == WT_RESTART)
			goto retry;
		break;
	WT_ILLEGAL_VALUE(session);
	}

err:
	__cursor_func_resolve(cbt, ret);
	return (ret);
}
/*
 * __wt_btcur_search_near --
 *	Look up the cursor's key, settling for a neighboring record.
 *
 *	On return *exact is 0 for an implicit fixed-length record, the search
 *	comparison result when the search landed on a valid record, 1 when the
 *	cursor was moved forward with __wt_btcur_next, or -1 when it was moved
 *	backward with __wt_btcur_prev.
 */
int
__wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exact)
{
	WT_SESSION_IMPL *session;
	WT_CURSOR *cursor;
	WT_BTREE *btree;
	WT_ITEM *value;
	int ret;

	cursor = &cbt->iface;
	session = (WT_SESSION_IMPL *)cursor->session;
	btree = cbt->btree;

	WT_BSTAT_INCR(session, cursor_read_near);

	/* Only row-store keys are size-checked. */
	if (btree->type == BTREE_ROW)
		WT_RET(__cursor_size_chk(session, &cursor->key));

	__cursor_func_init(cbt, 1);

	if (btree->type == BTREE_ROW) {
		WT_ERR(__wt_row_search(session, cbt, 0));
	} else {
		WT_ERR(__wt_col_search(session, cbt, 0));
	}

	/*
	 * In order of preference:
	 *
	 * 1. Creating a record past the end of the tree in a fixed-length
	 *    column-store implicitly fills the gap with empty records, so
	 *    instantiate the empty record: it is an exact match.
	 * 2. A valid key (one that wasn't deleted) is returned as found.
	 * 3. For a deleted key, try the next key in the tree (a bias for
	 *    prefix searches).  Cursor next skips deleted records, so there
	 *    is no need to test for them again.
	 * 4. With no larger key in the tree, redo the search and try for an
	 *    earlier record.  If that fails too, there is nothing to return.
	 */
	if (cbt->compare != 0 && __cursor_fix_implicit(btree, cbt)) {
		cbt->v = 0;
		value = &cbt->iface.value;
		value->data = &cbt->v;
		value->size = 1;
		*exact = 0;
	} else if (!__cursor_invalid(cbt)) {
		*exact = cbt->compare;
		ret = __wt_kv_return(session, cbt, cbt->compare != 0);
	} else {
		ret = __wt_btcur_next(cbt);
		if (ret != WT_NOTFOUND)
			*exact = 1;
		else {
			if (btree->type == BTREE_ROW) {
				WT_ERR(__wt_row_search(session, cbt, 0));
			} else {
				WT_ERR(__wt_col_search(session, cbt, 0));
			}

			if (!__cursor_invalid(cbt)) {
				*exact = cbt->compare;
				ret = __wt_kv_return(
				    session, cbt, cbt->compare != 0);
			} else {
				ret = __wt_btcur_prev(cbt);
				if (ret != WT_NOTFOUND)
					*exact = -1;
			}
		}
	}

err:
	__cursor_func_resolve(cbt, ret);
	return (ret);
}