/*
 * __wt_turtle_update --
 *	Update the turtle file.
 *
 *	The new contents are written to the temporary file named by
 *	WT_METADATA_TURTLE_SET, which is then renamed to WT_METADATA_TURTLE.
 *	The file holds the version information followed by the key and the
 *	value, one item per line.
 *
 *	Returns 0 on success and an error code otherwise. If a failure occurs
 *	after the temporary file was opened, the stream is closed and the
 *	temporary file is removed before returning.
 */
int
__wt_turtle_update(
    WT_SESSION_IMPL *session, const char *key, const char *value)
{
	FILE *fp;
	WT_DECL_RET;
	int vmajor, vminor, vpatch;
	const char *version;
	char *path;

	fp = NULL;
	path = NULL;

	/*
	 * Create the turtle setup file: we currently re-write it from scratch
	 * every time.
	 */
	WT_RET(__wt_filename(session, WT_METADATA_TURTLE_SET, &path));
	if ((fp = fopen(path, "w")) == NULL)
		ret = __wt_errno();
	__wt_free(session, path);
	if (fp == NULL)
		return (ret);

	version = wiredtiger_version(&vmajor, &vminor, &vpatch);
	WT_ERR_TEST((fprintf(fp,
	    "%s\n%s\n%s\n" "major=%d,minor=%d,patch=%d\n%s\n%s\n",
	    WT_METADATA_VERSION_STR, version,
	    WT_METADATA_VERSION, vmajor, vminor, vpatch,
	    key, value) < 0), __wt_errno());

	/*
	 * Closing the stream flushes any buffered output, so a failure here
	 * is treated as a failure to write the file. Clear the handle first
	 * so the error path doesn't close it a second time.
	 */
	ret = fclose(fp);
	fp = NULL;
	WT_ERR_TEST(ret == EOF, __wt_errno());

	WT_ERR(
	    __wt_rename(session, WT_METADATA_TURTLE_SET, WT_METADATA_TURTLE));

	if (0) {
		/*
		 * Close the stream before removing the temporary file: some
		 * platforms refuse to remove a file that is still open.
		 */
err:		if (fp != NULL)
			WT_TRET(fclose(fp) == 0 ? 0 : __wt_errno());
		WT_TRET(__wt_remove(session, WT_METADATA_TURTLE_SET));
	}
	return (ret);
}
/*
 * __backup_all --
 *	Backup all objects in the database.
 *
 *	Every metadata key/value pair is written to the hot backup file held
 *	in cb->bfp, then __backup_list_all_append is applied through
 *	__wt_meta_btree_apply. Returns 0 on success, an error code otherwise.
 */
static int
__backup_all(WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb)
{
	WT_CONFIG_ITEM cval;
	WT_CURSOR *cursor;
	WT_DECL_RET;
	const char *key, *value;

	cursor = NULL;

	/*
	 * Walk the metadata with a cursor, echoing each entry into the hot
	 * backup file as we go.
	 */
	WT_ERR(__wt_metadata_cursor(session, NULL, &cursor));
	for (;;) {
		if ((ret = cursor->next(cursor)) != 0)
			break;

		WT_ERR(cursor->get_key(cursor, &key));
		WT_ERR(cursor->get_value(cursor, &value));
		WT_ERR_TEST((fprintf(
		    cb->bfp, "%s\n%s\n", key, value) < 0), __wt_errno());

		/*
		 * Reject any entry whose "type" or "source" doesn't start
		 * with a file or lsm prefix: such an object is a non-standard
		 * data source that can't be backed up. It is also a sanity
		 * check, anything else means we're confused. An entry without
		 * the configuration key at all is fine.
		 */
		ret = __wt_config_getones(session, value, "type", &cval);
		if (ret == 0 &&
		    !WT_PREFIX_MATCH_LEN(cval.str, cval.len, "file") &&
		    !WT_PREFIX_MATCH_LEN(cval.str, cval.len, "lsm")) {
			WT_ERR_MSG(session, ENOTSUP,
			    "hot backup is not supported for objects of "
			    "type %.*s", (int)cval.len, cval.str);
		}
		WT_ERR_NOTFOUND_OK(ret);

		ret = __wt_config_getones(session, value, "source", &cval);
		if (ret == 0 &&
		    !WT_PREFIX_MATCH_LEN(cval.str, cval.len, "file:") &&
		    !WT_PREFIX_MATCH_LEN(cval.str, cval.len, "lsm:")) {
			WT_ERR_MSG(session, ENOTSUP,
			    "hot backup is not supported for objects of "
			    "source %.*s", (int)cval.len, cval.str);
		}
		WT_ERR_NOTFOUND_OK(ret);
	}
	/* Running off the end of the metadata is the normal loop exit. */
	WT_ERR_NOTFOUND_OK(ret);

	/* Build a list of the file objects that need to be copied. */
	WT_WITH_DHANDLE_LOCK(session, ret = __wt_meta_btree_apply(
	    session, __backup_list_all_append, NULL));

err:	if (cursor != NULL)
		WT_TRET(cursor->close(cursor));
	return (ret);
}
/*
 * __wt_meta_turtle_read --
 *	Read the turtle file.
 *
 *	The file is read as pairs of lines, a key line followed by a value
 *	line. On a key match a copy of the value (made with __wt_strdup) is
 *	returned in valuep.
 *
 *	Returns 0 on success, WT_NOTFOUND if the file can't be opened or the
 *	key isn't present, the result of __wt_illegal_value if a line is
 *	missing or isn't newline-terminated within the line buffer, or an
 *	error code on I/O failure. The stream and the path are released on
 *	every path out of the function.
 */
int
__wt_meta_turtle_read(
    WT_SESSION_IMPL *session, const char *key, const char **valuep)
{
	FILE *fp;
	WT_DECL_RET;
	const char *path;
	char *p, line[1024];

	fp = NULL;
	path = NULL;

	/* Open the turtle file. */
	WT_RET(__wt_filename(session, WT_METADATA_TURTLE, &path));
	WT_ERR_TEST((fp = fopen(path, "r")) == NULL, WT_NOTFOUND);

	/* Search for the key. */
	ret = WT_NOTFOUND;
	while (fgets(line, sizeof(line), fp) != NULL) {
		if ((p = strchr(line, '\n')) == NULL)
			goto format;
		*p = '\0';
		if (strcmp(key, line) == 0)
			ret = 0;

		/*
		 * Every key line is followed by a value line: read it whether
		 * or not the key matched, so the next iteration starts on a
		 * key line again.
		 */
		if (fgets(line, sizeof(line), fp) == NULL)
			goto format;
		if ((p = strchr(line, '\n')) == NULL)
			goto format;
		*p = '\0';
		if (ret == 0)
			break;
	}

	/* Check for an I/O error. */
	if (ferror(fp)) {
		WT_ERR(__wt_errno());
	}
	WT_ERR(ret);

	/* Successful: copy the value for the caller. */
	WT_ERR(__wt_strdup(session, line, valuep));

	if (0) {
		/*
		 * Record the format error and fall into the common cleanup:
		 * returning directly from here would leak both the open
		 * stream and the allocated path.
		 */
format:		ret = __wt_illegal_value(session, WT_METADATA_TURTLE);
	}
err:	if (fp != NULL)
		WT_TRET(fclose(fp) == 0 ? 0 : __wt_errno());
	__wt_free(session, path);
	return (ret);
}
/*
 * __backup_file_create --
 *	Create the meta-data backup file.
 *
 *	The opened stream is stored in cb->bfp. Returns 0 on success, or the
 *	error from building the file name or from opening the file.
 */
static int
__backup_file_create(WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb)
{
	WT_DECL_RET;
	char *path;

	/* Build the hot backup file's name; nothing to release on failure. */
	WT_RET(__wt_filename(session, WT_METADATA_BACKUP, &path));

	/* Open it for writing, capturing the error if that doesn't work. */
	cb->bfp = fopen(path, "w");
	ret = cb->bfp == NULL ? __wt_errno() : 0;

	/* The name is only needed for the open. */
	__wt_free(session, path);
	return (ret);
}
/*
 * __wt_rwlock_alloc --
 *	Allocate and initialize a read/write lock.
 *
 *	The name pointer is stored in the lock, not copied. On success the new
 *	lock is returned in rwlockp and the function returns 0. On failure a
 *	non-zero error is returned, everything acquired here is released and
 *	rwlockp is left untouched, so the caller never sees a pointer to freed
 *	memory.
 */
int
__wt_rwlock_alloc(
    WT_SESSION_IMPL *session, const char *name, WT_RWLOCK **rwlockp)
{
	WT_DECL_RET;
	WT_RWLOCK *rwlock;
	int initialized;

	initialized = 0;

	WT_RET(__wt_calloc(session, 1, sizeof(WT_RWLOCK), &rwlock));
	WT_ERR_TEST(pthread_rwlock_init(&rwlock->rwlock, NULL), WT_ERROR);
	initialized = 1;

	rwlock->name = name;

	WT_VERBOSE_ERR(session, mutex,
	    "rwlock: alloc %s (%p)", rwlock->name, rwlock);

	/*
	 * Hand the lock to the caller only once nothing else can fail: the
	 * error path below frees the structure.
	 */
	*rwlockp = rwlock;

	if (0) {
		/* Destroy the lock if it was initialized, then free it. */
err:		if (initialized)
			(void)pthread_rwlock_destroy(&rwlock->rwlock);
		__wt_free(session, rwlock);
	}
	return (ret);
}
/*
 * __wt_btcur_prev --
 *	Move to the previous record in the tree.
 *
 *	When truncating is set, WT_READ_TRUNCATE is added to the tree-walk
 *	flags. Returns 0 once a record is found, WT_NOTFOUND when the walk
 *	runs out of pages, or an error; the cursor is reset on any non-zero
 *	return.
 */
int
__wt_btcur_prev(WT_CURSOR_BTREE *cbt, bool truncating)
{
	WT_DECL_RET;
	WT_PAGE *page;
	WT_SESSION_IMPL *session;
	uint32_t flags;
	bool newpage;

	session = (WT_SESSION_IMPL *)cbt->iface.session;

	WT_STAT_FAST_CONN_INCR(session, cursor_prev);
	WT_STAT_FAST_DATA_INCR(session, cursor_prev);

	WT_RET(__cursor_func_init(cbt, false));

	/* A cursor not already moving backward needs some setup first. */
	if (!F_ISSET(cbt, WT_CBT_ITERATE_PREV))
		__wt_btcur_iterate_setup(cbt);

	/* Tree walk flags. */
	flags = WT_READ_PREV | WT_READ_SKIP_INTL;
	if (truncating)
		LF_SET(WT_READ_TRUNCATE);

	/*
	 * Exhaust whatever page the cursor currently holds, then keep stepping
	 * to the previous page until a record turns up or the walk returns no
	 * page. The first pass works on the page already held, so it isn't a
	 * new page.
	 */
	newpage = false;
	for (;;) {
		if (cbt->ref == NULL)
			page = NULL;
		else
			page = cbt->ref->page;
		WT_ASSERT(session, page == NULL || !WT_PAGE_IS_INTERNAL(page));

		/*
		 * On arriving at a new column-store page, check for an append
		 * list: if there is one it's walked first, separately from
		 * the usual cursor code.
		 */
		if (newpage && page != NULL &&
		    page->type != WT_PAGE_ROW_LEAF) {
			cbt->ins_head = WT_COL_APPEND(page);
			if (cbt->ins_head != NULL)
				F_SET(cbt, WT_CBT_ITERATE_APPEND);
		}

		if (F_ISSET(cbt, WT_CBT_ITERATE_APPEND)) {
			switch (page->type) {
			case WT_PAGE_COL_FIX:
				ret = __cursor_fix_append_prev(cbt, newpage);
				break;
			case WT_PAGE_COL_VAR:
				ret = __cursor_var_append_prev(cbt, newpage);
				break;
			WT_ILLEGAL_VALUE_ERR(session);
			}

			/*
			 * Success leaves the append flag set for the next
			 * call; anything else ends the append walk. Only
			 * not-found carries on to the page proper.
			 */
			if (ret != 0)
				F_CLR(cbt, WT_CBT_ITERATE_APPEND);
			if (ret != WT_NOTFOUND)
				break;
			newpage = true;
		}

		if (page != NULL) {
			switch (page->type) {
			case WT_PAGE_COL_FIX:
				ret = __cursor_fix_prev(cbt, newpage);
				break;
			case WT_PAGE_COL_VAR:
				ret = __cursor_var_prev(cbt, newpage);
				break;
			case WT_PAGE_ROW_LEAF:
				ret = __cursor_row_prev(cbt, newpage);
				break;
			WT_ILLEGAL_VALUE_ERR(session);
			}
			if (ret != WT_NOTFOUND)
				break;

			/*
			 * If we saw a lot of deleted records on this page, or
			 * we went all the way through a page and only saw
			 * deleted records, try to evict the page when we
			 * release it, otherwise repeatedly deleting from the
			 * beginning of a tree can have quadratic performance.
			 * Pages that are genuinely empty, in new trees, have
			 * a zero count and aren't touched.
			 */
			if (cbt->page_deleted_count >
			    WT_BTREE_DELETE_THRESHOLD ||
			    (newpage && cbt->page_deleted_count > 0))
				__wt_page_evict_soon(page);
		}
		cbt->page_deleted_count = 0;

		WT_ERR(__wt_tree_walk(session, &cbt->ref, flags));
		WT_ERR_TEST(cbt->ref == NULL, WT_NOTFOUND);

		newpage = true;
	}

err:	if (ret != 0)
		WT_TRET(__cursor_reset(cbt));
	return (ret);
}
/*
 * __wt_btcur_next --
 *	Move to the next record in the tree.
 *
 *	When truncating is set, WT_READ_TRUNCATE is added to the tree-walk
 *	flags. Returns 0 once a record is found, WT_NOTFOUND when the walk
 *	runs out of pages, or an error; the cursor is reset on any non-zero
 *	return.
 */
int
__wt_btcur_next(WT_CURSOR_BTREE *cbt, bool truncating)
{
	WT_CURSOR *cursor;
	WT_DECL_RET;
	WT_PAGE *page;
	WT_SESSION_IMPL *session;
	uint32_t flags;
	bool newpage;

	cursor = &cbt->iface;
	session = (WT_SESSION_IMPL *)cursor->session;

	WT_STAT_CONN_INCR(session, cursor_next);
	WT_STAT_DATA_INCR(session, cursor_next);

	/* Tree walk flags. */
	flags = WT_READ_SKIP_INTL;
	if (truncating)
		LF_SET(WT_READ_TRUNCATE);

	F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);

	WT_RET(__cursor_func_init(cbt, false));

	/* A cursor not already moving forward needs some setup first. */
	if (!F_ISSET(cbt, WT_CBT_ITERATE_NEXT))
		__wt_btcur_iterate_setup(cbt);

	/*
	 * Exhaust whatever page the cursor currently holds, then keep stepping
	 * to the next page until a record turns up or the walk returns no
	 * page.
	 */
	for (newpage = false;; newpage = true) {
		if (cbt->ref == NULL)
			page = NULL;
		else
			page = cbt->ref->page;

		if (F_ISSET(cbt, WT_CBT_ITERATE_APPEND)) {
			switch (page->type) {
			case WT_PAGE_COL_FIX:
				ret = __cursor_fix_append_next(cbt, newpage);
				break;
			case WT_PAGE_COL_VAR:
				ret = __cursor_var_append_next(cbt, newpage);
				break;
			WT_ILLEGAL_VALUE_ERR(session);
			}

			/*
			 * Success leaves the append flag set for the next
			 * call; anything else ends the append walk. Only
			 * not-found carries on to the next page.
			 */
			if (ret != 0)
				F_CLR(cbt, WT_CBT_ITERATE_APPEND);
			if (ret != WT_NOTFOUND)
				break;
		} else if (page != NULL) {
			switch (page->type) {
			case WT_PAGE_COL_FIX:
				ret = __cursor_fix_next(cbt, newpage);
				break;
			case WT_PAGE_COL_VAR:
				ret = __cursor_var_next(cbt, newpage);
				break;
			case WT_PAGE_ROW_LEAF:
				ret = __cursor_row_next(cbt, newpage);
				break;
			WT_ILLEGAL_VALUE_ERR(session);
			}
			if (ret != WT_NOTFOUND)
				break;

			/*
			 * Column-store pages may have appended entries, which
			 * are handled separately from the usual cursor code.
			 * If this page has an append list, go around again to
			 * walk it before leaving the page.
			 */
			if (page->type != WT_PAGE_ROW_LEAF) {
				cbt->ins_head = WT_COL_APPEND(page);
				if (cbt->ins_head != NULL) {
					F_SET(cbt, WT_CBT_ITERATE_APPEND);
					continue;
				}
			}
		}

		/*
		 * If we saw a lot of deleted records on this page, or we went
		 * all the way through a page and only saw deleted records, try
		 * to evict the page when we release it, otherwise repeatedly
		 * deleting from the beginning of a tree can have quadratic
		 * performance. Pages that are genuinely empty, in new trees,
		 * have a zero count and aren't touched.
		 */
		if (page != NULL) {
			if (cbt->page_deleted_count >
			    WT_BTREE_DELETE_THRESHOLD ||
			    (newpage && cbt->page_deleted_count > 0))
				__wt_page_evict_soon(session, cbt->ref);
		}
		cbt->page_deleted_count = 0;

		WT_ERR(__wt_tree_walk(session, &cbt->ref, flags));
		WT_ERR_TEST(cbt->ref == NULL, WT_NOTFOUND);
	}

	/*
	 * Only a successful positioning is checked (diagnostic builds) and
	 * marked as having an internal key and value.
	 */
	if (ret == 0) {
#ifdef HAVE_DIAGNOSTIC
		WT_ERR(__wt_cursor_key_order_check(session, cbt, true));
#endif
		F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT);
	}

err:	if (ret != 0)
		WT_TRET(__cursor_reset(cbt));
	return (ret);
}
/*
 * __wt_btcur_next --
 *	Move the btree cursor to the next record.
 *
 *	When truncating is non-zero, WT_READ_TRUNCATE is added to the
 *	tree-walk flags. Returns 0 once a record is found, WT_NOTFOUND when
 *	the walk runs out of pages, or an error; the cursor is reset on any
 *	non-zero return.
 */
int
__wt_btcur_next(WT_CURSOR_BTREE *cbt, int truncating)
{
	WT_DECL_RET;
	WT_PAGE *page;
	WT_SESSION_IMPL *session;
	uint32_t flags;
	int newpage;

	session = (WT_SESSION_IMPL *)cbt->iface.session;

	WT_STAT_FAST_CONN_INCR(session, cursor_next);
	WT_STAT_FAST_DATA_INCR(session, cursor_next);

	/* Tree walk flags. */
	flags = WT_READ_SKIP_INTL;
	if (truncating)
		LF_SET(WT_READ_TRUNCATE);

	/* Activate the btree cursor. */
	WT_RET(__cursor_func_init(cbt, 0));

	/* Set the cursor up for forward iteration if it isn't already. */
	if (!F_ISSET(cbt, WT_CBT_ITERATE_NEXT))
		__wt_btcur_iterate_setup(cbt, 1);

	/*
	 * Scan the tree. The first pass works on whatever page the cursor
	 * already holds (newpage is 0); every later pass, including the one
	 * that switches to a page's append list, is flagged as a new page.
	 * newpage has to be maintained by the loop: it is passed to every
	 * per-page iteration function and is tested by the eviction check.
	 */
	for (newpage = 0;; newpage = 1) {
		page = cbt->ref == NULL ? NULL : cbt->ref->page;
		WT_ASSERT(session, page == NULL || !WT_PAGE_IS_INTERNAL(page));

		/* Column-store append mode: scan the page's append list. */
		if (F_ISSET(cbt, WT_CBT_ITERATE_APPEND)) {
			switch (page->type) {
			case WT_PAGE_COL_FIX:
				ret = __cursor_fix_append_next(cbt, newpage);
				break;
			case WT_PAGE_COL_VAR:
				ret = __cursor_var_append_next(cbt, newpage);
				break;
			WT_ILLEGAL_VALUE_ERR(session);
			}
			if (ret == 0)
				break;

			/* Done with the append list: clear the append flag. */
			F_CLR(cbt, WT_CBT_ITERATE_APPEND);
			if (ret != WT_NOTFOUND)
				break;
		} else if (page != NULL) {
			switch (page->type) {
			case WT_PAGE_COL_FIX:
				ret = __cursor_fix_next(cbt, newpage);
				break;
			case WT_PAGE_COL_VAR:
				ret = __cursor_var_next(cbt, newpage);
				break;
			case WT_PAGE_ROW_LEAF:
				ret = __cursor_row_next(cbt, newpage);
				break;
			WT_ILLEGAL_VALUE_ERR(session);
			}

			/* Found a record (or hit an error): we're done. */
			if (ret != WT_NOTFOUND)
				break;

			/*
			 * For a column-store page, check whether there is an
			 * append list that still has to be scanned.
			 */
			if (page->type != WT_PAGE_ROW_LEAF &&
			    (cbt->ins_head = WT_COL_APPEND(page)) != NULL) {
				F_SET(cbt, WT_CBT_ITERATE_APPEND);
				continue;
			}
		}

		/*
		 * If more deleted records than WT_BTREE_DELETE_THRESHOLD were
		 * counted on this page, or a new page was scanned and any
		 * deleted records were counted, ask for the page to be
		 * evicted soon.
		 */
		if (page != NULL &&
		    (cbt->page_deleted_count > WT_BTREE_DELETE_THRESHOLD ||
		    (newpage && cbt->page_deleted_count > 0))) {
			__wt_page_evict_soon(page);
		}
		cbt->page_deleted_count = 0;

		/* Move the cursor on to the next page. */
		WT_ERR(__wt_tree_walk(session, &cbt->ref, NULL, flags));
		WT_ERR_TEST(cbt->ref == NULL, WT_NOTFOUND);
	}

	/* On failure, reset the cursor's state. */
err:	if (ret != 0)
		WT_TRET(__cursor_reset(cbt));
	return (ret);
}
/*
 * __wt_btcur_prev --
 *	Move to the previous record in the tree.
 *
 *	When truncating is non-zero, WT_READ_TRUNCATE is added to the
 *	tree-walk flags. Returns 0 once a record is found, WT_NOTFOUND when
 *	the walk runs out of pages, or an error; __cursor_error_resolve is
 *	called on any non-zero return.
 */
int
__wt_btcur_prev(WT_CURSOR_BTREE *cbt, int truncating)
{
	WT_DECL_RET;
	WT_PAGE *page;
	WT_SESSION_IMPL *session;
	uint32_t flags;
	int newpage;

	session = (WT_SESSION_IMPL *)cbt->iface.session;

	WT_STAT_FAST_CONN_INCR(session, cursor_prev);
	WT_STAT_FAST_DATA_INCR(session, cursor_prev);

	WT_RET(__cursor_func_init(cbt, 0));

	/* A cursor not already moving backward needs some setup first. */
	if (!F_ISSET(cbt, WT_CBT_ITERATE_PREV))
		__wt_btcur_iterate_setup(cbt, 0);

	/* Tree walk flags. */
	flags = WT_READ_PREV | WT_READ_SKIP_INTL;
	if (truncating)
		LF_SET(WT_READ_TRUNCATE);

	/* Start from the page the cursor already holds, if any. */
	if (cbt->ref == NULL)
		page = NULL;
	else
		page = cbt->ref->page;

	/*
	 * Exhaust the current page, then keep stepping to the previous page
	 * until a record turns up or the walk returns no page. The first
	 * pass works on the page already held, so it isn't a new page.
	 */
	newpage = 0;
	for (;;) {
		if (F_ISSET(cbt, WT_CBT_ITERATE_APPEND)) {
			switch (page->type) {
			case WT_PAGE_COL_FIX:
				ret = __cursor_fix_append_prev(cbt, newpage);
				break;
			case WT_PAGE_COL_VAR:
				ret = __cursor_var_append_prev(cbt, newpage);
				break;
			WT_ILLEGAL_VALUE_ERR(session);
			}

			/*
			 * Success leaves the append flag set for the next
			 * call; anything else ends the append walk. Only
			 * not-found carries on to the page proper.
			 */
			if (ret != 0)
				F_CLR(cbt, WT_CBT_ITERATE_APPEND);
			if (ret != WT_NOTFOUND)
				break;
			newpage = 1;
		}

		if (page != NULL) {
			switch (page->type) {
			case WT_PAGE_COL_FIX:
				ret = __cursor_fix_prev(cbt, newpage);
				break;
			case WT_PAGE_COL_VAR:
				ret = __cursor_var_prev(cbt, newpage);
				break;
			case WT_PAGE_ROW_LEAF:
				ret = __cursor_row_prev(cbt, newpage);
				break;
			WT_ILLEGAL_VALUE_ERR(session);
			}
			if (ret != WT_NOTFOUND)
				break;
		}

		/* Nothing left here: step to the previous page. */
		WT_ERR(__wt_tree_walk(session, &cbt->ref, flags));
		WT_ERR_TEST(cbt->ref == NULL, WT_NOTFOUND);

		page = cbt->ref->page;
		WT_ASSERT(session,
		    page->type != WT_PAGE_COL_INT &&
		    page->type != WT_PAGE_ROW_INT);

		/*
		 * The last page in a column-store has appended entries. They
		 * are handled separately from the usual cursor code: it's
		 * only that one page and it's in a simple format.
		 */
		if (page->type != WT_PAGE_ROW_LEAF) {
			cbt->ins_head = WT_COL_APPEND(page);
			if (cbt->ins_head != NULL)
				F_SET(cbt, WT_CBT_ITERATE_APPEND);
		}

		newpage = 1;
	}

err:	if (ret != 0)
		WT_TRET(__cursor_error_resolve(cbt));
	return (ret);
}
/*
 * __wt_btcur_prev --
 *	Move to the previous record in the tree.
 *
 *	If discard is non-zero, WT_TREE_DISCARD is added to the tree-walk
 *	flags and, for each page about to be read, the page's modify
 *	structure is initialized and its write generation saved in the cursor.
 *
 *	The whole operation is restarted from cursor initialization whenever
 *	it ends with WT_RESTART. Otherwise the cursor is resolved with the
 *	final status and that status (0, WT_NOTFOUND or an error) is returned.
 */
int
__wt_btcur_prev(WT_CURSOR_BTREE *cbt, int discard)
{
	WT_DECL_RET;
	WT_PAGE *page;
	WT_SESSION_IMPL *session;
	uint32_t flags;
	int newpage;

	session = (WT_SESSION_IMPL *)cbt->iface.session;
	WT_DSTAT_INCR(session, cursor_prev);

	flags = WT_TREE_SKIP_INTL | WT_TREE_PREV;	/* Tree walk flags. */
	if (discard)
		LF_SET(WT_TREE_DISCARD);

	/* Re-entered from the error label below when ret is WT_RESTART. */
retry:	WT_RET(__cursor_func_init(cbt, 0));
	__cursor_position_clear(cbt);

	/*
	 * If we aren't already iterating in the right direction, there's
	 * some setup to do.
	 */
	if (!F_ISSET(cbt, WT_CBT_ITERATE_PREV))
		__wt_btcur_iterate_setup(cbt, 0);

	/*
	 * If this is a modification, we're about to read information from the
	 * page, save the write generation.
	 */
	page = cbt->page;
	if (discard && page != NULL) {
		WT_ERR(__wt_page_modify_init(session, page));
		WT_ORDERED_READ(cbt->write_gen, page->modify->write_gen);
	}

	/*
	 * Walk any page we're holding until the underlying call returns not-
	 * found. Then, move to the previous page, until we reach the start
	 * of the file. newpage is 0 for the page held on entry and 1 for
	 * every page reached by the tree walk.
	 */
	for (newpage = 0;; newpage = 1) {
		if (F_ISSET(cbt, WT_CBT_ITERATE_APPEND)) {
			switch (page->type) {
			case WT_PAGE_COL_FIX:
				ret = __cursor_fix_append_prev(cbt, newpage);
				break;
			case WT_PAGE_COL_VAR:
				ret = __cursor_var_append_prev(cbt, newpage);
				break;
			WT_ILLEGAL_VALUE_ERR(session);
			}
			/* Found a record in the append list: done. */
			if (ret == 0)
				break;
			/*
			 * The append list is finished: clear the flag and, on
			 * not-found, fall through to the page proper with
			 * newpage set.
			 */
			F_CLR(cbt, WT_CBT_ITERATE_APPEND);
			if (ret != WT_NOTFOUND)
				break;
			newpage = 1;
		}
		if (page != NULL) {
			switch (page->type) {
			case WT_PAGE_COL_FIX:
				ret = __cursor_fix_prev(cbt, newpage);
				break;
			case WT_PAGE_COL_VAR:
				ret = __cursor_var_prev(cbt, newpage);
				break;
			case WT_PAGE_ROW_LEAF:
				ret = __cursor_row_prev(cbt, newpage);
				break;
			WT_ILLEGAL_VALUE_ERR(session);
			}
			/* Success or a real error ends the search. */
			if (ret != WT_NOTFOUND)
				break;
		}

		/*
		 * The walk updates the local page pointer; the cursor's page
		 * reference is cleared first and only set again once the walk
		 * has returned a page.
		 */
		cbt->page = NULL;
		WT_ERR(__wt_tree_walk(session, &page, flags));
		WT_ERR_TEST(page == NULL, WT_NOTFOUND);
		WT_ASSERT(session,
		    page->type != WT_PAGE_COL_INT &&
		    page->type != WT_PAGE_ROW_INT);
		cbt->page = page;

		/* Initialize the page's modification information */
		if (discard) {
			WT_ERR(__wt_page_modify_init(session, page));
			WT_ORDERED_READ(
			    cbt->write_gen, page->modify->write_gen);
		}

		/*
		 * The last page in a column-store has appended entries.
		 * We handle it separately from the usual cursor code:
		 * it's only that one page and it's in a simple format.
		 */
		if (page->type != WT_PAGE_ROW_LEAF &&
		    (cbt->ins_head = WT_COL_APPEND(page)) != NULL)
			F_SET(cbt, WT_CBT_ITERATE_APPEND);
	}

	/*
	 * A restart repeats the whole operation from cursor initialization;
	 * any other status is passed to the resolve function and returned.
	 */
err:	if (ret == WT_RESTART)
		goto retry;
	WT_TRET(__cursor_func_resolve(cbt, ret));
	return (ret);
}
/*
 * __wt_btcur_prev --
 *	Move to the previous record in the tree.
 *
 *	Returns 0 once a record is found, WT_NOTFOUND when there are no more
 *	pages, or an error. The cursor is resolved with the final status
 *	before returning.
 */
int
__wt_btcur_prev(WT_CURSOR_BTREE *cbt)
{
	WT_DECL_RET;
	WT_SESSION_IMPL *session;
	int newpage;

	session = (WT_SESSION_IMPL *)cbt->iface.session;
	WT_BSTAT_INCR(session, cursor_read_prev);

	__cursor_func_init(cbt, 0);

	/* A cursor not already moving backward needs some setup first. */
	if (!F_ISSET(cbt, WT_CBT_ITERATE_PREV))
		__wt_btcur_iterate_setup(cbt, 0);

	/*
	 * Exhaust whatever page the cursor currently holds, then keep stepping
	 * to the previous page until a record turns up or there are no more
	 * pages. The first pass works on the page already held, so it isn't
	 * a new page.
	 */
	newpage = 0;
	for (;;) {
		if (F_ISSET(cbt, WT_CBT_ITERATE_APPEND)) {
			switch (cbt->page->type) {
			case WT_PAGE_COL_FIX:
				ret = __cursor_fix_append_prev(cbt, newpage);
				break;
			case WT_PAGE_COL_VAR:
				ret = __cursor_var_append_prev(cbt, newpage);
				break;
			WT_ILLEGAL_VALUE_ERR(session);
			}

			/*
			 * Success leaves the append flag set for the next
			 * call; anything else ends the append walk. Only
			 * not-found carries on to the page proper.
			 */
			if (ret != 0)
				F_CLR(cbt, WT_CBT_ITERATE_APPEND);
			if (ret != WT_NOTFOUND)
				break;
			newpage = 1;
		}

		if (cbt->page != NULL) {
			switch (cbt->page->type) {
			case WT_PAGE_COL_FIX:
				ret = __cursor_fix_prev(cbt, newpage);
				break;
			case WT_PAGE_COL_VAR:
				ret = __cursor_var_prev(cbt, newpage);
				break;
			case WT_PAGE_ROW_LEAF:
				ret = __cursor_row_prev(cbt, newpage);
				break;
			WT_ILLEGAL_VALUE_ERR(session);
			}
			if (ret != WT_NOTFOUND)
				break;
		}

		/* Step through the tree, passing over internal pages. */
		for (;;) {
			WT_ERR(__wt_tree_np(session, &cbt->page, 0, 0));
			WT_ERR_TEST(cbt->page == NULL, WT_NOTFOUND);
			if (cbt->page->type != WT_PAGE_COL_INT &&
			    cbt->page->type != WT_PAGE_ROW_INT)
				break;
		}

		/*
		 * The last page in a column-store has appended entries. They
		 * are handled separately from the usual cursor code: it's
		 * only that one page and it's in a simple format.
		 */
		if (cbt->page->type != WT_PAGE_ROW_LEAF) {
			cbt->ins_head = WT_COL_APPEND(cbt->page);
			if (cbt->ins_head != NULL)
				F_SET(cbt, WT_CBT_ITERATE_APPEND);
		}

		newpage = 1;
	}

err:	__cursor_func_resolve(cbt, ret);
	return (ret);
}
/*
 * __wt_btcur_next --
 *	Move to the next record in the tree.
 *
 *	When truncating is non-zero, WT_READ_TRUNCATE is added to the
 *	tree-walk flags. Returns 0 once a record is found, WT_NOTFOUND when
 *	the walk runs out of pages, or an error; the cursor is reset on any
 *	non-zero return.
 */
int
__wt_btcur_next(WT_CURSOR_BTREE *cbt, int truncating)
{
	WT_DECL_RET;
	WT_PAGE *page;
	WT_SESSION_IMPL *session;
	uint32_t flags;
	int skipped, newpage;

	session = (WT_SESSION_IMPL *)cbt->iface.session;

	WT_STAT_FAST_CONN_INCR(session, cursor_next);
	WT_STAT_FAST_DATA_INCR(session, cursor_next);

	WT_RET(__cursor_func_init(cbt, 0));

	/* A cursor not already moving forward needs some setup first. */
	if (!F_ISSET(cbt, WT_CBT_ITERATE_NEXT))
		__wt_btcur_iterate_setup(cbt, 1);

	/* Tree walk flags. */
	flags = WT_READ_SKIP_INTL;
	if (truncating)
		LF_SET(WT_READ_TRUNCATE);

	/*
	 * Exhaust whatever page the cursor currently holds, then keep stepping
	 * to the next page until a record turns up or the walk returns no
	 * page. The skipped indicator is cleared at the start of each pass and
	 * is handed to the variable-length column-store and row-store
	 * functions.
	 */
	newpage = 0;
	skipped = 0;
	for (;; skipped = 0, newpage = 1) {
		if (cbt->ref == NULL)
			page = NULL;
		else
			page = cbt->ref->page;
		WT_ASSERT(session, page == NULL || !WT_PAGE_IS_INTERNAL(page));

		if (F_ISSET(cbt, WT_CBT_ITERATE_APPEND)) {
			switch (page->type) {
			case WT_PAGE_COL_FIX:
				ret = __cursor_fix_append_next(cbt, newpage);
				break;
			case WT_PAGE_COL_VAR:
				ret = __cursor_var_append_next(
				    cbt, newpage, &skipped);
				break;
			WT_ILLEGAL_VALUE_ERR(session);
			}

			/*
			 * Success leaves the append flag set for the next
			 * call; anything else ends the append walk. Only
			 * not-found carries on to the next page.
			 */
			if (ret != 0)
				F_CLR(cbt, WT_CBT_ITERATE_APPEND);
			if (ret != WT_NOTFOUND)
				break;
		} else if (page != NULL) {
			switch (page->type) {
			case WT_PAGE_COL_FIX:
				ret = __cursor_fix_next(cbt, newpage);
				break;
			case WT_PAGE_COL_VAR:
				ret = __cursor_var_next(cbt, newpage, &skipped);
				break;
			case WT_PAGE_ROW_LEAF:
				ret = __cursor_row_next(cbt, newpage, &skipped);
				break;
			WT_ILLEGAL_VALUE_ERR(session);
			}
			if (ret != WT_NOTFOUND)
				break;

			/*
			 * The last page in a column-store has appended
			 * entries. They are handled separately from the usual
			 * cursor code: it's only that one page and it's in a
			 * simple format. If this page has an append list, go
			 * around again to walk it.
			 */
			if (page->type != WT_PAGE_ROW_LEAF) {
				cbt->ins_head = WT_COL_APPEND(page);
				if (cbt->ins_head != NULL) {
					F_SET(cbt, WT_CBT_ITERATE_APPEND);
					continue;
				}
			}
		}

		/*
		 * If we scanned all the way through a page and only saw
		 * deleted records, try to evict the page as we release it,
		 * otherwise repeatedly deleting from the beginning of a tree
		 * can have quadratic performance.
		 */
		if (newpage && skipped)
			page->read_gen = WT_READGEN_OLDEST;

		WT_ERR(__wt_tree_walk(session, &cbt->ref, flags));
		WT_ERR_TEST(cbt->ref == NULL, WT_NOTFOUND);
	}

err:	if (ret != 0)
		WT_TRET(__cursor_reset(cbt));
	return (ret);
}