/*
 * __session_open_cursor --
 *     WT_SESSION->open_cursor method.
 *
 *     Exactly one of uri and to_dup must be supplied. A URI is opened through
 *     __wt_open_cursor; a cursor to duplicate is passed to __wt_cursor_dup,
 *     but only when its URI names a colgroup, index, file, lsm or table
 *     object.
 */
static int
__session_open_cursor(WT_SESSION *wt_session, const char *uri,
    WT_CURSOR *to_dup, const char *config, WT_CURSOR **cursorp)
{
    WT_DECL_RET;
    WT_SESSION_IMPL *session;

    session = (WT_SESSION_IMPL *)wt_session;
    SESSION_API_CALL(session, open_cursor, config, cfg);

    /* Both arguments set, or both missing, is a caller error. */
    if ((to_dup == NULL) == (uri == NULL))
        WT_ERR_MSG(session, EINVAL,
            "should be passed either a URI or a cursor to duplicate, "
            "but not both");

    if (to_dup == NULL)
        ret = __wt_open_cursor(session, uri, NULL, cfg, cursorp);
    else {
        /* The duplicate is opened on the object the source cursor names. */
        uri = to_dup->uri;
        if (!WT_PREFIX_MATCH(uri, "colgroup:") &&
            !WT_PREFIX_MATCH(uri, "index:") &&
            !WT_PREFIX_MATCH(uri, "file:") &&
            !WT_PREFIX_MATCH(uri, "lsm:") &&
            !WT_PREFIX_MATCH(uri, "table:"))
            ret = __wt_bad_object_type(session, uri);
        else
            ret = __wt_cursor_dup(session, to_dup, cfg, cursorp);
    }

err:
    API_END_NOTFOUND_MAP(session, ret);
}
/*
 * __wt_schema_truncate --
 *     WT_SESSION::truncate without a range.
 *
 *     Dispatches on the URI prefix: file, lsm and table objects have dedicated
 *     handlers, anything else is looked up as a data source. A WT_NOTFOUND
 *     result is reported to the caller as ENOENT.
 */
int
__wt_schema_truncate(
    WT_SESSION_IMPL *session, const char *uri, const char *cfg[])
{
    WT_DATA_SOURCE *dsrc;
    WT_DECL_RET;
    const char *name;

    /*
     * The prefix-skip macro is given its own copy of the pointer; every
     * other handler is passed the caller's URI unchanged.
     */
    name = uri;

    if (WT_PREFIX_MATCH(uri, "file:"))
        ret = __truncate_file(session, uri);
    else if (WT_PREFIX_MATCH(uri, "lsm:"))
        ret = __wt_lsm_tree_truncate(session, uri, cfg);
    else if (WT_PREFIX_SKIP(name, "table:"))
        ret = __truncate_table(session, name, cfg);
    else if ((dsrc = __wt_schema_get_source(session, uri)) == NULL)
        ret = __wt_bad_object_type(session, uri);
    else if (dsrc->truncate == NULL)
        /* The data source has no truncate method of its own. */
        ret = __truncate_dsrc(session, uri);
    else
        ret = dsrc->truncate(
            dsrc, &session->iface, uri, (WT_CONFIG_ARG *)cfg);

    /* If we didn't find a metadata entry, map that error to ENOENT. */
    if (ret == WT_NOTFOUND)
        return (ENOENT);
    return (ret);
}
/*
 * __wt_curfile_open --
 *     WT_SESSION->open_cursor method for the btree cursor type.
 *
 *     Only "file:" URIs are accepted. The "bulk" configuration may be a
 *     boolean (or the numbers 0/1) or the string "bitmap"; any bulk open
 *     requests the handle with the bulk and exclusive flags.
 */
int
__wt_curfile_open(WT_SESSION_IMPL *session, const char *uri,
    WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp)
{
    WT_CONFIG_ITEM cval;
    WT_DECL_RET;
    uint32_t flags;
    bool bitmap, bulk, is_boolean;

    flags = 0;
    bitmap = false;
    bulk = false;

    /* Decode the bulk setting. */
    WT_RET(__wt_config_gets_def(session, cfg, "bulk", 0, &cval));
    is_boolean = cval.type == WT_CONFIG_ITEM_BOOL ||
        (cval.type == WT_CONFIG_ITEM_NUM &&
        (cval.val == 0 || cval.val == 1));
    if (is_boolean)
        bulk = cval.val != 0;
    else if (WT_STRING_MATCH("bitmap", cval.str, cval.len)) {
        bitmap = true;
        bulk = true;
    } else
        WT_RET_MSG(session, EINVAL,
            "Value for 'bulk' must be a boolean or 'bitmap'");

    /* Bulk handles require exclusive access. */
    if (bulk)
        LF_SET(WT_BTREE_BULK | WT_DHANDLE_EXCLUSIVE);

    /* Get the handle and lock it while the cursor is using it. */
    if (!WT_PREFIX_MATCH(uri, "file:"))
        WT_RET(__wt_bad_object_type(session, uri));
    else {
        if (bulk) {
            /*
             * Bulk opens take the handle while holding the
             * checkpoint lock so the open doesn't fail with EBUSY
             * because of a database-wide checkpoint.
             */
            __wt_spin_lock(
                session, &S2C(session)->checkpoint_lock);
            ret = __wt_session_get_btree_ckpt(
                session, uri, cfg, flags);
            __wt_spin_unlock(
                session, &S2C(session)->checkpoint_lock);
        } else
            ret = __wt_session_get_btree_ckpt(
                session, uri, cfg, flags);
        WT_RET(ret);
    }

    WT_ERR(__wt_curfile_create(session, owner, cfg, bulk, bitmap, cursorp));

    /* The cursor exists: count it against the data source. */
    __wt_cursor_dhandle_incr_use(session);
    return (0);

err:
    /* Cursor creation failed, give the handle back. */
    WT_TRET(__wt_session_release_btree(session));
    return (ret);
}
/*
 * __wt_schema_get_index --
 *     Find an index by URI.
 *
 *     The URI must start with "index:" and contain a further ':' separating
 *     the table name from the index name. On success *indexp references the
 *     index; if the index is absent the function returns ENOENT, with an
 *     error message unless quiet is set.
 */
int
__wt_schema_get_index(WT_SESSION_IMPL *session,
    const char *uri, bool quiet, WT_TABLE **tablep, WT_INDEX **indexp)
{
    WT_DECL_RET;
    WT_INDEX *candidate;
    WT_TABLE *table;
    const char *name, *name_end;
    u_int slot;

    *indexp = NULL;

    /* Split the URI into the table name and the index name. */
    name = uri;
    if (!WT_PREFIX_SKIP(name, "index:"))
        return (__wt_bad_object_type(session, uri));
    if ((name_end = strchr(name, ':')) == NULL)
        return (__wt_bad_object_type(session, uri));

    WT_RET(__wt_schema_get_table(session,
        name, WT_PTRDIFF(name_end, name), false, &table));

    /* Look through the indices the table already has open. */
    for (slot = 0; slot < table->nindices; slot++) {
        candidate = table->indices[slot];
        if (candidate == NULL || strcmp(candidate->name, uri) != 0)
            continue;

        /* Hand the table to the caller if requested, else drop it. */
        if (tablep == NULL)
            __wt_schema_release_table(session, table);
        else
            *tablep = table;
        *indexp = candidate;
        return (0);
    }

    /* Not in the table's array yet: try to open it. */
    WT_ERR(__wt_schema_open_index(
        session, table, name_end + 1, strlen(name_end + 1), indexp));
    if (tablep != NULL)
        *tablep = table;

    /*
     * NOTE(review): the table is released below even when it has just been
     * stored in *tablep, whereas the loop above does not release it in
     * that case. Confirm against the callers which behavior is intended.
     */
err:
    __wt_schema_release_table(session, table);
    WT_RET(ret);

    if (*indexp != NULL)
        return (0);

    if (quiet)
        WT_RET(ENOENT);
    WT_RET_MSG(session, ENOENT, "%s not found in table", uri);
}
/*
 * __wt_schema_drop --
 *     Process a WT_SESSION::drop operation for all supported types.
 *
 *     The drop runs with metadata tracking turned on. WT_NOTFOUND and ENOENT
 *     results become success when "force" is configured and ENOENT
 *     otherwise.
 */
int
__wt_schema_drop(WT_SESSION_IMPL *session, const char *uri, const char *cfg[])
{
    WT_CONFIG_ITEM cval;
    WT_DATA_SOURCE *dsrc;
    WT_DECL_RET;
    bool force;

    WT_RET(__wt_config_gets_def(session, cfg, "force", 0, &cval));
    force = cval.val != 0;

    WT_RET(__wt_meta_track_on(session));

    /* Don't carry over whatever handle the caller had set. */
    session->dhandle = NULL;

    /* Dispatch on the object type. */
    if (WT_PREFIX_MATCH(uri, "colgroup:"))
        ret = __drop_colgroup(session, uri, force, cfg);
    else if (WT_PREFIX_MATCH(uri, "file:"))
        ret = __drop_file(session, uri, force, cfg);
    else if (WT_PREFIX_MATCH(uri, "index:"))
        ret = __drop_index(session, uri, force, cfg);
    else if (WT_PREFIX_MATCH(uri, "lsm:"))
        ret = __wt_lsm_tree_drop(session, uri, cfg);
    else if (WT_PREFIX_MATCH(uri, "table:"))
        ret = __drop_table(session, uri, cfg);
    else if ((dsrc = __wt_schema_get_source(session, uri)) == NULL)
        ret = __wt_bad_object_type(session, uri);
    else if (dsrc->drop == NULL)
        ret = __wt_object_unsupported(session, uri);
    else
        ret = dsrc->drop(
            dsrc, &session->iface, uri, (WT_CONFIG_ARG *)cfg);

    /*
     * WT_NOTFOUND is taken to mean there was no metadata entry and is
     * treated like ENOENT; a forced drop of a missing object succeeds.
     */
    if (ret == WT_NOTFOUND || ret == ENOENT) {
        if (force)
            ret = 0;
        else
            ret = ENOENT;
    }

    /* Advance the schema generation so that stale data is ignored. */
    ++S2C(session)->schema_gen;

    WT_TRET(__wt_meta_track_off(session, true, ret != 0));

    return (ret);
}
/*
 * __wt_schema_rename --
 *     WT_SESSION::rename.
 *
 *     session: the session performing the rename.
 *     uri:     the existing object's URI.
 *     newuri:  the new URI; its type prefix (the text up to the first ':')
 *              must be identical to uri's.
 *     cfg:     configuration stack, passed through to the type handlers.
 *
 *     Returns EINVAL if the type prefixes differ or either string has no
 *     ':' separator. WT_NOTFOUND from the handlers is reported as ENOENT.
 */
int
__wt_schema_rename(WT_SESSION_IMPL *session,
    const char *uri, const char *newuri, const char *cfg[])
{
    WT_DATA_SOURCE *dsrc;
    WT_DECL_RET;
    const char *p, *t;

    /*
     * The target type must match the source type.
     *
     * Stop at the end of the string as well as at the separator: without
     * the NUL check, two identical strings that contain no ':' would
     * compare equal at their terminators and the scan would run off the
     * end of both buffers.
     */
    for (p = uri, t = newuri;
        *p != '\0' && *p == *t && *p != ':'; ++p, ++t)
        ;
    if (*p != ':' || *t != ':')
        WT_RET_MSG(session, EINVAL,
            "rename target type must match URI: %s to %s",
            uri, newuri);

    /*
     * We track rename operations, if we fail in the middle, we want to
     * back it all out.
     */
    WT_RET(__wt_meta_track_on(session));

    if (WT_PREFIX_MATCH(uri, "file:"))
        ret = __rename_file(session, uri, newuri);
    else if (WT_PREFIX_MATCH(uri, "lsm:"))
        ret = __wt_lsm_tree_rename(session, uri, newuri, cfg);
    else if (WT_PREFIX_MATCH(uri, "table:"))
        ret = __rename_table(session, uri, newuri, cfg);
    else if ((dsrc = __wt_schema_get_source(session, uri)) != NULL)
        /* Data sources without a rename method can't be renamed. */
        ret = dsrc->rename == NULL ?
            __wt_object_unsupported(session, uri) :
            dsrc->rename(dsrc,
            &session->iface, uri, newuri, (WT_CONFIG_ARG *)cfg);
    else
        ret = __wt_bad_object_type(session, uri);

    /* Bump the schema generation so that stale data is ignored. */
    ++S2C(session)->schema_gen;

    /* Tracking is turned off whether or not the rename succeeded. */
    WT_TRET(__wt_meta_track_off(session, true, ret != 0));

    /* If we didn't find a metadata entry, map that error to ENOENT. */
    return (ret == WT_NOTFOUND ? ENOENT : ret);
}
/*
 * __wt_schema_create --
 *     Process a WT_SESSION::create operation for all supported types.
 *
 *     The create runs with metadata tracking turned on, so that a failure
 *     part way through a complex object can be backed out.
 */
int
__wt_schema_create(
    WT_SESSION_IMPL *session, const char *uri, const char *config)
{
    WT_CONFIG_ITEM cval;
    WT_DATA_SOURCE *dsrc;
    WT_DECL_RET;
    int exclusive;

    /* "exclusive" counts only if it is present and non-zero. */
    exclusive = 0;
    if (__wt_config_getones(session, config, "exclusive", &cval) == 0 &&
        cval.val != 0)
        exclusive = 1;

    WT_RET(__wt_meta_track_on(session));

    /* Dispatch on the object type. */
    if (WT_PREFIX_MATCH(uri, "colgroup:"))
        ret = __create_colgroup(session, uri, exclusive, config);
    else if (WT_PREFIX_MATCH(uri, "file:"))
        ret = __create_file(session, uri, exclusive, config);
    else if (WT_PREFIX_MATCH(uri, "lsm:"))
        ret = __wt_lsm_tree_create(session, uri, exclusive, config);
    else if (WT_PREFIX_MATCH(uri, "index:"))
        ret = __create_index(session, uri, exclusive, config);
    else if (WT_PREFIX_MATCH(uri, "table:"))
        ret = __create_table(session, uri, exclusive, config);
    else if ((dsrc = __wt_schema_get_source(session, uri)) == NULL)
        ret = __wt_bad_object_type(session, uri);
    else if (dsrc->create == NULL)
        ret = __wt_object_unsupported(session, uri);
    else
        ret = __create_data_source(session, uri, config, dsrc);

    /* Leave the session without a current handle. */
    session->dhandle = NULL;
    WT_TRET(__wt_meta_track_off(session, 1, ret != 0));

    return (ret);
}
/*
 * __wt_curfile_open --
 *     WT_SESSION->open_cursor method for the btree cursor type.
 *
 *     Only "file:" URIs are accepted. The "bulk" configuration may be the
 *     number 0 or 1, or the string "bitmap"; any bulk open requests the
 *     handle with the bulk and exclusive flags.
 */
int
__wt_curfile_open(WT_SESSION_IMPL *session, const char *uri,
    WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp)
{
    WT_CONFIG_ITEM cval;
    WT_DECL_RET;
    uint32_t flags;
    int bitmap, bulk;

    flags = 0;

    /* Decode the bulk setting. */
    WT_RET(__wt_config_gets_defno(session, cfg, "bulk", &cval));
    if (WT_STRING_MATCH("bitmap", cval.str, cval.len) &&
        !(cval.type == ITEM_NUM && (cval.val == 0 || cval.val == 1))) {
        bitmap = 1;
        bulk = 1;
    } else if (cval.type == ITEM_NUM && (cval.val == 0 || cval.val == 1)) {
        bitmap = 0;
        bulk = (cval.val != 0);
    } else
        WT_RET_MSG(session, EINVAL,
            "Value for 'bulk' must be a boolean or 'bitmap'");

    /* Bulk handles require exclusive access. */
    if (bulk)
        LF_SET(WT_BTREE_BULK | WT_BTREE_EXCLUSIVE);

    /* TODO: handle projections. */

    /* Get the handle and lock it while the cursor is using it. */
    if (!WT_PREFIX_MATCH(uri, "file:"))
        WT_RET(__wt_bad_object_type(session, uri));
    else
        WT_RET(__wt_session_get_btree_ckpt(session, uri, cfg, flags));

    WT_ERR(__wt_curfile_create(session, owner, cfg, bulk, bitmap, cursorp));
    return (0);

err:
    /* Cursor creation failed, give the handle back. */
    WT_TRET(__wt_session_release_btree(session));
    return (ret);
}
/*
 * __wt_schema_get_colgroup --
 *     Find a column group by URI.
 *
 *     The URI must start with "colgroup:"; the table name runs up to the next
 *     ':' or, if there is none, to the end of the string. On success
 *     *colgroupp references the column group and, if tablep is non-NULL, the
 *     table is handed back through it instead of being released. A missing
 *     column group yields ENOENT, with an error message unless quiet is set.
 */
int
__wt_schema_get_colgroup(WT_SESSION_IMPL *session,
    const char *uri, bool quiet, WT_TABLE **tablep, WT_COLGROUP **colgroupp)
{
    WT_COLGROUP *candidate;
    WT_TABLE *table;
    const char *name, *name_end;
    u_int slot;

    *colgroupp = NULL;

    name = uri;
    if (!WT_PREFIX_SKIP(name, "colgroup:"))
        return (__wt_bad_object_type(session, uri));

    /* Find where the table name ends. */
    name_end = strchr(name, ':');
    if (name_end == NULL)
        name_end = name + strlen(name);

    WT_RET(__wt_schema_get_table(session,
        name, WT_PTRDIFF(name_end, name), false, &table));

    /* Scan the table's column groups for one with the full URI. */
    for (slot = 0; slot < WT_COLGROUPS(table); slot++) {
        candidate = table->cgroups[slot];
        if (strcmp(candidate->name, uri) != 0)
            continue;

        *colgroupp = candidate;
        if (tablep == NULL)
            __wt_schema_release_table(session, table);
        else
            *tablep = table;
        return (0);
    }

    /* No match: drop the table and report the failure. */
    __wt_schema_release_table(session, table);
    if (quiet)
        WT_RET(ENOENT);
    WT_RET_MSG(session, ENOENT, "%s not found in table", uri);
}
/*
 * __wt_curfile_open --
 *     WT_SESSION->open_cursor method for the btree cursor type.
 *
 *     Only "file:" URIs are accepted. Both the "bulk" and "no_cache"
 *     configurations request the handle exclusively.
 */
int
__wt_curfile_open(WT_SESSION_IMPL *session, const char *uri,
    WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp)
{
    WT_CONFIG_ITEM cval;
    WT_DECL_RET;
    uint32_t flags;

    flags = 0;

    /* A bulk cursor gets an exclusive, bulk handle. */
    WT_RET(__wt_config_gets_defno(session, cfg, "bulk", &cval));
    if (cval.val != 0)
        LF_SET(WT_BTREE_EXCLUSIVE | WT_BTREE_BULK);

    /* A no-cache cursor gets an exclusive, no-cache handle. */
    WT_RET(__wt_config_gets_defno(session, cfg, "no_cache", &cval));
    if (cval.val != 0)
        LF_SET(WT_BTREE_EXCLUSIVE | WT_BTREE_NO_CACHE);

    /* TODO: handle projections. */

    /* Get the handle and lock it while the cursor is using it. */
    if (!WT_PREFIX_MATCH(uri, "file:"))
        WT_RET(__wt_bad_object_type(session, uri));
    else
        WT_RET(__wt_session_get_btree_ckpt(session, uri, cfg, flags));

    WT_ERR(__wt_curfile_create(session, owner, cfg, cursorp));
    return (0);

err:
    /*
     * Cursor creation failed, give the handle back; the release's own
     * result is deliberately discarded.
     */
    (void)__wt_session_release_btree(session);
    return (ret);
}
/*
 * __wt_session_compact --
 *     WT_SESSION.compact method.
 *
 *     wt_session: the public session handle.
 *     uri:        the object to compact.
 *     config:     the method's configuration string.
 *
 *     URIs that are not colgroup, file, index, lsm or table objects are
 *     handed to the matching data source's compact method, if it has one.
 *     For the built-in types, the function records a compaction state on the
 *     session, has the schema worker analyze the URI, then runs the LSM
 *     and/or file compaction passes depending on the counts the analysis
 *     produced.
 *
 *     Every exit after the API call entry goes through the err label, which
 *     clears the session's compaction state, releases the collected handles
 *     and updates the success/failure statistics.
 */
int
__wt_session_compact(
    WT_SESSION *wt_session, const char *uri, const char *config)
{
    WT_COMPACT_STATE compact;
    WT_CONFIG_ITEM cval;
    WT_DATA_SOURCE *dsrc;
    WT_DECL_RET;
    WT_SESSION_IMPL *session;
    u_int i;
    bool ignore_cache_size_set;

    /* Remembers whether this call set the flag and so must clear it. */
    ignore_cache_size_set = false;

    session = (WT_SESSION_IMPL *)wt_session;
    SESSION_API_CALL(session, compact, config, cfg);

    /*
     * The compaction thread should not block when the cache is full: it is
     * holding locks blocking checkpoints and once the cache is full, it can
     * spend a long time doing eviction.
     */
    if (!F_ISSET(session, WT_SESSION_IGNORE_CACHE_SIZE)) {
        ignore_cache_size_set = true;
        F_SET(session, WT_SESSION_IGNORE_CACHE_SIZE);
    }

    /* In-memory ignores compaction operations. */
    if (F_ISSET(S2C(session), WT_CONN_IN_MEMORY))
        goto err;

    /*
     * Non-LSM object compaction requires checkpoints, which are impossible
     * in transactional contexts. Disallow in all contexts (there's no
     * reason for LSM to allow this, possible or not), and check now so the
     * error message isn't confusing.
     */
    WT_ERR(__wt_txn_context_check(session, false));

    /* Disallow objects in the WiredTiger name space. */
    WT_ERR(__wt_str_name_check(session, uri));

    /* Anything that isn't a built-in type is left to its data source. */
    if (!WT_PREFIX_MATCH(uri, "colgroup:") &&
        !WT_PREFIX_MATCH(uri, "file:") &&
        !WT_PREFIX_MATCH(uri, "index:") &&
        !WT_PREFIX_MATCH(uri, "lsm:") &&
        !WT_PREFIX_MATCH(uri, "table:")) {
        if ((dsrc = __wt_schema_get_source(session, uri)) != NULL)
            ret = dsrc->compact == NULL ?
                __wt_object_unsupported(session, uri) :
                dsrc->compact(
                dsrc, wt_session, uri, (WT_CONFIG_ARG *)cfg);
        else
            ret = __wt_bad_object_type(session, uri);
        goto err;
    }

    /*
     * Setup the session handle's compaction state structure. The structure
     * lives on this function's stack, which is why the err label resets
     * the session's pointer before returning.
     */
    memset(&compact, 0, sizeof(WT_COMPACT_STATE));
    session->compact = &compact;

    /* Compaction can be time-limited. */
    WT_ERR(__wt_config_gets(session, cfg, "timeout", &cval));
    session->compact->max_time = (uint64_t)cval.val;
    __wt_epoch(session, &session->compact->begin);

    /* Find the types of data sources being compacted. */
    WT_WITH_SCHEMA_LOCK(session,
        ret = __wt_schema_worker(session, uri,
        __compact_handle_append, __compact_uri_analyze, cfg, 0));
    WT_ERR(ret);

    /* Run only the passes the analysis found work for. */
    if (session->compact->lsm_count != 0)
        WT_ERR(__wt_schema_worker(
            session, uri, NULL, __wt_lsm_compact, cfg, 0));
    if (session->compact->file_count != 0)
        WT_ERR(__compact_worker(session));

err:
    session->compact = NULL;

    /* End compaction on, and release, each handle in the operation list. */
    for (i = 0; i < session->op_handle_next; ++i) {
        WT_WITH_DHANDLE(session, session->op_handle[i],
            WT_TRET(__compact_end(session)));
        WT_WITH_DHANDLE(session, session->op_handle[i],
            WT_TRET(__wt_session_release_dhandle(session)));
    }

    __wt_free(session, session->op_handle);
    session->op_handle_allocated = session->op_handle_next = 0;

    /*
     * Release common session resources (for example, checkpoint may acquire
     * significant reconciliation structures/memory).
     */
    WT_TRET(__wt_session_release_resources(session));

    /* Only clear the flag if this call was the one that set it. */
    if (ignore_cache_size_set)
        F_CLR(session, WT_SESSION_IGNORE_CACHE_SIZE);

    if (ret != 0)
        WT_STAT_CONN_INCR(session, session_table_compact_fail);
    else
        WT_STAT_CONN_INCR(session, session_table_compact_success);
    API_END_RET_NOTFOUND_MAP(session, ret);
}
/*
 * __wt_schema_worker --
 *     Get Btree handles for the object and cycle through calls to an
 *     underlying worker function with each handle.
 *
 *     session:    the session doing the work.
 *     uri:        the object to process.
 *     file_func:  called for each underlying file; may be NULL, in which case
 *                 "file:" URIs are not processed.
 *     name_func:  optional callback invoked with each URI visited; it can set
 *                 its bool argument to have that URI skipped.
 *     cfg:        configuration stack passed through to the callbacks.
 *     open_flags: flags passed through when opening handles.
 *
 *     Colgroup, index and table URIs are resolved to their sources and
 *     processed recursively.
 */
int
__wt_schema_worker(WT_SESSION_IMPL *session,
    const char *uri,
    int (*file_func)(WT_SESSION_IMPL *, const char *[]),
    int (*name_func)(WT_SESSION_IMPL *, const char *, bool *),
    const char *cfg[], uint32_t open_flags)
{
    WT_COLGROUP *colgroup;
    WT_DATA_SOURCE *dsrc;
    WT_DECL_RET;
    WT_INDEX *idx;
    WT_SESSION *wt_session;
    WT_TABLE *table;
    u_int i;
    bool skip;

    /* The err label releases the table only if one was acquired. */
    table = NULL;

    skip = false;
    if (name_func != NULL)
        WT_ERR(name_func(session, uri, &skip));

    /* If the callback said to skip this object, we're done. */
    if (skip)
        return (0);

    /* Get the btree handle(s) and call the underlying function. */
    if (WT_PREFIX_MATCH(uri, "file:")) {
        if (file_func != NULL)
            WT_ERR(__wt_exclusive_handle_operation(session,
                uri, file_func, cfg, open_flags));
    } else if (WT_PREFIX_MATCH(uri, "colgroup:")) {
        /* Recurse on the column group's source. */
        WT_ERR(__wt_schema_get_colgroup(
            session, uri, false, NULL, &colgroup));
        WT_ERR(__wt_schema_worker(session,
            colgroup->source, file_func, name_func, cfg, open_flags));
    } else if (WT_PREFIX_MATCH(uri, "index:")) {
        /* Recurse on the index's source. */
        idx = NULL;
        WT_ERR(__wt_schema_get_index(session, uri, false, false, &idx));
        WT_ERR(__wt_schema_worker(session, idx->source,
            file_func, name_func, cfg, open_flags));
    } else if (WT_PREFIX_MATCH(uri, "lsm:")) {
        WT_ERR(__wt_lsm_tree_worker(session,
            uri, file_func, name_func, cfg, open_flags));
    } else if (WT_PREFIX_MATCH(uri, "table:")) {
        /*
         * Note: we would like to use open_flags here (e.g., to lock
         * the table exclusive during schema-changing operations), but
         * that is currently problematic because we get the table again
         * in order to discover column groups and indexes.
         */
        WT_ERR(__wt_schema_get_table_uri(
            session, uri, false, 0, &table));

        /*
         * We could make a recursive call for each colgroup or index
         * URI, but since we have already opened the table, we can take
         * a short cut and skip straight to the sources. If we have a
         * name function, it needs to know about the intermediate URIs.
         */
        for (i = 0; i < WT_COLGROUPS(table); i++) {
            colgroup = table->cgroups[i];
            /* Reset for each entry: the callback may leave it unset. */
            skip = false;
            if (name_func != NULL)
                WT_ERR(name_func(
                    session, colgroup->name, &skip));
            if (!skip)
                WT_ERR(__wt_schema_worker(
                    session, colgroup->source,
                    file_func, name_func, cfg, open_flags));
        }

        /* Open the table's indices before walking them. */
        WT_ERR(__wt_schema_open_indices(session, table));
        for (i = 0; i < table->nindices; i++) {
            idx = table->indices[i];
            skip = false;
            if (name_func != NULL)
                WT_ERR(name_func(session, idx->name, &skip));
            if (!skip)
                WT_ERR(__wt_schema_worker(session,
                    idx->source,
                    file_func, name_func, cfg, open_flags));
        }
    } else if ((dsrc = __wt_schema_get_source(session, uri)) != NULL) {
        /*
         * Data sources are driven through their own methods: the file
         * function's identity selects which one. The checkpoint
         * functions are accepted but do nothing here; any other
         * function is reported as unsupported.
         */
        wt_session = (WT_SESSION *)session;
        if (file_func == __wt_salvage && dsrc->salvage != NULL)
            WT_ERR(dsrc->salvage(
                dsrc, wt_session, uri, (WT_CONFIG_ARG *)cfg));
        else if (file_func == __wt_verify && dsrc->verify != NULL)
            WT_ERR(dsrc->verify(
                dsrc, wt_session, uri, (WT_CONFIG_ARG *)cfg));
        else if (file_func == __wt_checkpoint)
            ;
        else if (file_func == __wt_checkpoint_get_handles)
            ;
        else if (file_func == __wt_checkpoint_sync)
            ;
        else
            WT_ERR(__wt_object_unsupported(session, uri));
    } else
        WT_ERR(__wt_bad_object_type(session, uri));

err:
    if (table != NULL)
        WT_TRET(__wt_schema_release_table(session, table));
    return (ret);
}
/*
 * __wt_curfile_open --
 *     WT_SESSION->open_cursor method for the btree cursor type.
 *
 *     Only "file:" URIs are accepted. The "bulk" configuration is not read
 *     for in-memory connections; otherwise it may be a boolean (or the
 *     numbers 0/1), "bitmap" or "unordered". For bulk opens the
 *     "checkpoint_wait" setting decides whether the handle is acquired under
 *     the checkpoint lock.
 */
int
__wt_curfile_open(WT_SESSION_IMPL *session, const char *uri,
    WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp)
{
    WT_CONFIG_ITEM cval;
    WT_DECL_RET;
    uint32_t flags;
    bool bitmap, bulk, checkpoint_wait, is_boolean;

    flags = 0;
    bitmap = false;
    bulk = false;
    checkpoint_wait = true;

    /*
     * Decode the bulk configuration settings. In memory databases
     * ignore bulk load.
     */
    if (!F_ISSET(S2C(session), WT_CONN_IN_MEMORY)) {
        WT_RET(__wt_config_gets_def(session, cfg, "bulk", 0, &cval));
        is_boolean = cval.type == WT_CONFIG_ITEM_BOOL ||
            (cval.type == WT_CONFIG_ITEM_NUM &&
            (cval.val == 0 || cval.val == 1));
        if (is_boolean)
            bulk = cval.val != 0;
        else if (WT_STRING_MATCH("bitmap", cval.str, cval.len)) {
            bitmap = true;
            bulk = true;
        } else if (!WT_STRING_MATCH("unordered", cval.str, cval.len))
            /*
             * "unordered" is accepted without turning on bulk:
             * it is used internally by index creation on
             * existing tables and needs no special handling at
             * the file level.
             */
            WT_RET_MSG(session, EINVAL,
                "Value for 'bulk' must be a boolean or 'bitmap'");

        if (bulk) {
            WT_RET(__wt_config_gets(session,
                cfg, "checkpoint_wait", &cval));
            checkpoint_wait = cval.val != 0;
        }
    }

    /* Bulk handles require exclusive access. */
    if (bulk)
        LF_SET(WT_BTREE_BULK | WT_DHANDLE_EXCLUSIVE);

    /* Get the handle and lock it while the cursor is using it. */
    if (!WT_PREFIX_MATCH(uri, "file:"))
        WT_RET(__wt_bad_object_type(session, uri));
    else {
        /*
         * An exclusive open that is willing to wait takes the handle
         * under the checkpoint lock, so it doesn't fail with EBUSY
         * because of a database-wide checkpoint.
         */
        if (!LF_ISSET(WT_DHANDLE_EXCLUSIVE) || !checkpoint_wait)
            ret = __wt_session_get_btree_ckpt(
                session, uri, cfg, flags);
        else {
            WT_WITH_CHECKPOINT_LOCK(session,
                ret = __wt_session_get_btree_ckpt(
                session, uri, cfg, flags));
        }
        WT_RET(ret);
    }

    WT_ERR(__curfile_create(session, owner, cfg, bulk, bitmap, cursorp));
    return (0);

err:
    /* Cursor creation failed, give the handle back. */
    WT_TRET(__wt_session_release_btree(session));
    return (ret);
}
/*
 * __session_truncate --
 *     WT_SESSION->truncate method.
 *
 *     wt_session: the public session handle.
 *     uri:        object to truncate entirely, or NULL for a range truncate.
 *     start:      cursor giving the start of the range, or NULL.
 *     stop:       cursor giving the end of the range, or NULL.
 *     config:     the method's configuration string.
 *
 *     Either a URI or at least one cursor must be given, never both; any
 *     other combination fails with EINVAL. Range truncation is only done for
 *     cursors on "file:" and "table:" objects.
 */
static int
__session_truncate(WT_SESSION *wt_session,
    const char *uri, WT_CURSOR *start, WT_CURSOR *stop, const char *config)
{
    WT_DECL_RET;
    WT_SESSION_IMPL *session;
    WT_CURSOR *cursor;
    int cmp;

    session = (WT_SESSION_IMPL *)wt_session;
    SESSION_TXN_API_CALL(session, truncate, config, cfg);

    /*
     * If the URI is specified, we don't need a start/stop, if start/stop
     * is specified, we don't need a URI.
     *
     * If no URI is specified, and both cursors are specified, start/stop
     * must reference the same object.
     *
     * Any specified cursor must have been initialized.
     */
    if ((uri == NULL && start == NULL && stop == NULL) ||
        (uri != NULL && (start != NULL || stop != NULL)))
        WT_ERR_MSG(session, EINVAL,
            "the truncate method should be passed either a URI or "
            "start/stop cursors, but not both");

    /* Whole-object truncate: done under the schema lock. */
    if (uri != NULL) {
        WT_WITH_SCHEMA_LOCK(session,
            ret = __wt_schema_truncate(session, uri, cfg));
        goto done;
    }

    /*
     * Truncate is only supported for file and table objects. Whichever
     * cursor was supplied (start takes precedence) identifies the object.
     */
    cursor = start == NULL ? stop : start;
    if (!WT_PREFIX_MATCH(cursor->uri, "file:") &&
        !WT_PREFIX_MATCH(cursor->uri, "table:"))
        WT_ERR(__wt_bad_object_type(session, cursor->uri));

    /*
     * If both cursors set, check they're correctly ordered with respect to
     * each other. We have to test this before any search, the search can
     * change the initial cursor position.
     *
     * Rather happily, the compare routine will also confirm the cursors
     * reference the same object and the keys are set.
     */
    if (start != NULL && stop != NULL) {
        WT_ERR(start->compare(start, stop, &cmp));
        if (cmp > 0)
            WT_ERR_MSG(session, EINVAL,
                "the start cursor position is after the stop "
                "cursor position");
    }

    /*
     * Truncate does not require keys actually exist so that applications
     * can discard parts of the object's name space without knowing exactly
     * what records currently appear in the object. For this reason, do a
     * search-near, rather than a search. Additionally, we have to correct
     * after calling search-near, to position the start/stop cursors on the
     * next record greater than/less than the original key. If the cursors
     * hit the beginning/end of the object, or the start/stop keys cross,
     * we're done, the range must be empty.
     */
    if (start != NULL) {
        WT_ERR(start->search_near(start, &cmp));
        /* Landed before the key: step forward to the next record. */
        if (cmp < 0 && (ret = start->next(start)) != 0) {
            WT_ERR_NOTFOUND_OK(ret);
            goto done;
        }
    }
    if (stop != NULL) {
        WT_ERR(stop->search_near(stop, &cmp));
        /* Landed after the key: step back to the previous record. */
        if (cmp > 0 && (ret = stop->prev(stop)) != 0) {
            WT_ERR_NOTFOUND_OK(ret);
            goto done;
        }

        /* The adjusted positions may have crossed: nothing to do. */
        if (start != NULL) {
            WT_ERR(start->compare(start, stop, &cmp));
            if (cmp > 0)
                goto done;
        }
    }

    if (WT_PREFIX_MATCH(cursor->uri, "file:"))
        WT_ERR(__wt_curfile_truncate(session, start, stop));
    else
        WT_ERR(__wt_curtable_truncate(session, start, stop));

    /* Both the early-exit and error paths leave through the same point. */
done:
err:
    TXN_API_END_NOTFOUND_MAP(session, ret);
}
/*
 * __session_compact --
 *     WT_SESSION.compact method.
 *
 *     Requests on "lsm:" objects succeed without doing anything; only
 *     colgroup, file, index and table objects are otherwise accepted. The
 *     compaction itself is bracketed by checkpoints of the target: one
 *     before, and two forced ones after.
 */
static int
__session_compact(WT_SESSION *wt_session, const char *uri, const char *config)
{
    WT_DECL_RET;
    WT_ITEM *ckpt_cfg;
    WT_SESSION_IMPL *session;
    int pass;

    session = (WT_SESSION_IMPL *)wt_session;

    /* Compaction makes no sense for LSM objects, ignore requests. */
    if (WT_PREFIX_MATCH(uri, "lsm:"))
        return (0);

    if (!(WT_PREFIX_MATCH(uri, "colgroup:") ||
        WT_PREFIX_MATCH(uri, "file:") ||
        WT_PREFIX_MATCH(uri, "index:") ||
        WT_PREFIX_MATCH(uri, "table:")))
        return (__wt_bad_object_type(session, uri));

    /*
     * Compaction needs two, possibly three, checkpoints; how many is up
     * to the block manager, but all of them need the first.
     *
     * First checkpoint: blocks belonging to deleted rows are only freed
     * once their page is reconciled, which the checkpoint forces. Since
     * compaction works from the block manager's free blocks, it could do
     * nothing without this step.
     *
     * Then the tree is compacted.
     *
     * Second and third checkpoints: the default block manager puts blocks
     * freed during a checkpoint on a checkpoint-available list, and only
     * moves them to the real available list after the metadata has been
     * updated for that checkpoint. A checkpoint therefore can't allocate
     * from the blocks it frees itself. The second checkpoint makes the
     * blocks compaction freed at the end of the file available, but may
     * write its own blocks at the end of the file, preventing truncation.
     * Once it resolves, the third checkpoint can write toward the start
     * of the file and the file can then be truncated.
     *
     * All of this is done here so applications aren't left wondering why
     * compaction has no effect until several later checkpoints.
     */
    WT_RET(__wt_scr_alloc(session, 0, &ckpt_cfg));

    WT_ERR(__wt_buf_fmt(session, ckpt_cfg, "target=(\"%s\")", uri));
    WT_ERR(__session_checkpoint(wt_session, ckpt_cfg->data));

    WT_ERR(__session_compact_worker(wt_session, uri, config));

    /*
     * Force the trailing checkpoints: they must not be skipped, the work
     * we need is done in the underlying block manager.
     */
    WT_ERR(__wt_buf_fmt(
        session, ckpt_cfg, "target=(\"%s\"),force=1", uri));
    for (pass = 0; pass < 2; ++pass)
        WT_ERR(__session_checkpoint(wt_session, ckpt_cfg->data));

err:
    __wt_scr_free(&ckpt_cfg);
    return (ret);
}
/*
 * __wt_schema_worker --
 *     Get Btree handles for the object and cycle through calls to an
 *     underlying worker function with each handle.
 *
 *     session:    the session doing the work.
 *     uri:        the object to process.
 *     file_func:  called for each underlying file with its handle acquired;
 *                 may be NULL, in which case "file:" URIs are not processed.
 *     name_func:  optional callback invoked with each URI visited.
 *     cfg:        configuration stack passed through to the callbacks.
 *     open_flags: flags passed through when opening handles.
 *
 *     Colgroup, index and table URIs are resolved to their sources and
 *     processed recursively.
 */
int
__wt_schema_worker(WT_SESSION_IMPL *session,
    const char *uri,
    int (*file_func)(WT_SESSION_IMPL *, const char *[]),
    int (*name_func)(WT_SESSION_IMPL *, const char *),
    const char *cfg[], uint32_t open_flags)
{
    WT_COLGROUP *colgroup;
    WT_DATA_SOURCE *dsrc;
    WT_DECL_RET;
    WT_INDEX *idx;
    WT_SESSION *wt_session;
    WT_TABLE *table;
    const char *tablename;
    u_int i;

    /* The err label releases the table only if one was acquired. */
    table = NULL;
    /* Separate copy of the URI for the prefix-skip checks below. */
    tablename = uri;

    if (name_func != NULL)
        WT_ERR(name_func(session, uri));

    /* Get the btree handle(s) and call the underlying function. */
    if (WT_PREFIX_MATCH(uri, "file:")) {
        if (file_func != NULL) {
            WT_ERR(__wt_session_get_btree_ckpt(
                session, uri, cfg, open_flags));
            /* The handle is released whether or not the call failed. */
            ret = file_func(session, cfg);
            WT_TRET(__wt_session_release_btree(session));
        }
    } else if (WT_PREFIX_MATCH(uri, "colgroup:")) {
        /* Recurse on the column group's source. */
        WT_ERR(__wt_schema_get_colgroup(session, uri, NULL, &colgroup));
        WT_ERR(__wt_schema_worker(session, colgroup->source,
            file_func, name_func, cfg, open_flags));
    } else if (WT_PREFIX_SKIP(tablename, "index:")) {
        /* Recurse on the index's source. */
        idx = NULL;
        WT_ERR(__wt_schema_get_index(session, uri, NULL, &idx));
        WT_ERR(__wt_schema_worker(session, idx->source,
            file_func, name_func, cfg, open_flags));
    } else if (WT_PREFIX_MATCH(uri, "lsm:")) {
        WT_ERR(__wt_lsm_tree_worker(
            session, uri, file_func, name_func, cfg, open_flags));
    } else if (WT_PREFIX_SKIP(tablename, "table:")) {
        WT_ERR(__wt_schema_get_table(session,
            tablename, strlen(tablename), 0, &table));
        WT_ASSERT(session, session->dhandle == NULL);

        /*
         * We could make a recursive call for each colgroup or index
         * URI, but since we have already opened the table, we can take
         * a short cut and skip straight to the sources. If we have a
         * name function, it needs to know about the intermediate URIs.
         */
        for (i = 0; i < WT_COLGROUPS(table); i++) {
            colgroup = table->cgroups[i];
            if (name_func != NULL)
                WT_ERR(name_func(session, colgroup->name));
            WT_ERR(__wt_schema_worker(session, colgroup->source,
                file_func, name_func, cfg, open_flags));
        }

        /* Open the table's indices before walking them. */
        WT_ERR(__wt_schema_open_indices(session, table));
        for (i = 0; i < table->nindices; i++) {
            idx = table->indices[i];
            if (name_func != NULL)
                WT_ERR(name_func(session, idx->name));
            WT_ERR(__wt_schema_worker(session, idx->source,
                file_func, name_func, cfg, open_flags));
        }
    } else if ((dsrc = __wt_schema_get_source(session, uri)) != NULL) {
        /*
         * Data sources are driven through their own methods: the file
         * function's identity selects which one, and a missing method
         * (or any other function) is reported as unsupported.
         */
        wt_session = (WT_SESSION *)session;
        if (file_func == __wt_compact && dsrc->compact != NULL)
            WT_ERR(dsrc->compact(
                dsrc, wt_session, uri, (WT_CONFIG_ARG *)cfg));
        else if (file_func == __wt_salvage && dsrc->salvage != NULL)
            WT_ERR(dsrc->salvage(
                dsrc, wt_session, uri, (WT_CONFIG_ARG *)cfg));
        else if (file_func == __wt_verify && dsrc->verify != NULL)
            WT_ERR(dsrc->verify(
                dsrc, wt_session, uri, (WT_CONFIG_ARG *)cfg));
        else
            WT_ERR(__wt_object_unsupported(session, uri));
    } else
        WT_ERR(__wt_bad_object_type(session, uri));

err:
    if (table != NULL)
        __wt_schema_release_table(session, table);
    return (ret);
}