/*
 * __wt_close --
 *	Release one reference on a file handle.  When the last reference is
 * dropped the handle is unlinked from the connection's list, its descriptor
 * is closed and its memory is freed.
 *
 * Returns 0 when the handle is NULL, has no references or is still in use;
 * otherwise returns 0 or the error from close().
 */
int
__wt_close(WT_SESSION_IMPL *session, WT_FH *fh)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	int last_ref;

	conn = S2C(session);

	/*
	 * The reference count and the handle list are both updated under the
	 * connection's file-handle lock.
	 */
	__wt_spin_lock(session, &conn->fh_lock);
	last_ref = fh != NULL && fh->refcnt != 0 && !(--fh->refcnt > 0);
	if (last_ref) {
		/* Remove from the list. */
		TAILQ_REMOVE(&conn->fhqh, fh, q);
		WT_STAT_FAST_CONN_DECR(session, file_open);
	}
	__wt_spin_unlock(session, &conn->fh_lock);

	if (!last_ref)
		return (0);

	/* Close the descriptor, reporting (but not aborting on) failure. */
	if (close(fh->fd) != 0) {
		ret = __wt_errno();
		__wt_err(session, ret, "%s", fh->name);
	}

	/* Discard the memory. */
	__wt_free(session, fh->name);
	__wt_free(session, fh);
	return (ret);
}
/* * __log_wrlsn_server -- * The log wrlsn server thread. */ static WT_THREAD_RET __log_wrlsn_server(void *arg) { WT_CONNECTION_IMPL *conn; WT_DECL_RET; WT_LOG *log; WT_SESSION_IMPL *session; int locked, yield; session = arg; conn = S2C(session); log = conn->log; locked = yield = 0; while (F_ISSET(conn, WT_CONN_LOG_SERVER_RUN)) { __wt_spin_lock(session, &log->log_slot_lock); locked = 1; WT_ERR(__wt_log_wrlsn(session, NULL, &yield)); locked = 0; __wt_spin_unlock(session, &log->log_slot_lock); if (++yield < 1000) __wt_yield(); else WT_ERR(__wt_cond_wait(session, conn->log_wrlsn_cond, 100000)); } if (0) { err: __wt_err(session, ret, "log wrlsn server error"); } if (locked) __wt_spin_unlock(session, &log->log_slot_lock); return (WT_THREAD_RET_VALUE); }
/*
 * __log_direct_write --
 *	Write a single log record through a private, stack-allocated slot
 * instead of the consolidation arrays.
 *
 * Returns EAGAIN without doing any work if the log slot lock cannot be
 * acquired immediately.
 */
static int
__log_direct_write(WT_SESSION_IMPL *session,
    WT_ITEM *record, WT_LSN *lsnp, uint32_t flags)
{
	WT_DECL_RET;
	WT_LOG *log;
	WT_LOGSLOT tmp;
	WT_MYSLOT myslot;
	int locked;
	WT_DECL_SPINLOCK_ID(id);			/* Must appear last */

	log = S2C(session)->log;
	locked = 0;

	/* Build a temporary slot that only this record will use. */
	WT_CLEAR(tmp);
	myslot.slot = &tmp;
	myslot.offset = 0;

	/* Fast path the contended case. */
	if (__wt_spin_trylock(session, &log->log_slot_lock, &id) != 0)
		return (EAGAIN);
	locked = 1;

	if (LF_ISSET(WT_LOG_FSYNC))
		F_SET(&tmp, SLOT_SYNC);

	/* Only the acquire step runs under the slot lock. */
	WT_ERR(__log_acquire(session, record->size, &tmp));
	__wt_spin_unlock(session, &log->log_slot_lock);
	locked = 0;

	WT_ERR(__log_fill(session, &myslot, 1, record, lsnp));
	WT_ERR(__log_release(session, &tmp));

err:	if (locked)
		__wt_spin_unlock(session, &log->log_slot_lock);
	return (ret);
}
/*
 * __wt_block_checkpoint_resolve --
 *	Resolve a checkpoint: merge the checkpoint's available list into the
 * live available list, discard the checkpoint extent lists and clear the
 * in-progress flag.  Panics if no checkpoint is in progress or if the
 * merge fails.
 */
int
__wt_block_checkpoint_resolve(WT_SESSION_IMPL *session, WT_BLOCK *block)
{
	WT_BLOCK_CKPT *ci;
	WT_DECL_RET;

	ci = &block->live;

	/*
	 * Resolve the checkpoint after our caller has written the checkpoint
	 * information to stable storage.
	 */
	__wt_spin_lock(session, &block->live_lock);

	if (!block->ckpt_inprogress)
		WT_ERR(__wt_block_panic(session, WT_ERROR,
		    "%s: checkpoint resolution with no checkpoint in progress",
		    block->name));

	ret = __wt_block_extlist_merge(
	    session, block, &ci->ckpt_avail, &ci->avail);
	if (ret != 0)
		WT_ERR(__wt_block_panic(session, ret,
		    "%s: fatal checkpoint failure", block->name));

	/* The extent lists are freed without holding the live lock. */
	__wt_spin_unlock(session, &block->live_lock);

	/* Discard the lists remaining after the checkpoint call. */
	__wt_block_extlist_free(session, &ci->ckpt_avail);
	__wt_block_extlist_free(session, &ci->ckpt_alloc);
	__wt_block_extlist_free(session, &ci->ckpt_discard);

	/* Re-acquire the lock to clear the in-progress flag. */
	__wt_spin_lock(session, &block->live_lock);
	block->ckpt_inprogress = 0;

err:	__wt_spin_unlock(session, &block->live_lock);
	return (ret);
}
/*
 * __wt_block_checkpoint_resolve --
 *	Resolve a checkpoint, driven by the block's checkpoint state.
 *
 * An in-progress checkpoint is simply reset to "none"; an unexpected state,
 * or a failed checkpoint in the panic-on-failure state, panics.  Otherwise
 * the checkpoint's available list is merged into the live list and the
 * checkpoint extent lists are discarded.
 */
int
__wt_block_checkpoint_resolve(
    WT_SESSION_IMPL *session, WT_BLOCK *block, bool failed)
{
	WT_BLOCK_CKPT *ci;
	WT_DECL_RET;

	ci = &block->live;

	/*
	 * Resolve the checkpoint after our caller has written the checkpoint
	 * information to stable storage.
	 */
	__wt_spin_lock(session, &block->live_lock);
	switch (block->ckpt_state) {
	case WT_CKPT_INPROGRESS:
		/* Something went wrong, but it's recoverable at our level. */
		goto done;
	case WT_CKPT_NONE:
	case WT_CKPT_SALVAGE:
		__wt_err(session, EINVAL,
		    "%s: an unexpected checkpoint resolution: the checkpoint "
		    "was never started or completed, or configured for salvage",
		    block->name);
		ret = __wt_block_panic(session);
		break;
	case WT_CKPT_PANIC_ON_FAILURE:
		if (failed) {
			__wt_err(session, EINVAL,
			    "%s: the checkpoint failed, the system must restart",
			    block->name);
			ret = __wt_block_panic(session);
		}
		break;
	}
	WT_ERR(ret);

	ret = __wt_block_extlist_merge(
	    session, block, &ci->ckpt_avail, &ci->avail);
	if (ret != 0) {
		__wt_err(session, ret,
		    "%s: fatal checkpoint failure during extent list merge",
		    block->name);
		ret = __wt_block_panic(session);
	}

	/* The extent lists are freed without holding the live lock. */
	__wt_spin_unlock(session, &block->live_lock);

	/* Discard the lists remaining after the checkpoint call. */
	__wt_block_extlist_free(session, &ci->ckpt_avail);
	__wt_block_extlist_free(session, &ci->ckpt_alloc);
	__wt_block_extlist_free(session, &ci->ckpt_discard);

	__wt_spin_lock(session, &block->live_lock);

done:	block->ckpt_state = WT_CKPT_NONE;

err:	__wt_spin_unlock(session, &block->live_lock);
	return (ret);
}
/*
 * __wt_close --
 *	Release one reference on a file handle and clear the caller's
 * pointer.  When the last reference is dropped the handle is removed from
 * the connection's hash list, both Windows handles are closed (when valid)
 * and the memory is freed.
 */
int
__wt_close(WT_SESSION_IMPL *session, WT_FH **fhp)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_FH *fh;
	uint64_t bucket;

	conn = S2C(session);

	/* Nothing to do for a NULL handle; otherwise take ownership of it. */
	if ((fh = *fhp) == NULL)
		return (0);
	*fhp = NULL;

	__wt_spin_lock(session, &conn->fh_lock);
	if (fh->ref == 0 || --fh->ref > 0) {
		/* Unreferenced already, or other references remain. */
		__wt_spin_unlock(session, &conn->fh_lock);
		return (0);
	}

	/* Remove from the list. */
	bucket = fh->name_hash % WT_HASH_ARRAY_SIZE;
	WT_CONN_FILE_REMOVE(conn, fh, bucket);
	(void)WT_ATOMIC_SUB4(conn->open_file_count, 1);

	__wt_spin_unlock(session, &conn->fh_lock);

	/*
	 * Close the underlying handles.
	 * Note: For directories, we do not open valid directory handles on
	 * windows since it is not possible to sync a directory
	 */
	if (fh->filehandle != INVALID_HANDLE_VALUE)
		if (CloseHandle(fh->filehandle) == 0) {
			ret = __wt_errno();
			__wt_err(session, ret, "CloseHandle: %s", fh->name);
		}

	if (fh->filehandle_secondary != INVALID_HANDLE_VALUE)
		if (CloseHandle(fh->filehandle_secondary) == 0) {
			ret = __wt_errno();
			__wt_err(session,
			    ret, "CloseHandle: secondary: %s", fh->name);
		}

	/* Discard the memory. */
	__wt_free(session, fh->name);
	__wt_free(session, fh);
	return (ret);
}
/*
 * __wt_block_checkpoint_resolve --
 *	Resolve a checkpoint: fold the checkpoint's list of newly available
 * blocks into the live system's available list, then discard that list.
 */
int
__wt_block_checkpoint_resolve(WT_SESSION_IMPL *session, WT_BLOCK *block)
{
	WT_BLOCK_CKPT *ci;
	WT_DECL_RET;

	ci = &block->live;

	/*
	 * Checkpoints are a two-step process.  In the first step a new
	 * checkpoint is written to disk (including all the new extent lists
	 * for modified checkpoints and the live system) and, based on the
	 * checkpoints being deleted, a list of file blocks newly available
	 * for reallocation is created.  The locations of the new checkpoint
	 * information are returned to our caller, who has to write them into
	 * some kind of stable storage.
	 *
	 * We can't allocate from the newly available list immediately: the
	 * allocation might happen before our caller saves the new checkpoint
	 * information, and if we crashed before the new checkpoint location
	 * was saved, we'd have overwritten blocks still referenced by
	 * checkpoints in the system.
	 *
	 * This function is the second step: after our caller saves the
	 * checkpoint information, add the newly available blocks into the
	 * live system's available list.
	 */
	__wt_spin_lock(session, &block->live_lock);
	ret = __wt_block_extlist_merge(session, &ci->ckpt_avail, &ci->avail);
	__wt_spin_unlock(session, &block->live_lock);

	/* The list is discarded whether or not the merge succeeded. */
	__wt_block_extlist_free(session, &ci->ckpt_avail);

	return (ret);
}
/*
 * __lsm_discard_handle --
 *	Try to discard a handle from cache: get the handle exclusively, mark
 * it for discard and release it.
 */
static int
__lsm_discard_handle(
    WT_SESSION_IMPL *session, const char *uri, const char *checkpoint)
{
	WT_DECL_RET;
	int locked;

	locked = 0;

	/* This will fail with EBUSY if the file is still in use. */
	WT_RET(__wt_session_get_btree(session, uri, checkpoint,
	    NULL, WT_DHANDLE_EXCLUSIVE | WT_DHANDLE_LOCK_ONLY));

	WT_ASSERT(session, S2BT(session)->modified == 0);

	/*
	 * We need the checkpoint lock to discard in-memory handles: otherwise,
	 * an application checkpoint could see this file locked and fail with
	 * EBUSY.
	 *
	 * We can't get the checkpoint lock earlier or it will deadlock with
	 * the schema lock.
	 */
	if (checkpoint == NULL) {
		ret = __wt_spin_trylock(
		    session, &S2C(session)->checkpoint_lock);
		if (ret == 0)
			locked = 1;
	}

	/* Only flag the handle for discard if nothing failed so far. */
	if (ret == 0)
		F_SET(session->dhandle, WT_DHANDLE_DISCARD);

	/* The handle is always released, preserving any earlier error. */
	WT_TRET(__wt_session_release_btree(session));

	if (locked)
		__wt_spin_unlock(session, &S2C(session)->checkpoint_lock);

	return (ret);
}
/*
 * __wt_block_checkpoint_unload --
 *	Unload a checkpoint.  For the live system (checkpoint is false) this
 * also truncates the file and destroys the live checkpoint structure.
 */
int
__wt_block_checkpoint_unload(
    WT_SESSION_IMPL *session, WT_BLOCK *block, bool checkpoint)
{
	WT_DECL_RET;

	/* Verify cleanup. */
	if (block->verify)
		WT_TRET(__wt_verify_ckpt_unload(session, block));

	/* Named checkpoints have nothing further to discard. */
	if (checkpoint)
		return (ret);

	/*
	 * It's the live system: truncate to discard any extended blocks and
	 * discard the active extent lists.
	 *
	 * The truncate might fail if there's a file mapping (if there's an
	 * open checkpoint on the file), that's OK.
	 */
	WT_TRET_BUSY_OK(
	    __wt_block_truncate(session, block->fh, block->fh->size));

	/*
	 * Hold the lock even though we're unloading the live checkpoint,
	 * there could be readers active in other checkpoints.
	 */
	__wt_spin_lock(session, &block->live_lock);
	__wt_block_ckpt_destroy(session, &block->live);
	__wt_spin_unlock(session, &block->live_lock);

	return (ret);
}
/*
 * __conn_add_compressor --
 *	WT_CONNECTION->add_compressor method.
 *
 * Allocates a named-compressor entry holding a copy of the name and the
 * caller's compressor pointer, and appends it to the connection's
 * compressor list under the API lock.  On error any partially built entry
 * is freed.
 */
static int
__conn_add_compressor(WT_CONNECTION *wt_conn,
    const char *name, WT_COMPRESSOR *compressor, const char *config)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_NAMED_COMPRESSOR *ncomp;
	WT_SESSION_IMPL *session;

	/*
	 * Both name and compressor are used below, so they are not marked
	 * unused.  The entry pointer must be NULL before the API call so the
	 * error path never frees an uninitialized pointer.
	 */
	ncomp = NULL;

	conn = (WT_CONNECTION_IMPL *)wt_conn;
	CONNECTION_API_CALL(conn, session, add_compressor, config, cfg);
	WT_UNUSED(cfg);

	WT_ERR(__wt_calloc_def(session, 1, &ncomp));
	WT_ERR(__wt_strdup(session, name, &ncomp->name));
	ncomp->compressor = compressor;

	/* Link onto the connection's list; the list now owns the entry. */
	__wt_spin_lock(session, &conn->api_lock);
	TAILQ_INSERT_TAIL(&conn->compqh, ncomp, q);
	ncomp = NULL;
	__wt_spin_unlock(session, &conn->api_lock);

err:	if (ncomp != NULL) {
		__wt_free(session, ncomp->name);
		__wt_free(session, ncomp);
	}

	API_END_NOTFOUND_MAP(session, ret);
}
/*
 * __remove_file_check --
 *	Diagnostic check that a file being removed is not currently open.
 * Does nothing unless HAVE_DIAGNOSTIC is defined.
 */
static inline void
__remove_file_check(WT_SESSION_IMPL *session, const char *name)
{
#ifdef HAVE_DIAGNOSTIC
	WT_CONNECTION_IMPL *conn;
	WT_FH *fh;
	uint64_t bucket;

	conn = S2C(session);
	WT_ASSERT(session, !F_ISSET(conn, WT_CONN_READONLY));

	fh = NULL;
	bucket = __wt_hash_city64(name, strlen(name)) % WT_HASH_ARRAY_SIZE;

	/*
	 * Check if the file is open: it's an error if it is, since a higher
	 * level should have closed it before removing.  The walk leaves fh
	 * pointing at the matching handle, which the final assertion expects
	 * to be NULL.
	 */
	__wt_spin_lock(session, &conn->fh_lock);
	TAILQ_FOREACH(fh, &conn->fhhash[bucket], hashq) {
		if (strcmp(name, fh->name) == 0)
			break;
	}
	__wt_spin_unlock(session, &conn->fh_lock);

	WT_ASSERT(session, fh == NULL);
#else
	WT_UNUSED(session);
	WT_UNUSED(name);
#endif
}
/*
 * __wt_handle_is_open --
 *	Return true if the connection's file-handle hash table holds a handle
 * whose name matches exactly.
 */
bool
__wt_handle_is_open(WT_SESSION_IMPL *session, const char *name)
{
	WT_CONNECTION_IMPL *conn;
	WT_FH *fh;
	uint64_t bucket, hash;
	bool found;

	conn = S2C(session);

	/* The name's hash selects the bucket to search. */
	hash = __wt_hash_city64(name, strlen(name));
	bucket = hash % WT_HASH_ARRAY_SIZE;

	/*
	 * Walk the bucket under the file-handle lock; the walk stops early on
	 * a match and otherwise finishes with a NULL handle.
	 */
	__wt_spin_lock(session, &conn->fh_lock);
	TAILQ_FOREACH(fh, &conn->fhhash[bucket], hashq) {
		if (strcmp(name, fh->name) == 0)
			break;
	}
	found = fh != NULL;
	__wt_spin_unlock(session, &conn->fh_lock);

	return (found);
}
/*
 * __wt_block_compact_page_skip --
 *	Decide whether compaction should skip a page: *skipp is set to 0
 * (rewrite) when the block's offset lies beyond half the file size and to 1
 * (skip) otherwise.
 */
int
__wt_block_compact_page_skip(WT_SESSION_IMPL *session,
    WT_BLOCK *block, const uint8_t *addr, uint32_t addr_size, int *skipp)
{
	WT_FH *fh;
	off_t offset;
	uint32_t size, cksum;

	WT_UNUSED(addr_size);

	/* Paranoia: skip on error. */
	*skipp = 0;

	fh = block->fh;

	/* Crack the cookie. */
	WT_RET(__wt_block_buffer_to_addr(block, addr, &offset, &size, &cksum));

	/*
	 * If this block appears in the last half of the file, rewrite it.
	 *
	 * It's unclear we need to lock: the chances of a smashed read are close
	 * to non-existent and the worst thing that can happen is we rewrite a
	 * block we didn't want to rewrite.  On the other hand, compaction is
	 * not expected to be a common operation in WiredTiger, we shouldn't be
	 * here a lot.
	 */
	__wt_spin_lock(session, &block->live_lock);
	if (offset > fh->size / 2)
		*skipp = 0;
	else
		*skipp = 1;
	__wt_spin_unlock(session, &block->live_lock);

	return (0);
}
/*
 * __conn_add_data_source --
 *	WT_CONNECTION->add_data_source method.
 *
 * Allocates a named data-source entry holding a copy of the prefix and the
 * caller's data-source pointer, and appends it to the connection's list
 * under the API lock.  On error any partially built entry is freed.
 */
static int
__conn_add_data_source(WT_CONNECTION *wt_conn,
    const char *prefix, WT_DATA_SOURCE *dsrc, const char *config)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_SESSION_IMPL *session;
	WT_NAMED_DATA_SOURCE *ndsrc;

	ndsrc = NULL;

	conn = (WT_CONNECTION_IMPL *)wt_conn;
	CONNECTION_API_CALL(conn, session, add_data_source, config, cfg);
	WT_UNUSED(cfg);

	WT_ERR(__wt_calloc_def(session, 1, &ndsrc));
	WT_ERR(__wt_strdup(session, prefix, &ndsrc->prefix));
	ndsrc->dsrc = dsrc;

	/* Link onto the environment's list of data sources. */
	__wt_spin_lock(session, &conn->api_lock);
	TAILQ_INSERT_TAIL(&conn->dsrcqh, ndsrc, q);
	__wt_spin_unlock(session, &conn->api_lock);

	/* The list owns the entry now: keep the cleanup below away from it. */
	ndsrc = NULL;

err:	if (ndsrc != NULL) {
		__wt_free(session, ndsrc->prefix);
		__wt_free(session, ndsrc);
	}

	API_END_NOTFOUND_MAP(session, ret);
}
/*
 * __conn_add_collator --
 *	WT_CONNECTION->add_collator method.
 *
 * Allocates a named-collator entry holding a copy of the name and the
 * caller's collator pointer, and appends it to the connection's collator
 * list under the API lock.  On error any partially built entry is freed.
 */
static int
__conn_add_collator(WT_CONNECTION *wt_conn,
    const char *name, WT_COLLATOR *collator, const char *config)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_NAMED_COLLATOR *ncoll;
	WT_SESSION_IMPL *session;

	/*
	 * The error label frees ncoll, so it must be NULL before anything can
	 * jump there; otherwise an early failure frees an uninitialized
	 * pointer.
	 */
	ncoll = NULL;

	conn = (WT_CONNECTION_IMPL *)wt_conn;
	CONNECTION_API_CALL(conn, session, add_collator, config, cfg);
	WT_UNUSED(cfg);

	WT_ERR(__wt_calloc_def(session, 1, &ncoll));
	WT_ERR(__wt_strdup(session, name, &ncoll->name));
	ncoll->collator = collator;

	/* Link onto the connection's list; the list now owns the entry. */
	__wt_spin_lock(session, &conn->api_lock);
	TAILQ_INSERT_TAIL(&conn->collqh, ncoll, q);
	__wt_spin_unlock(session, &conn->api_lock);
	ncoll = NULL;

err:	if (ncoll != NULL) {
		__wt_free(session, ncoll->name);
		__wt_free(session, ncoll);
	}

	API_END_NOTFOUND_MAP(session, ret);
}
/*
 * __wt_block_checkpoint_unload --
 *	Unload a checkpoint.  For the live system (checkpoint is false) this
 * also truncates the file to the block's size and destroys the live
 * checkpoint structure.
 */
int
__wt_block_checkpoint_unload(
    WT_SESSION_IMPL *session, WT_BLOCK *block, bool checkpoint)
{
	WT_DECL_RET;

	/* Verify cleanup. */
	if (block->verify)
		WT_TRET(__wt_verify_ckpt_unload(session, block));

	/* Named checkpoints have nothing further to discard. */
	if (checkpoint)
		return (ret);

	/*
	 * It's the live system: truncate to discard any extended blocks and
	 * discard the active extent lists.
	 */
	WT_TRET(__wt_block_truncate(session, block, block->size));

	/*
	 * Hold the lock even though we're unloading the live checkpoint,
	 * there could be readers active in other checkpoints.
	 */
	__wt_spin_lock(session, &block->live_lock);
	__wt_block_ckpt_destroy(session, &block->live);
#ifdef HAVE_DIAGNOSTIC
	block->live_open = false;
#endif
	__wt_spin_unlock(session, &block->live_lock);

	return (ret);
}
/*
 * __wt_las_cursor_close --
 *	Discard a lookaside cursor: reset it, restore the session flags the
 * caller saved, and release the shared lookaside lock unless the session
 * has its own lookaside cursor.  The caller's cursor pointer is cleared.
 */
int
__wt_las_cursor_close(
    WT_SESSION_IMPL *session, WT_CURSOR **cursorp, uint32_t session_flags)
{
	WT_CONNECTION_IMPL *conn;
	WT_CURSOR *cursor;
	WT_DECL_RET;

	conn = S2C(session);

	/* Nothing to do if there's no cursor. */
	cursor = *cursorp;
	if (cursor == NULL)
		return (0);
	*cursorp = NULL;

	/* Reset the cursor. */
	ret = cursor->reset(cursor);

	/*
	 * We turned off caching and eviction while the lookaside cursor was in
	 * use, restore the session's flags.
	 */
	F_CLR(session, WT_SESSION_NO_CACHE | WT_SESSION_NO_EVICTION);
	F_SET(session, session_flags);

	/*
	 * Some threads have their own lookaside table cursors, else unlock the
	 * shared lookaside cursor.
	 */
	if (F_ISSET(session, WT_SESSION_LOOKASIDE_CURSOR))
		return (ret);

	__wt_spin_unlock(session, &conn->las_lock);
	return (ret);
}
/*
 * __wt_close --
 *	Release one reference on a file handle and clear the caller's
 * pointer.  When the last reference on a non-in-memory handle is dropped,
 * the handle is removed from the connection's hash list, its close callback
 * is run and its memory is freed.
 */
int
__wt_close(WT_SESSION_IMPL *session, WT_FH **fhp)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_FH *fh;
	uint64_t bucket;

	conn = S2C(session);

	/* Nothing to do for a NULL handle; otherwise take ownership of it. */
	if ((fh = *fhp) == NULL)
		return (0);
	*fhp = NULL;

	/* Track handle-close as a file operation, so open and close match. */
	WT_RET(__wt_verbose(
	    session, WT_VERB_FILEOPS, "%s: handle-close", fh->name));

	__wt_spin_lock(session, &conn->fh_lock);

	/* Assert the reference count is correct, but don't let it wrap. */
	WT_ASSERT(session, fh->ref > 0);

	/*
	 * If the reference count hasn't gone to 0, or if it's an in-memory
	 * object, we're done.
	 */
	if ((fh->ref > 0 && --fh->ref > 0) || F_ISSET(fh, WT_FH_IN_MEMORY)) {
		__wt_spin_unlock(session, &conn->fh_lock);
		return (0);
	}

	/* Remove from the list. */
	bucket = fh->name_hash % WT_HASH_ARRAY_SIZE;
	WT_CONN_FILE_REMOVE(conn, fh, bucket);
	(void)__wt_atomic_sub32(&conn->open_file_count, 1);

	__wt_spin_unlock(session, &conn->fh_lock);

	/* Discard underlying resources. */
	ret = fh->fh_close(session, fh);

	/* Discard the memory. */
	__wt_free(session, fh->name);
	__wt_free(session, fh);

	return (ret);
}
/*
 * __wt_compact --
 *	Compact a file.
 *
 * Reads the "trigger" configuration value, asks the block manager whether
 * compaction is worthwhile, and if so reviews in-memory pages and then walks
 * the tree marking each returned page (and the tree) dirty so it will be
 * rewritten.
 *
 * Returns 0 on success or when compaction is skipped, otherwise the first
 * error encountered.
 */
int
__wt_compact(WT_SESSION_IMPL *session, const char *cfg[])
{
	WT_BM *bm;
	WT_CONFIG_ITEM cval;
	WT_DECL_RET;
	WT_PAGE *page;
	int trigger, skip;

	bm = S2BT(session)->bm;

	WT_DSTAT_INCR(session, session_compact);

	WT_RET(__wt_config_gets(session, cfg, "trigger", &cval));
	trigger = (int)cval.val;

	/* Check if compaction might be useful. */
	WT_RET(bm->compact_skip(bm, session, trigger, &skip));
	if (skip)
		return (0);

	/*
	 * Walk the cache reviewing in-memory pages to see if they need to be
	 * re-written.  This requires looking at page reconciliation results,
	 * which means the page cannot be reconciled at the same time as it's
	 * being reviewed for compaction.  The underlying functions ensure we
	 * don't collide with page eviction, but we need to make sure we don't
	 * collide with checkpoints either, they are the other operation that
	 * can reconcile a page.
	 *
	 * Capture the result and release the lock before checking it: an
	 * early return here would leave the metadata lock held.
	 */
	__wt_spin_lock(session, &S2C(session)->metadata_lock);
	ret = __wt_bt_cache_op(session, NULL, WT_SYNC_COMPACT);
	__wt_spin_unlock(session, &S2C(session)->metadata_lock);
	WT_RET(ret);

	/*
	 * Walk the tree, reviewing on-disk pages to see if they need to be
	 * re-written.
	 */
	for (page = NULL;;) {
		WT_RET(__wt_tree_walk(session, &page, WT_TREE_COMPACT));
		if (page == NULL)
			break;

		/*
		 * The only pages returned by the tree walk function are pages
		 * we want to re-write; mark the page and tree dirty.  On
		 * failure, release the page before returning the error.
		 */
		if ((ret = __wt_page_modify_init(session, page)) != 0) {
			WT_TRET(__wt_page_release(session, page));
			WT_RET(ret);
		}
		__wt_page_and_tree_modify_set(session, page);

		WT_DSTAT_INCR(session, btree_compact_rewrite);
	}

	return (0);
}
/*
 * __wt_connection_destroy --
 *	Destroy the connection's underlying WT_CONNECTION_IMPL structure.
 *
 * Closes the connection's lock file handle, unlinks the connection from the
 * process-wide list, discards configuration and free-on-close memory,
 * destroys the connection's locks and frees its allocated memory, including
 * the structure itself.  A NULL connection is a no-op.
 *
 * Returns 0 or the first error recorded from the file close or the
 * read/write lock destroy.
 */
int
__wt_connection_destroy(WT_CONNECTION_IMPL *conn)
{
	WT_DECL_RET;
	WT_SESSION_IMPL *session;
	u_int i;

	/* Check there's something to destroy. */
	if (conn == NULL)
		return (0);

	/* All teardown below runs in the connection's default session. */
	session = conn->default_session;

	/*
	 * Close remaining open files before discarding the mutexes: the
	 * underlying file-close code uses a mutex to guard lists of open
	 * files.
	 */
	WT_TRET(__wt_close(session, &conn->lock_fh));

	/* Remove from the list of connections. */
	__wt_spin_lock(session, &__wt_process.spinlock);
	TAILQ_REMOVE(&__wt_process.connqh, conn, q);
	__wt_spin_unlock(session, &__wt_process.spinlock);

	/* Configuration */
	__wt_conn_config_discard(session);		/* configuration */

	__wt_conn_foc_discard(session);			/* free-on-close */

	/* Destroy the connection's locks. */
	__wt_spin_destroy(session, &conn->api_lock);
	__wt_spin_destroy(session, &conn->block_lock);
	__wt_spin_destroy(session, &conn->checkpoint_lock);
	__wt_spin_destroy(session, &conn->dhandle_lock);
	__wt_spin_destroy(session, &conn->encryptor_lock);
	__wt_spin_destroy(session, &conn->fh_lock);
	WT_TRET(__wt_rwlock_destroy(session, &conn->hot_backup_lock));
	__wt_spin_destroy(session, &conn->las_lock);
	__wt_spin_destroy(session, &conn->metadata_lock);
	__wt_spin_destroy(session, &conn->reconfig_lock);
	__wt_spin_destroy(session, &conn->schema_lock);
	__wt_spin_destroy(session, &conn->table_lock);
	__wt_spin_destroy(session, &conn->turtle_lock);

	/* Destroy each page lock, then free the array that held them. */
	for (i = 0; i < WT_PAGE_LOCKS; ++i)
		__wt_spin_destroy(session, &conn->page_lock[i]);
	__wt_free(session, conn->page_lock);

	/* Free allocated memory. */
	__wt_free(session, conn->cfg);
	__wt_free(session, conn->home);
	__wt_free(session, conn->error_prefix);
	__wt_free(session, conn->sessions);

	/*
	 * The connection itself is freed with a NULL session.
	 * NOTE(review): presumably because the session array was just freed
	 * -- confirm.
	 */
	__wt_free(NULL, conn);
	return (ret);
}
/*
 * __wt_conn_btree_sync_and_close --
 *	Sync and close the underlying btree handle.  A handle that isn't open
 * is a no-op.  The work is done under the handle's close lock, and the
 * session is temporarily barred from acquiring the schema lock if it does
 * not already hold it.
 */
int
__wt_conn_btree_sync_and_close(WT_SESSION_IMPL *session, int force)
{
	WT_BTREE *btree;
	WT_DATA_HANDLE *dhandle;
	WT_DECL_RET;
	int no_schema_lock;

	dhandle = session->dhandle;
	btree = S2BT(session);

	if (!F_ISSET(dhandle, WT_DHANDLE_OPEN))
		return (0);

	/*
	 * If we don't already have the schema lock, make it an error to try
	 * to acquire it.  The problem is that we are holding an exclusive
	 * lock on the handle, and if we attempt to acquire the schema lock
	 * we might deadlock with a thread that has the schema lock and wants
	 * a handle lock (specifically, checkpoint).
	 */
	no_schema_lock = !F_ISSET(session, WT_SESSION_SCHEMA_LOCKED);
	if (no_schema_lock)
		F_SET(session, WT_SESSION_NO_SCHEMA_LOCK);

	/*
	 * We may not be holding the schema lock, and threads may be walking
	 * the list of open handles (for example, checkpoint).  Acquire the
	 * handle's close lock.
	 */
	__wt_spin_lock(session, &dhandle->close_lock);

	/*
	 * The close can fail if an update cannot be written, return the EBUSY
	 * error to our caller for eventual retry.
	 */
	if (!F_ISSET(btree,
	    WT_BTREE_SALVAGE | WT_BTREE_UPGRADE | WT_BTREE_VERIFY))
		WT_ERR(__wt_checkpoint_close(session, force));

	/* Only handles without a checkpoint count as open btrees. */
	if (dhandle->checkpoint == NULL)
		--S2C(session)->open_btree_count;

	WT_TRET(__wt_btree_close(session));
	F_CLR(dhandle, WT_DHANDLE_OPEN);
	F_CLR(btree, WT_BTREE_SPECIAL_FLAGS);

err:	__wt_spin_unlock(session, &dhandle->close_lock);

	/* Undo the schema-lock restriction if we were the ones to set it. */
	if (no_schema_lock)
		F_CLR(session, WT_SESSION_NO_SCHEMA_LOCK);

	return (ret);
}
/*
 * __wt_curfile_open --
 *	WT_SESSION->open_cursor method for the btree cursor type.
 *
 * Parses the "bulk" configuration (a boolean, 0/1, or the string "bitmap"),
 * gets the btree handle for a "file:" URI and creates the cursor.  Any other
 * URI is reported as a bad object type.  If cursor creation fails the handle
 * is released.
 */
int
__wt_curfile_open(WT_SESSION_IMPL *session, const char *uri,
    WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp)
{
	WT_CONFIG_ITEM cval;
	WT_DECL_RET;
	uint32_t flags;
	bool bitmap, bulk;

	flags = 0;
	bitmap = false;
	bulk = false;

	WT_RET(__wt_config_gets_def(session, cfg, "bulk", 0, &cval));
	if (cval.type == WT_CONFIG_ITEM_BOOL ||
	    (cval.type == WT_CONFIG_ITEM_NUM &&
	    (cval.val == 0 || cval.val == 1))) {
		/* A plain boolean (or 0/1) selects an ordinary bulk load. */
		bitmap = false;
		bulk = cval.val != 0;
	} else if (WT_STRING_MATCH("bitmap", cval.str, cval.len)) {
		bitmap = true;
		bulk = true;
	} else
		WT_RET_MSG(session, EINVAL,
		    "Value for 'bulk' must be a boolean or 'bitmap'");

	/* Bulk handles require exclusive access. */
	if (bulk)
		LF_SET(WT_BTREE_BULK | WT_DHANDLE_EXCLUSIVE);

	/* Get the handle and lock it while the cursor is using it. */
	if (WT_PREFIX_MATCH(uri, "file:")) {
		/*
		 * If we are opening a bulk cursor, get the handle while
		 * holding the checkpoint lock.  This prevents a bulk cursor
		 * open failing with EBUSY due to a database-wide checkpoint.
		 *
		 * The result is checked only after the lock is released.
		 */
		if (bulk)
			__wt_spin_lock(
			    session, &S2C(session)->checkpoint_lock);
		ret = __wt_session_get_btree_ckpt(session, uri, cfg, flags);
		if (bulk)
			__wt_spin_unlock(
			    session, &S2C(session)->checkpoint_lock);
		WT_RET(ret);
	} else
		WT_RET(__wt_bad_object_type(session, uri));

	WT_ERR(__wt_curfile_create(session, owner, cfg, bulk, bitmap, cursorp));

	/* Increment the data-source's in-use counter. */
	__wt_cursor_dhandle_incr_use(session);
	return (0);

err:	/* If the cursor could not be opened, release the handle. */
	WT_TRET(__wt_session_release_btree(session));
	return (ret);
}
/*
 * __wt_block_compact_skip --
 *	Decide whether compaction is likely to shrink the file.  *skipp is set
 * to 1 (skip) unless at least 10% of the file's size is on the available
 * list within the first 90% of the file.
 */
int
__wt_block_compact_skip(WT_SESSION_IMPL *session, WT_BLOCK *block, int *skipp)
{
	WT_DECL_RET;
	WT_EXT *ext;
	WT_EXTLIST *el;
	WT_FH *fh;
	off_t avail, ninety;

	/* Return a default skip. */
	*skipp = 1;

	fh = block->fh;

	/*
	 * We do compaction by copying blocks from the end of the file to the
	 * beginning of the file, and we need some metrics to decide if it's
	 * worth doing.  Ignore small files, and files where we are unlikely
	 * to recover 10% of the file.
	 */
	if (fh->size <= 10 * 1024)
		return (0);

	__wt_spin_lock(session, &block->live_lock);

	if (WT_VERBOSE_ISSET(session, compact))
		WT_ERR(__block_dump_avail(session, block));

	/* Sum the number of available bytes in the first 90% of the file. */
	el = &block->live.avail;
	ninety = fh->size - fh->size / 10;
	avail = 0;
	WT_EXT_FOREACH(ext, el->off) {
		if (ext->off < ninety)
			avail += ext->size;
	}

	/*
	 * If at least 10% of the total file is available and in the first 90%
	 * of the file, we'll try compaction.
	 */
	if (avail >= fh->size / 10)
		*skipp = 0;

	WT_VERBOSE_ERR(session, compact,
	    "%s: %" PRIuMAX "MB (%" PRIuMAX ") available space in the first "
	    "90%% of the file, require 10%% or %" PRIuMAX "MB (%" PRIuMAX
	    ") to perform compaction, compaction %s",
	    block->name,
	    (uintmax_t)avail / WT_MEGABYTE, (uintmax_t)avail,
	    (uintmax_t)(fh->size / 10) / WT_MEGABYTE, (uintmax_t)fh->size / 10,
	    *skipp ? "skipped" : "proceeding");

err:	__wt_spin_unlock(session, &block->live_lock);

	return (ret);
}
/*
 * __wt_connection_destroy --
 *	Destroy the connection's underlying WT_CONNECTION_IMPL structure.
 *
 * Unlinks the connection from the process-wide list, discards configuration
 * and free-on-close memory, destroys the connection's locks, terminates the
 * file system if it has a terminate method, and frees the connection's
 * allocated memory, including the structure itself.  A NULL connection is a
 * no-op.
 *
 * Returns 0 or the error from the file system's terminate method.
 */
int
__wt_connection_destroy(WT_CONNECTION_IMPL *conn)
{
	WT_DECL_RET;
	WT_SESSION_IMPL *session;
	u_int i;

	/* Check there's something to destroy. */
	if (conn == NULL)
		return (0);

	/* All teardown below runs in the connection's default session. */
	session = conn->default_session;

	/* Remove from the list of connections. */
	__wt_spin_lock(session, &__wt_process.spinlock);
	TAILQ_REMOVE(&__wt_process.connqh, conn, q);
	__wt_spin_unlock(session, &__wt_process.spinlock);

	/* Configuration */
	__wt_conn_config_discard(session);		/* configuration */

	__wt_conn_foc_discard(session);			/* free-on-close */

	/* Destroy the connection's locks. */
	__wt_spin_destroy(session, &conn->api_lock);
	__wt_spin_destroy(session, &conn->block_lock);
	__wt_spin_destroy(session, &conn->checkpoint_lock);
	__wt_spin_destroy(session, &conn->dhandle_lock);
	__wt_spin_destroy(session, &conn->encryptor_lock);
	__wt_spin_destroy(session, &conn->fh_lock);
	__wt_rwlock_destroy(session, &conn->hot_backup_lock);
	__wt_spin_destroy(session, &conn->las_lock);
	__wt_spin_destroy(session, &conn->metadata_lock);
	__wt_spin_destroy(session, &conn->reconfig_lock);
	__wt_spin_destroy(session, &conn->schema_lock);
	__wt_spin_destroy(session, &conn->table_lock);
	__wt_spin_destroy(session, &conn->turtle_lock);

	/* Destroy each page lock, then free the array that held them. */
	for (i = 0; i < WT_PAGE_LOCKS; ++i)
		__wt_spin_destroy(session, &conn->page_lock[i]);
	__wt_free(session, conn->page_lock);

	/*
	 * Destroy the file-system configuration; both the file system and its
	 * terminate method are optional.
	 */
	if (conn->file_system != NULL && conn->file_system->terminate != NULL)
		WT_TRET(conn->file_system->terminate(
		    conn->file_system, (WT_SESSION *)session));

	/* Free allocated memory. */
	__wt_free(session, conn->cfg);
	__wt_free(session, conn->home);
	__wt_free(session, conn->error_prefix);
	__wt_free(session, conn->sessions);
	__wt_stat_connection_discard(session, conn);

	/*
	 * The connection itself is freed with a NULL session.
	 * NOTE(review): presumably because the session array was just freed
	 * -- confirm.
	 */
	__wt_free(NULL, conn);
	return (ret);
}
/*
 * __wt_close --
 *	Release one reference on a file handle.  When the last reference is
 * dropped the handle is unlinked from the connection's list, both Windows
 * handles are closed (when valid) and the memory is freed.
 *
 * Returns 0 when the handle is NULL, has no references or is still in use;
 * otherwise returns 0 or the error from the last failing CloseHandle.
 */
int
__wt_close(WT_SESSION_IMPL *session, WT_FH *fh)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	int last_ref;

	conn = S2C(session);

	/*
	 * The reference count and the handle list are both updated under the
	 * connection's file-handle lock.
	 */
	__wt_spin_lock(session, &conn->fh_lock);
	last_ref = fh != NULL && fh->ref != 0 && !(--fh->ref > 0);
	if (last_ref) {
		/* Remove from the list. */
		TAILQ_REMOVE(&conn->fhqh, fh, q);
		WT_STAT_FAST_CONN_DECR(session, file_open);
	}
	__wt_spin_unlock(session, &conn->fh_lock);

	if (!last_ref)
		return (0);

	/*
	 * Close the underlying handles; CloseHandle returns zero on failure.
	 * Note: For directories, we do not open valid directory handles on
	 * windows since it is not possible to sync a directory
	 */
	if (fh->filehandle != INVALID_HANDLE_VALUE &&
	    CloseHandle(fh->filehandle) == 0) {
		ret = __wt_errno();
		__wt_err(session, ret, "CloseHandle: %s", fh->name);
	}

	if (fh->filehandle_secondary != INVALID_HANDLE_VALUE &&
	    CloseHandle(fh->filehandle_secondary) == 0) {
		ret = __wt_errno();
		__wt_err(session, ret, "CloseHandle: secondary: %s", fh->name);
	}

	/* Discard the memory. */
	__wt_free(session, fh->name);
	__wt_free(session, fh);
	return (ret);
}
/*
 * __conn_load_extension --
 *	WT_CONNECTION->load_extension method.
 *
 * Opens the shared library at path, calls the entry point named by the
 * "entry" configuration, records the function named by the "terminate"
 * configuration and links the library onto the connection's list.  On error
 * the library is closed again.
 */
static int
__conn_load_extension(
    WT_CONNECTION *wt_conn, const char *path, const char *config)
{
	WT_CONFIG_ITEM cval;
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_DLH *dlh;
	WT_SESSION_IMPL *session;
	int (*load)(WT_CONNECTION *, WT_CONFIG_ARG *);
	const char *init_name, *terminate_name;

	/* Everything the cleanup code touches starts out empty. */
	dlh = NULL;
	init_name = NULL;
	terminate_name = NULL;

	conn = (WT_CONNECTION_IMPL *)wt_conn;
	CONNECTION_API_CALL(conn, session, load_extension, config, cfg);

	/* Copy out the name of the entry point. */
	WT_ERR(__wt_config_gets(session, cfg, "entry", &cval));
	WT_ERR(__wt_strndup(session, cval.str, cval.len, &init_name));

	/*
	 * This assumes the underlying shared libraries are reference counted,
	 * that is, that re-opening a shared library simply increments a ref
	 * count, and closing it simply decrements the ref count, and the last
	 * close discards the reference entirely -- in other words, we do not
	 * check to see if we've already opened this shared library.
	 *
	 * Fill in the extension structure and call the load function.
	 */
	WT_ERR(__wt_dlopen(session, path, &dlh));
	WT_ERR(__wt_dlsym(session, dlh, init_name, 1, &load));
	WT_ERR(load(wt_conn, (WT_CONFIG_ARG *)cfg));

	/* Remember the unload function for when we close. */
	WT_ERR(__wt_config_gets(session, cfg, "terminate", &cval));
	WT_ERR(__wt_strndup(session, cval.str, cval.len, &terminate_name));
	WT_ERR(__wt_dlsym(session, dlh, terminate_name, 0, &dlh->terminate));

	/* Link onto the environment's list of open libraries. */
	__wt_spin_lock(session, &conn->api_lock);
	TAILQ_INSERT_TAIL(&conn->dlhqh, dlh, q);
	__wt_spin_unlock(session, &conn->api_lock);

	/* The list owns the library now: don't close it below. */
	dlh = NULL;

err:	if (dlh != NULL)
		WT_TRET(__wt_dlclose(session, dlh));

	/* The name copies are only needed during the load. */
	__wt_free(session, init_name);
	__wt_free(session, terminate_name);

	API_END_NOTFOUND_MAP(session, ret);
}
/*
 * __wt_log_slot_grow_buffers --
 *	Increase the buffer size of all available slots in the buffer pool.
 * Go to some lengths to include active (but unused) slots to handle
 * the case where all log write record sizes exceed the size of the
 * active buffer.
 *
 * Each eligible slot (free or ready) is switched to the pending state,
 * its buffer is grown to the larger of twice its current size and newsize,
 * and the slot is then returned to its original state.  Returns 0 or the
 * first error from growing a buffer, at which point the walk stops.
 */
int
__wt_log_slot_grow_buffers(WT_SESSION_IMPL *session, size_t newsize)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_LOG *log;
	WT_LOGSLOT *slot;
	int64_t orig_state;
	uint64_t old_size, total_growth;
	int i;

	conn = S2C(session);
	log = conn->log;
	total_growth = 0;
	WT_STAT_FAST_CONN_INCR(session, log_buffer_grow);

	/*
	 * Take the log slot lock to prevent other threads growing buffers
	 * at the same time.  Could tighten the scope of this lock, or have
	 * a separate lock if there is contention.
	 */
	__wt_spin_lock(session, &log->log_slot_lock);
	for (i = 0; i < SLOT_POOL; i++) {
		slot = &log->slot_pool[i];

		/* Avoid atomic operations if they won't succeed. */
		if (slot->slot_state != WT_LOG_SLOT_FREE &&
		    slot->slot_state != WT_LOG_SLOT_READY)
			continue;

		/* Don't keep growing unrelated buffers. */
		if (slot->slot_buf.memsize > (10 * newsize) &&
		    !F_ISSET(slot, SLOT_BUF_GROW))
			continue;

		/* Claim the slot, trying the free state before ready. */
		orig_state = WT_ATOMIC_CAS_VAL8(
		    slot->slot_state, WT_LOG_SLOT_FREE, WT_LOG_SLOT_PENDING);
		if (orig_state != WT_LOG_SLOT_FREE) {
			orig_state = WT_ATOMIC_CAS_VAL8(slot->slot_state,
			    WT_LOG_SLOT_READY, WT_LOG_SLOT_PENDING);
			if (orig_state != WT_LOG_SLOT_READY)
				continue;
		}

		/* We have a slot - now go ahead and grow the buffer. */
		old_size = slot->slot_buf.memsize;
		F_CLR(slot, SLOT_BUF_GROW);
		ret = __wt_buf_grow(session, &slot->slot_buf,
		    WT_MAX(slot->slot_buf.memsize * 2, newsize));

		/*
		 * Hand the slot back whether or not the grow succeeded,
		 * otherwise a failure leaves it in the pending state.
		 * NOTE(review): assumes a failed __wt_buf_grow leaves the
		 * existing buffer usable -- confirm.
		 */
		slot->slot_state = orig_state;
		WT_ERR(ret);

		total_growth += slot->slot_buf.memsize - old_size;
	}

err:	__wt_spin_unlock(session, &log->log_slot_lock);

	/* Account for whatever growth happened, even on error. */
	WT_STAT_FAST_CONN_INCRV(session, log_buffer_size, total_growth);
	return (ret);
}
/*
 * __wt_handle_search --
 *	Search the connection's file-handle hash table for a handle with a
 * matching name, optionally taking a reference on it.  If there is no match
 * and a new handle is supplied, that handle is inserted instead.
 *
 * Returns true only when an existing handle matched.  If fhp is not NULL it
 * is set to the matching or newly inserted handle, or to NULL if neither
 * applies.
 */
bool
__wt_handle_search(WT_SESSION_IMPL *session,
    const char *name, bool increment_ref, WT_FH *newfh, WT_FH **fhp)
{
	WT_CONNECTION_IMPL *conn;
	WT_FH *fh;
	uint64_t bucket, hash;
	bool found;

	if (fhp != NULL)
		*fhp = NULL;

	conn = S2C(session);
	found = false;

	/* The name's hash selects the bucket to search. */
	hash = __wt_hash_city64(name, strlen(name));
	bucket = hash % WT_HASH_ARRAY_SIZE;

	__wt_spin_lock(session, &conn->fh_lock);

	/*
	 * If we already have the file open, optionally increment the reference
	 * count and return a pointer.
	 */
	TAILQ_FOREACH(fh, &conn->fhhash[bucket], hashq) {
		if (strcmp(name, fh->name) != 0)
			continue;

		if (increment_ref)
			++fh->ref;
		if (fhp != NULL)
			*fhp = fh;
		found = true;
		break;
	}

	/* If we don't find a match, optionally add a new entry. */
	if (!found && newfh != NULL) {
		newfh->name_hash = hash;
		WT_CONN_FILE_INSERT(conn, newfh, bucket);
		(void)__wt_atomic_add32(&conn->open_file_count, 1);

		if (increment_ref)
			++newfh->ref;
		if (fhp != NULL)
			*fhp = newfh;
	}

	__wt_spin_unlock(session, &conn->fh_lock);

	return (found);
}
/*
 * __wt_block_compact_skip --
 *	Decide whether compaction is likely to shrink the file.  *skipp is set
 * to 1 (skip) unless the available bytes in the first half of the file, as
 * a percentage of the file size, reach the trigger value.
 */
int
__wt_block_compact_skip(
    WT_SESSION_IMPL *session, WT_BLOCK *block, int trigger, int *skipp)
{
	WT_EXT *ext;
	WT_EXTLIST *el;
	WT_FH *fh;
	off_t avail, half;
	int pct;

	fh = block->fh;

	/* Skip by default. */
	*skipp = 1;

	/*
	 * We do compaction by copying blocks from the end of the file to the
	 * beginning of the file, and we need some metrics to decide if it's
	 * worth doing.  Ignore small files, and files where we are unlikely
	 * to recover the specified percentage of the file.  (The calculation
	 * is if at least N % of the file appears in the available list, and
	 * in the first half of the file.  In other words, don't bother with
	 * compaction unless we have an expectation of moving N % of the file
	 * from the last half of the file to the first half of the file.)
	 */
	if (fh->size <= 10 * 1024)
		return (0);

	/* Sum the available bytes in the first half, under the live lock. */
	__wt_spin_lock(session, &block->live_lock);

	el = &block->live.avail;
	half = fh->size / 2;
	avail = 0;
	WT_EXT_FOREACH(ext, el->off) {
		if (ext->off < half)
			avail += ext->size;
	}
	pct = (int)((avail * 100) / fh->size);

	__wt_spin_unlock(session, &block->live_lock);

	if (pct >= trigger)
		*skipp = 0;

	WT_VERBOSE_RET(session, block,
	    "%s: compaction %s, %d%% of the free space in the available "
	    "list appears in the first half of the file",
	    block->name, pct < trigger ? "skipped" : "proceeding", pct);

	return (0);
}
/*
 * __wt_connection_destroy --
 *	Destroy the connection's underlying WT_CONNECTION_IMPL structure.
 *
 * Closes the connection's lock and log file handles if present, unlinks the
 * connection from the process-wide list, discards configuration and
 * free-on-close memory, destroys the connection's locks and frees its
 * allocated memory, including the structure itself.  A NULL connection is a
 * no-op.
 *
 * Returns 0 or the first error recorded while closing the file handles.
 */
int
__wt_connection_destroy(WT_CONNECTION_IMPL *conn)
{
	WT_DECL_RET;
	WT_SESSION_IMPL *session;

	/* Check there's something to destroy. */
	if (conn == NULL)
		return (0);

	/* All teardown below runs in the connection's default session. */
	session = conn->default_session;

	/*
	 * Close remaining open files before discarding the mutexes: the
	 * underlying file-close code uses a mutex to guard lists of open
	 * files.
	 */
	if (conn->lock_fh != NULL)
		WT_TRET(__wt_close(session, conn->lock_fh));
	if (conn->log_fh != NULL)
		WT_TRET(__wt_close(session, conn->log_fh));

	/* Remove from the list of connections. */
	__wt_spin_lock(session, &__wt_process.spinlock);
	TAILQ_REMOVE(&__wt_process.connqh, conn, q);
	__wt_spin_unlock(session, &__wt_process.spinlock);

	/* Configuration */
	__wt_conn_config_discard(session);		/* configuration */

	__wt_conn_foc_discard(session);			/* free-on-close */

	/* Destroy the connection's locks. */
	__wt_spin_destroy(session, &conn->api_lock);
	__wt_spin_destroy(session, &conn->block_lock);
	__wt_spin_destroy(session, &conn->checkpoint_lock);
	__wt_spin_destroy(session, &conn->fh_lock);
	__wt_spin_destroy(session, &conn->hot_backup_lock);
	__wt_spin_destroy(session, &conn->schema_lock);
	__wt_spin_destroy(session, &conn->serial_lock);

	/* Free allocated memory. */
	__wt_free(session, conn->home);
	__wt_free(session, conn->error_prefix);
	__wt_free(session, conn->sessions);

	/*
	 * The connection itself is freed with a NULL session.
	 * NOTE(review): presumably because the session array was just freed
	 * -- confirm.
	 */
	__wt_free(NULL, conn);
	return (ret);
}