Example #1
/*
 * __wt_connection_destroy --
 *	Destroy the connection's underlying WT_CONNECTION_IMPL structure.
 */
int
__wt_connection_destroy(WT_CONNECTION_IMPL *conn)
{
	WT_DECL_RET;
	WT_SESSION_IMPL *session;
	u_int i;

	/* Check there's something to destroy. */
	if (conn == NULL)
		return (0);

	session = conn->default_session;

	/*
	 * Close remaining open files (before discarding the mutex; the
	 * underlying file-close code uses the mutex to guard lists of
	 * open files).
	 */
	WT_TRET(__wt_close(session, &conn->lock_fh));

	/* Remove from the list of connections. */
	__wt_spin_lock(session, &__wt_process.spinlock);
	TAILQ_REMOVE(&__wt_process.connqh, conn, q);
	__wt_spin_unlock(session, &__wt_process.spinlock);

	__wt_conn_config_discard(session);		/* configuration */

	__wt_conn_foc_discard(session);			/* free-on-close */

	__wt_spin_destroy(session, &conn->api_lock);
	__wt_spin_destroy(session, &conn->block_lock);
	__wt_spin_destroy(session, &conn->checkpoint_lock);
	__wt_spin_destroy(session, &conn->dhandle_lock);
	__wt_spin_destroy(session, &conn->encryptor_lock);
	__wt_spin_destroy(session, &conn->fh_lock);
	WT_TRET(__wt_rwlock_destroy(session, &conn->hot_backup_lock));
	__wt_spin_destroy(session, &conn->las_lock);
	__wt_spin_destroy(session, &conn->metadata_lock);
	__wt_spin_destroy(session, &conn->reconfig_lock);
	__wt_spin_destroy(session, &conn->schema_lock);
	__wt_spin_destroy(session, &conn->table_lock);
	__wt_spin_destroy(session, &conn->turtle_lock);
	for (i = 0; i < WT_PAGE_LOCKS; ++i)
		__wt_spin_destroy(session, &conn->page_lock[i]);
	__wt_free(session, conn->page_lock);

	/* Free allocated memory. */
	__wt_free(session, conn->cfg);
	__wt_free(session, conn->home);
	__wt_free(session, conn->error_prefix);
	__wt_free(session, conn->sessions);

	__wt_free(NULL, conn);
	return (ret);
}
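A pattern worth noting in the example above is the teardown ordering: the connection is unlinked from the process-wide queue while holding the process spinlock, and only then are its own locks destroyed and its memory freed. Below is a minimal sketch of that ordering using POSIX threads and <sys/queue.h>; conn_t, the queue, and every other name here are illustrative stand-ins, not WiredTiger code.

#include <pthread.h>
#include <stdlib.h>
#include <sys/queue.h>

/* Illustrative sketch only -- not WiredTiger's actual types. */
typedef struct conn {
	TAILQ_ENTRY(conn) q;		/* Links in the global list. */
	pthread_mutex_t api_lock;	/* Per-connection lock. */
	char *home;			/* Allocated configuration. */
} conn_t;

static TAILQ_HEAD(, conn) connqh = TAILQ_HEAD_INITIALIZER(connqh);
static pthread_mutex_t process_lock = PTHREAD_MUTEX_INITIALIZER;

static void
conn_destroy(conn_t *conn)
{
	if (conn == NULL)	/* Check there's something to destroy. */
		return;

	/* Unlink from the global list while holding the process lock. */
	pthread_mutex_lock(&process_lock);
	TAILQ_REMOVE(&connqh, conn, q);
	pthread_mutex_unlock(&process_lock);

	/* Only then destroy the connection's own locks and memory. */
	pthread_mutex_destroy(&conn->api_lock);
	free(conn->home);
	free(conn);
}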
Example #2
/*
 * __wt_curfile_open --
 *	WT_SESSION->open_cursor method for the btree cursor type.
 */
int
__wt_curfile_open(WT_SESSION_IMPL *session, const char *uri,
                  WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp)
{
    WT_CONFIG_ITEM cval;
    WT_DECL_RET;
    uint32_t flags;
    bool bitmap, bulk;

    bitmap = bulk = false;
    flags = 0;

    WT_RET(__wt_config_gets_def(session, cfg, "bulk", 0, &cval));
    if (cval.type == WT_CONFIG_ITEM_BOOL ||
            (cval.type == WT_CONFIG_ITEM_NUM &&
             (cval.val == 0 || cval.val == 1))) {
        bitmap = false;
        bulk = cval.val != 0;
    } else if (WT_STRING_MATCH("bitmap", cval.str, cval.len))
        bitmap = bulk = true;
    else
        WT_RET_MSG(session, EINVAL,
                   "Value for 'bulk' must be a boolean or 'bitmap'");

    /* Bulk handles require exclusive access. */
    if (bulk)
        LF_SET(WT_BTREE_BULK | WT_DHANDLE_EXCLUSIVE);

    /* Get the handle and lock it while the cursor is using it. */
    if (WT_PREFIX_MATCH(uri, "file:")) {
        /*
         * If we are opening a bulk cursor, get the handle while
         * holding the checkpoint lock.  This prevents a bulk cursor
         * open failing with EBUSY due to a database-wide checkpoint.
         */
        if (bulk)
            __wt_spin_lock(
                session, &S2C(session)->checkpoint_lock);
        ret = __wt_session_get_btree_ckpt(session, uri, cfg, flags);
        if (bulk)
            __wt_spin_unlock(
                session, &S2C(session)->checkpoint_lock);
        WT_RET(ret);
    } else
        WT_RET(__wt_bad_object_type(session, uri));

    WT_ERR(__wt_curfile_create(session, owner, cfg, bulk, bitmap, cursorp));

    /* Increment the data-source's in-use counter. */
    __wt_cursor_dhandle_incr_use(session);
    return (0);

err:	/* If the cursor could not be opened, release the handle. */
    WT_TRET(__wt_session_release_btree(session));
    return (ret);
}
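For context, here is how the bulk path above is reached from the public API -- a sketch assuming a standard WiredTiger build, with return codes ignored for brevity; the home directory and table name are illustrative.

#include <wiredtiger.h>

/*
 * Sketch only: error handling omitted.  Opening a cursor with the "bulk"
 * configuration is what drives __wt_curfile_open to set WT_BTREE_BULK and
 * WT_DHANDLE_EXCLUSIVE above.
 */
void
bulk_load_example(void)
{
	WT_CONNECTION *conn;
	WT_SESSION *session;
	WT_CURSOR *cursor;

	(void)wiredtiger_open("/tmp/wt_home", NULL, "create", &conn);
	(void)conn->open_session(conn, NULL, NULL, &session);
	(void)session->create(
	    session, "table:bulk", "key_format=S,value_format=S");

	/* Bulk cursors require exclusive access to a newly created object. */
	(void)session->open_cursor(
	    session, "table:bulk", NULL, "bulk", &cursor);
	cursor->set_key(cursor, "key1");
	cursor->set_value(cursor, "value1");
	(void)cursor->insert(cursor);
	(void)cursor->close(cursor);

	(void)conn->close(conn, NULL);
}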
Example #3
/*
 * __wt_conn_btree_sync_and_close --
 *	Sync and close the underlying btree handle.
 */
int
__wt_conn_btree_sync_and_close(WT_SESSION_IMPL *session, int force)
{
	WT_BTREE *btree;
	WT_DATA_HANDLE *dhandle;
	WT_DECL_RET;
	int no_schema_lock;

	dhandle = session->dhandle;
	btree = S2BT(session);

	if (!F_ISSET(dhandle, WT_DHANDLE_OPEN))
		return (0);

	/*
	 * If we don't already have the schema lock, make it an error to try
	 * to acquire it.  The problem is that we are holding an exclusive
	 * lock on the handle, and if we attempt to acquire the schema lock
	 * we might deadlock with a thread that has the schema lock and wants
	 * a handle lock (specifically, checkpoint).
	 */
	no_schema_lock = 0;
	if (!F_ISSET(session, WT_SESSION_SCHEMA_LOCKED)) {
		no_schema_lock = 1;
		F_SET(session, WT_SESSION_NO_SCHEMA_LOCK);
	}

	/*
	 * We may not be holding the schema lock, and threads may be walking
	 * the list of open handles (for example, checkpoint).  Acquire the
	 * handle's close lock.
	 */
	__wt_spin_lock(session, &dhandle->close_lock);

	/*
	 * The close can fail if an update cannot be written; return the EBUSY
	 * error to our caller for eventual retry.
	 */
	if (!F_ISSET(btree,
	    WT_BTREE_SALVAGE | WT_BTREE_UPGRADE | WT_BTREE_VERIFY))
		WT_ERR(__wt_checkpoint_close(session, force));

	if (dhandle->checkpoint == NULL)
		--S2C(session)->open_btree_count;

	WT_TRET(__wt_btree_close(session));
	F_CLR(dhandle, WT_DHANDLE_OPEN);
	F_CLR(btree, WT_BTREE_SPECIAL_FLAGS);

err:	__wt_spin_unlock(session, &dhandle->close_lock);

	if (no_schema_lock)
		F_CLR(session, WT_SESSION_NO_SCHEMA_LOCK);

	return (ret);
}
Example #4
/*
 * __wt_block_compact_skip --
 *	Return if compaction will shrink the file.
 */
int
__wt_block_compact_skip(WT_SESSION_IMPL *session, WT_BLOCK *block, int *skipp)
{
	WT_DECL_RET;
	WT_EXT *ext;
	WT_EXTLIST *el;
	WT_FH *fh;
	off_t avail, ninety;

	*skipp = 1;				/* Return a default skip. */

	fh = block->fh;

	/*
	 * We do compaction by copying blocks from the end of the file to the
	 * beginning of the file, and we need some metrics to decide if it's
	 * worth doing.  Ignore small files, and files where we are unlikely
	 * to recover 10% of the file.
	 */
	if (fh->size <= 10 * 1024)
		return (0);

	__wt_spin_lock(session, &block->live_lock);

	if (WT_VERBOSE_ISSET(session, compact))
		WT_ERR(__block_dump_avail(session, block));

	/* Sum the number of available bytes in the first 90% of the file. */
	avail = 0;
	ninety = fh->size - fh->size / 10;

	el = &block->live.avail;
	WT_EXT_FOREACH(ext, el->off)
		if (ext->off < ninety)
			avail += ext->size;

	/*
	 * If at least 10% of the total file is available and in the first 90%
	 * of the file, we'll try compaction.
	 */
	if (avail >= fh->size / 10)
		*skipp = 0;

	WT_VERBOSE_ERR(session, compact,
	    "%s: %" PRIuMAX "MB (%" PRIuMAX ") available space in the first "
	    "90%% of the file, require 10%% or %" PRIuMAX "MB (%" PRIuMAX
	    ") to perform compaction, compaction %s",
	    block->name,
	    (uintmax_t)avail / WT_MEGABYTE, (uintmax_t)avail,
	    (uintmax_t)(fh->size / 10) / WT_MEGABYTE, (uintmax_t)fh->size / 10,
	    *skipp ? "skipped" : "proceeding");

err:	__wt_spin_unlock(session, &block->live_lock);

	return (ret);
}
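The skip decision above reduces to simple arithmetic once the extent-list walk has produced a byte count. An illustrative sketch, not WiredTiger code: for a 100MB file, compaction proceeds only if at least 10MB of free space sits in the first 90MB.

#include <stdbool.h>
#include <stdint.h>

/* Illustrative only: the 10%/90% rule with the extent walk abstracted away. */
static bool
compact_should_skip(int64_t file_size, int64_t avail_in_first_90pct)
{
	if (file_size <= 10 * 1024)		/* Ignore small files. */
		return (true);

	/* Proceed only if we expect to recover 10% of the file. */
	return (avail_in_first_90pct < file_size / 10);
}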
Example #5
/*
 * __wt_connection_destroy --
 *	Destroy the connection's underlying WT_CONNECTION_IMPL structure.
 */
int
__wt_connection_destroy(WT_CONNECTION_IMPL *conn)
{
	WT_DECL_RET;
	WT_SESSION_IMPL *session;
	u_int i;

	/* Check there's something to destroy. */
	if (conn == NULL)
		return (0);

	session = conn->default_session;

	/* Remove from the list of connections. */
	__wt_spin_lock(session, &__wt_process.spinlock);
	TAILQ_REMOVE(&__wt_process.connqh, conn, q);
	__wt_spin_unlock(session, &__wt_process.spinlock);

	__wt_conn_config_discard(session);		/* configuration */

	__wt_conn_foc_discard(session);			/* free-on-close */

	__wt_spin_destroy(session, &conn->api_lock);
	__wt_spin_destroy(session, &conn->block_lock);
	__wt_spin_destroy(session, &conn->checkpoint_lock);
	__wt_spin_destroy(session, &conn->dhandle_lock);
	__wt_spin_destroy(session, &conn->encryptor_lock);
	__wt_spin_destroy(session, &conn->fh_lock);
	__wt_rwlock_destroy(session, &conn->hot_backup_lock);
	__wt_spin_destroy(session, &conn->las_lock);
	__wt_spin_destroy(session, &conn->metadata_lock);
	__wt_spin_destroy(session, &conn->reconfig_lock);
	__wt_spin_destroy(session, &conn->schema_lock);
	__wt_spin_destroy(session, &conn->table_lock);
	__wt_spin_destroy(session, &conn->turtle_lock);
	for (i = 0; i < WT_PAGE_LOCKS; ++i)
		__wt_spin_destroy(session, &conn->page_lock[i]);
	__wt_free(session, conn->page_lock);

	/* Destroy the file-system configuration. */
	if (conn->file_system != NULL && conn->file_system->terminate != NULL)
		WT_TRET(conn->file_system->terminate(
		    conn->file_system, (WT_SESSION *)session));

	/* Free allocated memory. */
	__wt_free(session, conn->cfg);
	__wt_free(session, conn->home);
	__wt_free(session, conn->error_prefix);
	__wt_free(session, conn->sessions);
	__wt_stat_connection_discard(session, conn);

	__wt_free(NULL, conn);
	return (ret);
}
Example #6
/*
 * __wt_log_slot_grow_buffers --
 *	Increase the buffer size of all available slots in the buffer pool.
 *	Go to some lengths to include active (but unused) slots to handle
 *	the case where all log write record sizes exceed the size of the
 *	active buffer.
 */
int
__wt_log_slot_grow_buffers(WT_SESSION_IMPL *session, size_t newsize)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_LOG *log;
	WT_LOGSLOT *slot;
	int64_t orig_state;
	uint64_t old_size, total_growth;
	int i;

	conn = S2C(session);
	log = conn->log;
	total_growth = 0;
	WT_STAT_FAST_CONN_INCR(session, log_buffer_grow);
	/*
	 * Take the log slot lock to prevent other threads growing buffers
	 * at the same time. Could tighten the scope of this lock, or have
	 * a separate lock if there is contention.
	 */
	__wt_spin_lock(session, &log->log_slot_lock);
	for (i = 0; i < SLOT_POOL; i++) {
		slot = &log->slot_pool[i];
		/* Avoid atomic operations if they won't succeed. */
		if (slot->slot_state != WT_LOG_SLOT_FREE &&
		    slot->slot_state != WT_LOG_SLOT_READY)
			continue;
		/* Don't keep growing unrelated buffers. */
		if (slot->slot_buf.memsize > (10 * newsize) &&
		    !F_ISSET(slot, SLOT_BUF_GROW))
			continue;
		orig_state = WT_ATOMIC_CAS_VAL8(
		    slot->slot_state, WT_LOG_SLOT_FREE, WT_LOG_SLOT_PENDING);
		if (orig_state != WT_LOG_SLOT_FREE) {
			orig_state = WT_ATOMIC_CAS_VAL8(slot->slot_state,
			    WT_LOG_SLOT_READY, WT_LOG_SLOT_PENDING);
			if (orig_state != WT_LOG_SLOT_READY)
				continue;
		}

		/* We have a slot - now go ahead and grow the buffer. */
		old_size = slot->slot_buf.memsize;
		F_CLR(slot, SLOT_BUF_GROW);
		WT_ERR(__wt_buf_grow(session, &slot->slot_buf,
		    WT_MAX(slot->slot_buf.memsize * 2, newsize)));
		slot->slot_state = orig_state;
		total_growth += slot->slot_buf.memsize - old_size;
	}
err:	__wt_spin_unlock(session, &log->log_slot_lock);
	WT_STAT_FAST_CONN_INCRV(session, log_buffer_size, total_growth);
	return (ret);
}
Example #7
/*
 * __conn_load_extension --
 *	WT_CONNECTION->load_extension method.
 */
static int
__conn_load_extension(
    WT_CONNECTION *wt_conn, const char *path, const char *config)
{
	WT_CONFIG_ITEM cval;
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_DLH *dlh;
	WT_SESSION_IMPL *session;
	int (*load)(WT_CONNECTION *, WT_CONFIG_ARG *);
	const char *init_name, *terminate_name;

	dlh = NULL;
	init_name = terminate_name = NULL;

	conn = (WT_CONNECTION_IMPL *)wt_conn;
	CONNECTION_API_CALL(conn, session, load_extension, config, cfg);

	WT_ERR(__wt_config_gets(session, cfg, "entry", &cval));
	WT_ERR(__wt_strndup(session, cval.str, cval.len, &init_name));

	/*
	 * This assumes the underlying shared libraries are reference counted,
	 * that is, that re-opening a shared library simply increments a ref
	 * count, and closing it simply decrements the ref count, and the last
	 * close discards the reference entirely -- in other words, we do not
	 * check to see if we've already opened this shared library.
	 *
	 * Fill in the extension structure and call the load function.
	 */
	WT_ERR(__wt_dlopen(session, path, &dlh));
	WT_ERR(__wt_dlsym(session, dlh, init_name, 1, &load));
	WT_ERR(load(wt_conn, (WT_CONFIG_ARG *)cfg));

	/* Remember the unload function for when we close. */
	WT_ERR(__wt_config_gets(session, cfg, "terminate", &cval));
	WT_ERR(__wt_strndup(session, cval.str, cval.len, &terminate_name));
	WT_ERR(__wt_dlsym(session, dlh, terminate_name, 0, &dlh->terminate));

	/* Link onto the environment's list of open libraries. */
	__wt_spin_lock(session, &conn->api_lock);
	TAILQ_INSERT_TAIL(&conn->dlhqh, dlh, q);
	__wt_spin_unlock(session, &conn->api_lock);
	dlh = NULL;

err:	if (dlh != NULL)
		WT_TRET(__wt_dlclose(session, dlh));
	__wt_free(session, init_name);
	__wt_free(session, terminate_name);

	API_END_NOTFOUND_MAP(session, ret);
}
Example #8
File: os_fhandle.c  Project: ralic/mongo
/*
 * __wt_handle_search --
 *	Search for a matching handle.
 */
bool
__wt_handle_search(WT_SESSION_IMPL *session,
                   const char *name, bool increment_ref, WT_FH *newfh, WT_FH **fhp)
{
    WT_CONNECTION_IMPL *conn;
    WT_FH *fh;
    uint64_t bucket, hash;
    bool found;

    if (fhp != NULL)
        *fhp = NULL;

    conn = S2C(session);
    found = false;

    hash = __wt_hash_city64(name, strlen(name));
    bucket = hash % WT_HASH_ARRAY_SIZE;

    __wt_spin_lock(session, &conn->fh_lock);

    /*
     * If we already have the file open, optionally increment the reference
     * count and return a pointer.
     */
    TAILQ_FOREACH(fh, &conn->fhhash[bucket], hashq)
        if (strcmp(name, fh->name) == 0) {
            if (increment_ref)
                ++fh->ref;
            if (fhp != NULL)
                *fhp = fh;
            found = true;
            break;
        }

    /* If we don't find a match, optionally add a new entry. */
    if (!found && newfh != NULL) {
        newfh->name_hash = hash;
        WT_CONN_FILE_INSERT(conn, newfh, bucket);
        (void)__wt_atomic_add32(&conn->open_file_count, 1);

        if (increment_ref)
            ++newfh->ref;
        if (fhp != NULL)
            *fhp = newfh;
    }

    __wt_spin_unlock(session, &conn->fh_lock);

    return (found);
}
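The lookup shape in the example -- hash the name, pick a bucket, walk the bucket's list under the connection lock -- is generic. A sketch with pthreads and <sys/queue.h>; handle_t, the toy FNV-1a hash, and all other names are illustrative stand-ins for the WT_FH machinery.

#include <pthread.h>
#include <stdint.h>
#include <string.h>
#include <sys/queue.h>

#define NBUCKETS	512

typedef struct handle {		/* Illustrative, not WT_FH. */
	const char *name;
	int ref;
	TAILQ_ENTRY(handle) hashq;
} handle_t;

/* Each bucket must be TAILQ_INIT'd once at startup. */
static TAILQ_HEAD(hqh, handle) buckets[NBUCKETS];
static pthread_mutex_t cache_lock = PTHREAD_MUTEX_INITIALIZER;

/* Toy FNV-1a hash standing in for __wt_hash_city64. */
static uint64_t
name_hash(const char *s)
{
	uint64_t h = 14695981039346656037ULL;

	for (; *s != '\0'; ++s)
		h = (h ^ (uint64_t)(unsigned char)*s) * 1099511628211ULL;
	return (h);
}

static handle_t *
handle_search(const char *name)
{
	handle_t *h;

	pthread_mutex_lock(&cache_lock);
	TAILQ_FOREACH(h, &buckets[name_hash(name) % NBUCKETS], hashq)
		if (strcmp(name, h->name) == 0) {
			++h->ref;	/* Hand out another reference. */
			break;
		}
	pthread_mutex_unlock(&cache_lock);
	return (h);	/* NULL if no match was found. */
}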
Example #9
/*
 * __wt_block_compact_skip --
 *	Return if compaction will shrink the file.
 */
int
__wt_block_compact_skip(
    WT_SESSION_IMPL *session, WT_BLOCK *block, int trigger, int *skipp)
{
	WT_EXT *ext;
	WT_EXTLIST *el;
	WT_FH *fh;
	off_t avail, half;
	int pct;

	fh = block->fh;
	*skipp = 1;

	/*
	 * We do compaction by copying blocks from the end of the file to the
	 * beginning of the file, and we need some metrics to decide if it's
	 * worth doing.  Ignore small files, and files where we are unlikely
	 * to recover the specified percentage of the file.  (The calculation
	 * is if at least N % of the file appears in the available list, and
	 * in the first half of the file.  In other words, don't bother with
	 * compaction unless we have an expectation of moving N % of the file
	 * from the last half of the file to the first half of the file.)
	 */
	if (fh->size <= 10 * 1024)
		return (0);

	__wt_spin_lock(session, &block->live_lock);

	avail = 0;
	half = fh->size / 2;

	el = &block->live.avail;
	WT_EXT_FOREACH(ext, el->off)
		if (ext->off < half)
			avail += ext->size;
	pct = (int)((avail * 100) / fh->size);

	__wt_spin_unlock(session, &block->live_lock);

	if (pct >= trigger)
		*skipp = 0;

	WT_VERBOSE_RET(session, block,
	    "%s: compaction %s, %d%% of the free space in the available "
	    "list appears in the first half of the file",
	    block->name, pct < trigger ? "skipped" : "proceeding", pct);

	return (0);
}
Example #10
/*
 * __wt_connection_destroy --
 *	Destroy the connection's underlying WT_CONNECTION_IMPL structure.
 */
int
__wt_connection_destroy(WT_CONNECTION_IMPL *conn)
{
	WT_DECL_RET;
	WT_SESSION_IMPL *session;

	/* Check there's something to destroy. */
	if (conn == NULL)
		return (0);

	session = conn->default_session;

	/*
	 * Close remaining open files (before discarding the mutex; the
	 * underlying file-close code uses the mutex to guard lists of
	 * open files).
	 */
	if (conn->lock_fh != NULL)
		WT_TRET(__wt_close(session, conn->lock_fh));

	if (conn->log_fh != NULL)
		WT_TRET(__wt_close(session, conn->log_fh));

	/* Remove from the list of connections. */
	__wt_spin_lock(session, &__wt_process.spinlock);
	TAILQ_REMOVE(&__wt_process.connqh, conn, q);
	__wt_spin_unlock(session, &__wt_process.spinlock);

	__wt_conn_config_discard(session);		/* configuration */

	__wt_conn_foc_discard(session);			/* free-on-close */

	__wt_spin_destroy(session, &conn->api_lock);
	__wt_spin_destroy(session, &conn->block_lock);
	__wt_spin_destroy(session, &conn->checkpoint_lock);
	__wt_spin_destroy(session, &conn->fh_lock);
	__wt_spin_destroy(session, &conn->hot_backup_lock);
	__wt_spin_destroy(session, &conn->schema_lock);
	__wt_spin_destroy(session, &conn->serial_lock);

	/* Free allocated memory. */
	__wt_free(session, conn->home);
	__wt_free(session, conn->error_prefix);
	__wt_free(session, conn->sessions);

	__wt_free(NULL, conn);
	return (ret);
}
Example #11
/*
 * __wt_close --
 *	Close a file handle.
 */
int
__wt_close(WT_SESSION_IMPL *session, WT_FH **fhp)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_FH *fh;
	uint64_t bucket;

	conn = S2C(session);

	if (*fhp == NULL)
		return (0);
	fh = *fhp;
	*fhp = NULL;

	__wt_spin_lock(session, &conn->fh_lock);
	if (fh == NULL || fh->ref == 0 || --fh->ref > 0) {
		__wt_spin_unlock(session, &conn->fh_lock);
		return (0);
	}

	/* Remove from the list. */
	bucket = fh->name_hash % WT_HASH_ARRAY_SIZE;
	WT_CONN_FILE_REMOVE(conn, fh, bucket);
	(void)WT_ATOMIC_SUB4(conn->open_file_count, 1);

	__wt_spin_unlock(session, &conn->fh_lock);

	/*
	 * Discard the memory.
	 * Note: for directories, we do not open valid directory handles on
	 * Windows since it is not possible to sync a directory.
	 */
	if (fh->filehandle != INVALID_HANDLE_VALUE &&
	    CloseHandle(fh->filehandle) == 0) {
		ret = __wt_errno();
		__wt_err(session, ret, "CloseHandle: %s", fh->name);
	}

	if (fh->filehandle_secondary != INVALID_HANDLE_VALUE &&
	    CloseHandle(fh->filehandle_secondary) == 0) {
		ret = __wt_errno();
		__wt_err(session, ret, "CloseHandle: secondary: %s", fh->name);
	}

	__wt_free(session, fh->name);
	__wt_free(session, fh);
	return (ret);
}
Example #12
/*
 * __wt_block_compact_end --
 *	End compaction of a file.
 */
int
__wt_block_compact_end(WT_SESSION_IMPL *session, WT_BLOCK *block)
{
	/*
	 * Restore the previous allocation plan.
	 * We don't need the lock, but it's not a performance question and
	 * might avoid bugs in the future.
	 */
	__wt_spin_lock(session, &block->live_lock);
	block->allocfirst = block->allocfirst_save;
	__wt_spin_unlock(session, &block->live_lock);

	return (0);
}
Example #13
/*
 * __conn_load_extension --
 *	WT_CONNECTION->load_extension method.
 */
static int
__conn_load_extension(
    WT_CONNECTION *wt_conn, const char *path, const char *config)
{
	WT_CONFIG_ITEM cval;
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_DLH *dlh;
	WT_SESSION_IMPL *session;
	int (*entry)(WT_SESSION *, WT_EXTENSION_API *, const char *);
	const char *entry_name;

	dlh = NULL;

	conn = (WT_CONNECTION_IMPL *)wt_conn;
	CONNECTION_API_CALL(conn, session, load_extension, config, cfg);

	entry_name = NULL;
	WT_ERR(__wt_config_gets(session, cfg, "entry", &cval));
	WT_ERR(__wt_strndup(session, cval.str, cval.len, &entry_name));

	/*
	 * This assumes the underlying shared libraries are reference counted,
	 * that is, that re-opening a shared library simply increments a ref
	 * count, and closing it simply decrements the ref count, and the last
	 * close discards the reference entirely -- in other words, we do not
	 * check to see if we've already opened this shared library.
	 */
	WT_ERR(__wt_dlopen(session, path, &dlh));
	WT_ERR(__wt_dlsym(session, dlh, entry_name, &entry));

	/* Call the entry function. */
	WT_ERR(entry(&session->iface, &__api, config));

	/* Link onto the environment's list of open libraries. */
	__wt_spin_lock(session, &conn->api_lock);
	TAILQ_INSERT_TAIL(&conn->dlhqh, dlh, q);
	__wt_spin_unlock(session, &conn->api_lock);

	if (0) {
err:		if (dlh != NULL)
			WT_TRET(__wt_dlclose(session, dlh));
	}
	__wt_free(session, entry_name);

	API_END_NOTFOUND_MAP(session, ret);
}
Example #14
File: os_fhandle.c  Project: ralic/mongo
/*
 * __wt_close --
 *	Close a file handle.
 */
int
__wt_close(WT_SESSION_IMPL *session, WT_FH **fhp)
{
    WT_CONNECTION_IMPL *conn;
    WT_DECL_RET;
    WT_FH *fh;
    uint64_t bucket;

    conn = S2C(session);

    if (*fhp == NULL)
        return (0);
    fh = *fhp;
    *fhp = NULL;

    /* Track handle-close as a file operation, so open and close match. */
    WT_RET(__wt_verbose(
               session, WT_VERB_FILEOPS, "%s: handle-close", fh->name));

    /*
     * If the reference count hasn't gone to 0, or if it's an in-memory
     * object, we're done.
     *
     * Assert the reference count is correct, but don't let it wrap.
     */
    __wt_spin_lock(session, &conn->fh_lock);
    WT_ASSERT(session, fh->ref > 0);
    if ((fh->ref > 0 && --fh->ref > 0) || F_ISSET(fh, WT_FH_IN_MEMORY)) {
        __wt_spin_unlock(session, &conn->fh_lock);
        return (0);
    }

    /* Remove from the list. */
    bucket = fh->name_hash % WT_HASH_ARRAY_SIZE;
    WT_CONN_FILE_REMOVE(conn, fh, bucket);
    (void)__wt_atomic_sub32(&conn->open_file_count, 1);

    __wt_spin_unlock(session, &conn->fh_lock);

    /* Discard underlying resources. */
    ret = fh->fh_close(session, fh);

    __wt_free(session, fh->name);
    __wt_free(session, fh);

    return (ret);
}
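Common to both __wt_close variants is the core move: decrement a reference count under the file-handle lock, and let only the last closer do the real teardown, outside the lock. A minimal pthread sketch, with handle_t and all names illustrative:

#include <pthread.h>
#include <stdbool.h>
#include <stdlib.h>

typedef struct {		/* Illustrative, not WT_FH. */
	int ref;		/* Protected by handles_lock. */
	char *name;
} handle_t;

static pthread_mutex_t handles_lock = PTHREAD_MUTEX_INITIALIZER;

static void
handle_close(handle_t *h)
{
	bool last;

	pthread_mutex_lock(&handles_lock);
	last = --h->ref == 0;	/* Drop our reference. */
	pthread_mutex_unlock(&handles_lock);

	if (!last)		/* Someone else still holds it. */
		return;

	/* Last reference out discards the resources, outside the lock. */
	free(h->name);
	free(h);
}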
Example #15
/*
 * __wt_block_compact_start --
 *	Start compaction of a file.
 */
int
__wt_block_compact_start(WT_SESSION_IMPL *session, WT_BLOCK *block)
{
	/*
	 * Save the current allocation plan, switch to first-fit allocation.
	 * We don't need the lock, but it's not a performance question and
	 * might avoid bugs in the future.
	 */
	__wt_spin_lock(session, &block->live_lock);
	block->allocfirst_save = block->allocfirst;
	block->allocfirst = 1;
	__wt_spin_unlock(session, &block->live_lock);

	return (0);
}
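Paired with __wt_block_compact_end shown earlier, this is a save/switch/restore of the allocation policy under the live lock. The pattern, reduced to a pthread sketch (block_t and its field names are illustrative):

#include <pthread.h>

typedef struct {			/* Illustrative, not WT_BLOCK. */
	pthread_mutex_t live_lock;	/* Initialize with pthread_mutex_init. */
	int allocfirst, allocfirst_save;
} block_t;

static void
compact_start(block_t *b)
{
	pthread_mutex_lock(&b->live_lock);
	b->allocfirst_save = b->allocfirst;	/* Save the current plan. */
	b->allocfirst = 1;			/* Switch to first-fit. */
	pthread_mutex_unlock(&b->live_lock);
}

static void
compact_end(block_t *b)
{
	pthread_mutex_lock(&b->live_lock);
	b->allocfirst = b->allocfirst_save;	/* Restore the saved plan. */
	pthread_mutex_unlock(&b->live_lock);
}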
Example #16
/*
 * __backup_stop --
 *	Stop a backup.
 */
static int
__backup_stop(WT_SESSION_IMPL *session)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;

	conn = S2C(session);

	/* Remove any backup specific file. */
	ret = __backup_file_remove(session);

	/* Checkpoint deletion can proceed, as can the next hot backup. */
	__wt_spin_lock(session, &conn->hot_backup_lock);
	conn->hot_backup = 0;
	__wt_spin_unlock(session, &conn->hot_backup_lock);

	return (ret);
}
Example #17
File: cache_las.c  Project: qihsh/mongo
/*
 * __wt_las_cursor --
 *	Return a lookaside cursor.
 */
int
__wt_las_cursor(
    WT_SESSION_IMPL *session, WT_CURSOR **cursorp, uint32_t *session_flags)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;

	*cursorp = NULL;

	/*
	 * We don't want to get tapped for eviction after we start using the
	 * lookaside cursor; save a copy of the current eviction state, we'll
	 * turn eviction off before we return.
	 *
	 * Don't cache lookaside table pages, we're here because of eviction
	 * problems and there's no reason to believe lookaside pages will be
	 * useful more than once.
	 */
	*session_flags =
	    F_ISSET(session, WT_SESSION_NO_CACHE | WT_SESSION_NO_EVICTION);

	conn = S2C(session);

	/* Eviction and sweep threads have their own lookaside table cursors. */
	if (F_ISSET(session, WT_SESSION_LOOKASIDE_CURSOR)) {
		if (session->las_cursor == NULL) {
			WT_WITHOUT_DHANDLE(session, ret =
			    __las_cursor_create(session, &session->las_cursor));
			WT_RET(ret);
		}

		*cursorp = session->las_cursor;
	} else {
		/* Lock the shared lookaside cursor. */
		__wt_spin_lock(session, &conn->las_lock);

		*cursorp = conn->las_cursor;
	}

	/* Turn caching and eviction off. */
	F_SET(session, WT_SESSION_NO_CACHE | WT_SESSION_NO_EVICTION);

	return (0);
}
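The session-flag dance above is its own small idiom: save the current state of exactly the bits about to change, force them on, and restore them on release. A sketch with plain bit masks; the flag values and session_t are illustrative, mirroring the F_ISSET/F_SET macros in the example.

#include <stdint.h>

#define NO_CACHE	0x1u		/* Illustrative flag values. */
#define NO_EVICTION	0x2u

typedef struct { uint32_t flags; } session_t;

static uint32_t
cursor_acquire(session_t *s)
{
	/* Save only the bits we are about to modify... */
	uint32_t saved = s->flags & (NO_CACHE | NO_EVICTION);

	/* ...then force both off-switches on for the critical section. */
	s->flags |= NO_CACHE | NO_EVICTION;
	return (saved);
}

static void
cursor_release(session_t *s, uint32_t saved)
{
	/* Clear the bits, then put back whatever was set before. */
	s->flags &= ~(NO_CACHE | NO_EVICTION);
	s->flags |= saved;
}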
Example #18
/*
 * __wt_block_stat --
 *	Block statistics
 */
void
__wt_block_stat(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_DSRC_STATS *stats)
{
	/*
	 * We're looking inside the live system's structure, which normally
	 * requires locking: the chances of a corrupted read are probably
	 * non-existent, and it's statistics information regardless, but it
	 * isn't like this is a common function for an application to call.
	 */
	__wt_spin_lock(session, &block->live_lock);
	WT_STAT_SET(stats, allocation_size, block->allocsize);
	WT_STAT_SET(stats, block_checkpoint_size, block->live.ckpt_size);
	WT_STAT_SET(stats, block_magic, WT_BLOCK_MAGIC);
	WT_STAT_SET(stats, block_major, WT_BLOCK_MAJOR_VERSION);
	WT_STAT_SET(stats, block_minor, WT_BLOCK_MINOR_VERSION);
	WT_STAT_SET(stats, block_reuse_bytes, block->live.avail.bytes);
	WT_STAT_SET(stats, block_size, block->fh->size);
	__wt_spin_unlock(session, &block->live_lock);
}
Example #19
/*
 * __wt_connection_destroy --
 *	Destroy the connection's underlying WT_CONNECTION_IMPL structure.
 */
void
__wt_connection_destroy(WT_CONNECTION_IMPL *conn)
{
	WT_SESSION_IMPL *session;

	/* Check there's something to destroy. */
	if (conn == NULL)
		return;

	session = conn->default_session;

	/* Remove from the list of connections. */
	__wt_spin_lock(session, &__wt_process.spinlock);
	TAILQ_REMOVE(&__wt_process.connqh, conn, q);
	__wt_spin_unlock(session, &__wt_process.spinlock);

	__wt_conn_config_discard(session);		/* configuration */

	__wt_conn_foc_discard(session);			/* free-on-close */

	__wt_spin_destroy(session, &conn->api_lock);
	__wt_spin_destroy(session, &conn->block_lock);
	__wt_spin_destroy(session, &conn->checkpoint_lock);
	__wt_rwlock_destroy(session, &conn->dhandle_lock);
	__wt_spin_destroy(session, &conn->encryptor_lock);
	__wt_spin_destroy(session, &conn->fh_lock);
	__wt_rwlock_destroy(session, &conn->hot_backup_lock);
	__wt_spin_destroy(session, &conn->metadata_lock);
	__wt_spin_destroy(session, &conn->reconfig_lock);
	__wt_spin_destroy(session, &conn->schema_lock);
	__wt_rwlock_destroy(session, &conn->table_lock);
	__wt_spin_destroy(session, &conn->turtle_lock);

	/* Free allocated memory. */
	__wt_free(session, conn->cfg);
	__wt_free(session, conn->home);
	__wt_free(session, conn->error_prefix);
	__wt_free(session, conn->sessions);
	__wt_stat_connection_discard(session, conn);

	__wt_free(NULL, conn);
}
Example #20
File: os_open.c  Project: 3rf/mongo
/*
 * __wt_close --
 *	Close a file handle.
 */
int
__wt_close(WT_SESSION_IMPL *session, WT_FH *fh)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;

	conn = S2C(session);

	__wt_spin_lock(session, &conn->fh_lock);
	if (fh == NULL || fh->ref == 0 || --fh->ref > 0) {
		__wt_spin_unlock(session, &conn->fh_lock);
		return (0);
	}

	/* Remove from the list. */
	TAILQ_REMOVE(&conn->fhqh, fh, q);
	WT_STAT_FAST_CONN_DECR(session, file_open);

	__wt_spin_unlock(session, &conn->fh_lock);

	/*
	 * Discard the memory.
	 * Note: for directories, we do not open valid directory handles on
	 * Windows since it is not possible to sync a directory.
	 */
	if (fh->filehandle != INVALID_HANDLE_VALUE &&
	    CloseHandle(fh->filehandle) == 0) {
		ret = __wt_errno();
		__wt_err(session, ret, "CloseHandle: %s", fh->name);
	}

	if (fh->filehandle_secondary != INVALID_HANDLE_VALUE &&
	    CloseHandle(fh->filehandle_secondary) == 0) {
		ret = __wt_errno();
		__wt_err(session, ret, "CloseHandle: secondary: %s", fh->name);
	}

	__wt_free(session, fh->name);
	__wt_free(session, fh);
	return (ret);
}
Example #21
/*
 * __wt_block_checkpoint_start --
 *	Start a checkpoint.
 */
int
__wt_block_checkpoint_start(WT_SESSION_IMPL *session, WT_BLOCK *block)
{
	WT_DECL_RET;

	__wt_spin_lock(session, &block->live_lock);
	switch (block->ckpt_state) {
	case WT_CKPT_INPROGRESS:
	case WT_CKPT_PANIC_ON_FAILURE:
	case WT_CKPT_SALVAGE:
		__wt_err(session, EINVAL,
		    "%s: an unexpected checkpoint start: the checkpoint "
		    "has already started or was configured for salvage",
		    block->name);
		ret = __wt_block_panic(session);
		break;
	case WT_CKPT_NONE:
		block->ckpt_state = WT_CKPT_INPROGRESS;
		break;
	}
	__wt_spin_unlock(session, &block->live_lock);
	return (ret);
}
Example #22
/*
 * __wt_optrack_record_funcid --
 *	Allocate and record optrack function ID.
 */
void
__wt_optrack_record_funcid(
    WT_SESSION_IMPL *session, const char *func, uint16_t *func_idp)
{
	static uint16_t optrack_uid = 0; /* Unique for the process lifetime. */
	WT_CONNECTION_IMPL *conn;
	WT_DECL_ITEM(tmp);
	WT_DECL_RET;
	wt_off_t fsize;
	bool locked;

	conn = S2C(session);
	locked = false;

	WT_ERR(__wt_scr_alloc(session, strlen(func) + 32, &tmp));

	__wt_spin_lock(session, &conn->optrack_map_spinlock);
	locked = true;
	if (*func_idp == 0) {
		*func_idp = ++optrack_uid;

		WT_ERR(__wt_buf_fmt(
		    session, tmp, "%" PRIu16 " %s\n", *func_idp, func));
		WT_ERR(__wt_filesize(session, conn->optrack_map_fh, &fsize));
		WT_ERR(__wt_write(session,
		    conn->optrack_map_fh, fsize, tmp->size, tmp->data));
	}

	if (0) {
err:		WT_PANIC_MSG(session, ret,
		    "operation tracking initialization failure");
	}

	if (locked)
		__wt_spin_unlock(session, &conn->optrack_map_spinlock);
	__wt_scr_free(session, &tmp);
}
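The key detail above is that the caller's ID slot is re-checked under the spinlock, so threads racing on the first call for the same function agree on a single ID. A minimal pthread sketch, with names illustrative and the map-file write omitted:

#include <pthread.h>
#include <stdint.h>

static pthread_mutex_t map_lock = PTHREAD_MUTEX_INITIALIZER;
static uint16_t next_uid = 0;	/* Unique for the process lifetime. */

static void
record_funcid(uint16_t *func_idp)
{
	pthread_mutex_lock(&map_lock);
	if (*func_idp == 0)	/* Re-check under the lock: first caller wins. */
		*func_idp = ++next_uid;
	pthread_mutex_unlock(&map_lock);
}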
Example #23
/*
 * __wt_block_snapshot_resolve --
 *	Resolve a snapshot.
 */
int
__wt_block_snapshot_resolve(
    WT_SESSION_IMPL *session, WT_BLOCK *block, WT_SNAPSHOT *snapbase)
{
	WT_BLOCK_SNAPSHOT *si;
	WT_DECL_RET;

	si = &block->live;

	/*
	 * Snapshots are a two-step process: first, we write a new snapshot to
	 * disk (including all the new extent lists for modified snapshots and
	 * the live system).  As part of this we create a list of file blocks
	 * newly available for re-allocation, based on snapshots being deleted.
	 * We then return the locations of the new snapshot information to our
	 * caller.  Our caller has to write that information into some kind of
	 * stable storage, and once that's done, we can actually allocate from
	 * that list of newly available file blocks.  (We can't allocate from
	 * that list immediately because the allocation might happen before our
	 * caller saves the new snapshot information, and if we crashed before
	 * the new snapshot information was saved, we'd have overwritten blocks
	 * still referenced by snapshots in the system.)  In summary, there is
	 * a second step, after our caller saves the snapshot information, we
	 * are called to add the newly available blocks into the live system's
	 * available list.
	 */
	__wt_spin_lock(session, &block->live_lock);
	ret =
	    __wt_block_extlist_merge(session, &si->snapshot_avail, &si->avail);
	__wt_spin_unlock(session, &block->live_lock);

	/* Discard the list. */
	__wt_block_extlist_free(session, &si->snapshot_avail);

	WT_UNUSED(snapbase);
	return (ret);
}
Example #24
/*
 * __wt_block_ckpt_init --
 *	Initialize a checkpoint structure.
 */
int
__wt_block_ckpt_init(WT_SESSION_IMPL *session,
    WT_BLOCK *block, WT_BLOCK_CKPT *ci, const char *name, int is_live)
{
	WT_DECL_RET;

	/*
	 * If we're loading a new live checkpoint, there shouldn't be one
	 * already loaded.  The btree engine should prevent this from ever
	 * happening, but paranoia is a healthy thing.
	 */
	if (is_live) {
		__wt_spin_lock(session, &block->live_lock);
		if (block->live_load)
			ret = EINVAL;
		else
			block->live_load = 1;
		__wt_spin_unlock(session, &block->live_lock);
		if (ret)
			WT_RET_MSG(
			    session, EINVAL, "checkpoint already loaded");
	}

	memset(ci, 0, sizeof(*ci));

	ci->root_offset = WT_BLOCK_INVALID_OFFSET;

	WT_RET(__wt_block_extlist_init(session, &ci->alloc, name, "alloc"));
	WT_RET(__wt_block_extlist_init(session, &ci->avail, name, "avail"));
	WT_RET(__wt_block_extlist_init(session, &ci->discard, name, "discard"));

	ci->file_size = WT_BLOCK_DESC_SECTOR;
	WT_RET(__wt_block_extlist_init(
	    session, &ci->ckpt_avail, name, "ckpt_avail"));

	return (0);
}
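The live_load guard is a test-and-set inside a single critical section, so a second loader fails cleanly with EINVAL instead of racing. Reduced to a pthread sketch (names illustrative):

#include <errno.h>
#include <pthread.h>

static pthread_mutex_t live_lock = PTHREAD_MUTEX_INITIALIZER;
static int live_load = 0;

static int
live_checkpoint_load(void)
{
	int ret = 0;

	pthread_mutex_lock(&live_lock);
	if (live_load)
		ret = EINVAL;	/* A live checkpoint is already loaded. */
	else
		live_load = 1;
	pthread_mutex_unlock(&live_lock);
	return (ret);
}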
Example #25
/*
 * __wt_block_compact_page_skip --
 *	Return if writing a particular page will shrink the file.
 */
int
__wt_block_compact_page_skip(WT_SESSION_IMPL *session,
    WT_BLOCK *block, const uint8_t *addr, size_t addr_size, int *skipp)
{
	WT_DECL_RET;
	WT_EXT *ext;
	WT_EXTLIST *el;
	WT_FH *fh;
	off_t ninety, offset;
	uint32_t size, cksum;

	WT_UNUSED(addr_size);
	*skipp = 1;				/* Return a default skip. */

	fh = block->fh;

	/* Crack the cookie. */
	WT_RET(__wt_block_buffer_to_addr(block, addr, &offset, &size, &cksum));

	__wt_spin_lock(session, &block->live_lock);

	/*
	 * If this block is in the last 10% of the file and there's a block on
	 * the available list that's in the first 90% of the file, rewrite the
	 * block.  Checking the available list is necessary (otherwise writing
	 * the block would extend the file), but there's an obvious race if the
	 * file is sufficiently busy.
	 */
	ninety = fh->size - fh->size / 10;
	if (offset > ninety) {
		el = &block->live.avail;
		WT_EXT_FOREACH(ext, el->off)
			if (ext->off < ninety && ext->size >= size) {
				*skipp = 0;
				break;
			}
	}

	__wt_spin_unlock(session, &block->live_lock);

	return (ret);
}
Example #26
File: cache_las.c  Project: hgGeorg/mongo
/*
 * __wt_las_cursor --
 *	Return a lookaside cursor.
 */
void
__wt_las_cursor(
    WT_SESSION_IMPL *session, WT_CURSOR **cursorp, uint32_t *session_flags)
{
	WT_CONNECTION_IMPL *conn;

	*cursorp = NULL;

	/*
	 * We don't want to get tapped for eviction after we start using the
	 * lookaside cursor; save a copy of the current eviction state, we'll
	 * turn eviction off before we return.
	 *
	 * Don't cache lookaside table pages, we're here because of eviction
	 * problems and there's no reason to believe lookaside pages will be
	 * useful more than once.
	 */
	*session_flags =
	    F_MASK(session, WT_SESSION_NO_CACHE | WT_SESSION_NO_EVICTION);

	conn = S2C(session);

	/*
	 * Some threads have their own lookaside table cursors, else lock the
	 * shared lookaside cursor.
	 */
	if (F_ISSET(session, WT_SESSION_LOOKASIDE_CURSOR))
		*cursorp = session->las_cursor;
	else {
		__wt_spin_lock(session, &conn->las_lock);
		*cursorp = conn->las_session->las_cursor;
	}

	/* Turn caching and eviction off. */
	F_SET(session, WT_SESSION_NO_CACHE | WT_SESSION_NO_EVICTION);
}
Example #27
/*
 * __wt_block_close --
 *	Close a block handle.
 */
int
__wt_block_close(WT_SESSION_IMPL *session, WT_BLOCK *block)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;

	if (block == NULL)				/* Safety check */
		return (0);

	conn = S2C(session);

	WT_TRET(__wt_verbose(session, WT_VERB_BLOCK,
	    "close: %s", block->name == NULL ? "" : block->name));

	__wt_spin_lock(session, &conn->block_lock);

	/* Reference count is initialized to 1. */
	if (block->ref == 0 || --block->ref == 0)
		WT_TRET(__block_destroy(session, block));

	__wt_spin_unlock(session, &conn->block_lock);

	return (ret);
}
Example #28
/*
 * __wt_log_wrlsn --
 *	Process written log slots and attempt to coalesce them if the LSNs
 *	are contiguous.  The purpose of this function is to advance the
 *	write_lsn in LSN order after the buffer is written to the log file.
 */
int
__wt_log_wrlsn(WT_SESSION_IMPL *session)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_LOG *log;
	WT_LOG_WRLSN_ENTRY written[WT_SLOT_POOL];
	WT_LOGSLOT *coalescing, *slot;
	WT_LSN save_lsn;
	size_t written_i;
	uint32_t i, save_i;

	conn = S2C(session);
	log = conn->log;
	__wt_spin_lock(session, &log->log_writelsn_lock);
restart:
	coalescing = NULL;
	WT_INIT_LSN(&save_lsn);
	written_i = 0;
	i = 0;

	/*
	 * Walk the array once saving any slots that are in the
	 * WT_LOG_SLOT_WRITTEN state.
	 */
	while (i < WT_SLOT_POOL) {
		save_i = i;
		slot = &log->slot_pool[i++];
		/*
		 * XXX - During debugging I saw slot 0 become orphaned.
		 * I believe it is fixed, but check for now.
		 * This assertion should catch that.
		 */
		if (slot->slot_state == 0)
			WT_ASSERT(session,
			    slot->slot_release_lsn.file >= log->write_lsn.file);
		if (slot->slot_state != WT_LOG_SLOT_WRITTEN)
			continue;
		written[written_i].slot_index = save_i;
		written[written_i++].lsn = slot->slot_release_lsn;
	}
	/*
	 * If we found any written slots process them.  We sort them
	 * based on the release LSN, and then look for them in order.
	 */
	if (written_i > 0) {
		WT_INSERTION_SORT(written, written_i,
		    WT_LOG_WRLSN_ENTRY, WT_WRLSN_ENTRY_CMP_LT);
		/*
		 * We know the written array is sorted by LSN.  Go
		 * through them either advancing write_lsn or coalesce
		 * contiguous ranges of written slots.
		 */
		for (i = 0; i < written_i; i++) {
			slot = &log->slot_pool[written[i].slot_index];
			/*
			 * The log server thread pushes out slots periodically.
			 * Sometimes they are empty slots.  If we find an
			 * empty slot, where empty means the start and end LSN
			 * are the same, free it and continue.
			 */
			if (__wt_log_cmp(&slot->slot_start_lsn,
			    &slot->slot_release_lsn) == 0 &&
			    __wt_log_cmp(&slot->slot_start_lsn,
			    &slot->slot_end_lsn) == 0) {
				__wt_log_slot_free(session, slot);
				continue;
			}
			if (coalescing != NULL) {
				/*
				 * If the write_lsn changed, we may be able to
				 * process slots.  Try again.
				 */
				if (__wt_log_cmp(
				    &log->write_lsn, &save_lsn) != 0)
					goto restart;
				if (__wt_log_cmp(&coalescing->slot_end_lsn,
				    &written[i].lsn) != 0) {
					coalescing = slot;
					continue;
				}
				/*
				 * If we get here we have a slot to coalesce
				 * and free.
				 */
				coalescing->slot_last_offset =
				    slot->slot_last_offset;
				coalescing->slot_end_lsn = slot->slot_end_lsn;
				WT_STAT_FAST_CONN_INCR(
				    session, log_slot_coalesced);
				/*
				 * Copy the flag for later closing.
				 */
				if (F_ISSET(slot, WT_SLOT_CLOSEFH))
					F_SET(coalescing, WT_SLOT_CLOSEFH);
			} else {
				/*
				 * If this written slot is not the next LSN,
				 * try to start coalescing with later slots.
				 * A synchronous write may update write_lsn
				 * so save the last one we saw to check when
				 * coalescing slots.
				 */
				save_lsn = log->write_lsn;
				if (__wt_log_cmp(
				    &log->write_lsn, &written[i].lsn) != 0) {
					coalescing = slot;
					continue;
				}
				/*
				 * If we get here we have a slot to process.
				 * Advance the LSN and process the slot.
				 */
				WT_ASSERT(session, __wt_log_cmp(&written[i].lsn,
				    &slot->slot_release_lsn) == 0);
				if (slot->slot_start_lsn.offset !=
				    slot->slot_last_offset)
					slot->slot_start_lsn.offset =
					    slot->slot_last_offset;
				log->write_start_lsn = slot->slot_start_lsn;
				log->write_lsn = slot->slot_end_lsn;
				WT_ERR(__wt_cond_signal(
				    session, log->log_write_cond));
				WT_STAT_FAST_CONN_INCR(session, log_write_lsn);
				/*
				 * Signal the close thread if needed.
				 */
				if (F_ISSET(slot, WT_SLOT_CLOSEFH))
					WT_ERR(__wt_cond_signal(
					    session, conn->log_file_cond));
			}
			__wt_log_slot_free(session, slot);
		}
	}
err:	__wt_spin_unlock(session, &log->log_writelsn_lock);
	return (ret);
}
Example #29
/*
 * __log_file_server --
 *	The log file server thread.  This worker thread manages
 *	log file operations such as closing and syncing.
 */
static WT_THREAD_RET
__log_file_server(void *arg)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_FH *close_fh;
	WT_LOG *log;
	WT_LSN close_end_lsn, min_lsn;
	WT_SESSION_IMPL *session;
	uint32_t filenum;
	int locked;

	session = arg;
	conn = S2C(session);
	log = conn->log;
	locked = 0;
	while (F_ISSET(conn, WT_CONN_LOG_SERVER_RUN)) {
		/*
		 * If there is a log file to close, make sure any outstanding
		 * write operations have completed, then fsync and close it.
		 */
		if ((close_fh = log->log_close_fh) != NULL) {
			WT_ERR(__wt_log_extract_lognum(session, close_fh->name,
			    &filenum));
			/*
			 * We update the close file handle before updating the
			 * close LSN when changing files.  It is possible we
			 * could see mismatched settings.  If we do, yield
			 * until it is set.  This should rarely happen.
			 */
			while (log->log_close_lsn.file < filenum)
				__wt_yield();

			if (__wt_log_cmp(
			    &log->write_lsn, &log->log_close_lsn) >= 0) {
				/*
				 * We've copied the file handle, clear out the
				 * one in the log structure to allow it to be
				 * set again.  Copy the LSN before clearing
				 * the file handle.
				 * Use a barrier to make sure the compiler does
				 * not reorder the following two statements.
				 */
				close_end_lsn = log->log_close_lsn;
				WT_FULL_BARRIER();
				log->log_close_fh = NULL;
				/*
				 * Set close_end_lsn to the LSN immediately
				 * after ours, that is, the beginning of the
				 * next log file.  We need to know the LSN
				 * file number of our own close in case
				 * earlier calls are still in progress, and
				 * to allow the next call to move the
				 * sync_lsn into the next file for later
				 * syncs.
				 */
				close_end_lsn.file++;
				close_end_lsn.offset = 0;
				WT_ERR(__wt_fsync(session, close_fh));
				__wt_spin_lock(session, &log->log_sync_lock);
				locked = 1;
				WT_ERR(__wt_close(session, &close_fh));
				WT_ASSERT(session, __wt_log_cmp(
				    &close_end_lsn, &log->sync_lsn) >= 0);
				log->sync_lsn = close_end_lsn;
				WT_ERR(__wt_cond_signal(
				    session, log->log_sync_cond));
				locked = 0;
				__wt_spin_unlock(session, &log->log_sync_lock);
			}
		}
		/*
		 * If a later thread asked for a background sync, do it now.
		 */
		if (__wt_log_cmp(&log->bg_sync_lsn, &log->sync_lsn) > 0) {
			/*
			 * Save the latest write LSN which is the minimum
			 * we will have written to disk.
			 */
			min_lsn = log->write_lsn;
			/*
			 * We have to wait until the LSN we asked for is
			 * written.  If it isn't signal the wrlsn thread
			 * to get it written.
			 */
			if (__wt_log_cmp(&log->bg_sync_lsn, &min_lsn) <= 0) {
				WT_ERR(__wt_fsync(session, log->log_fh));
				__wt_spin_lock(session, &log->log_sync_lock);
				locked = 1;
				/*
				 * The sync LSN could have advanced while we
				 * were writing to disk.
				 */
				if (__wt_log_cmp(
				    &log->sync_lsn, &min_lsn) <= 0) {
					log->sync_lsn = min_lsn;
					WT_ERR(__wt_cond_signal(
					    session, log->log_sync_cond));
				}
				locked = 0;
				__wt_spin_unlock(session, &log->log_sync_lock);
			} else {
				WT_ERR(__wt_cond_signal(
				    session, conn->log_wrlsn_cond));
				/*
				 * We do not want to wait potentially a second
				 * to process this.  Yield to give the wrlsn
				 * thread a chance to run and try again in
				 * this case.
				 */
				__wt_yield();
				continue;
			}
		}
		/* Wait until the next event. */
		WT_ERR(__wt_cond_wait(
		    session, conn->log_file_cond, WT_MILLION));
	}

	if (0) {
err:		__wt_err(session, ret, "log close server error");
	}
	if (locked)
		__wt_spin_unlock(session, &log->log_sync_lock);
	return (WT_THREAD_RET_VALUE);
}
Example #30
File: block_ckpt.c  Project: ksuarz/mongo
/*
 * __wt_block_checkpoint_load --
 *	Load a checkpoint.
 */
int
__wt_block_checkpoint_load(WT_SESSION_IMPL *session, WT_BLOCK *block,
    const uint8_t *addr, size_t addr_size,
    uint8_t *root_addr, size_t *root_addr_sizep, bool checkpoint)
{
	WT_BLOCK_CKPT *ci, _ci;
	WT_DECL_ITEM(tmp);
	WT_DECL_RET;
	uint8_t *endp;

	ci = NULL;

	/*
	 * Sometimes we don't find a root page (we weren't given a checkpoint,
	 * or the checkpoint was empty).  In that case we return an empty root
	 * address, set that up now.
	 */
	*root_addr_sizep = 0;

#ifdef HAVE_VERBOSE
	if (WT_VERBOSE_ISSET(session, WT_VERB_CHECKPOINT)) {
		if (addr != NULL) {
			WT_ERR(__wt_scr_alloc(session, 0, &tmp));
			WT_ERR(__ckpt_string(session, block, addr, tmp));
		}
		__wt_verbose(session, WT_VERB_CHECKPOINT,
		    "%s: load-checkpoint: %s", block->name,
		    addr == NULL ? "[Empty]" : (const char *)tmp->data);
	}
#endif

	/*
	 * There's a single checkpoint in the file that can be written, all of
	 * the others are read-only.  We use the same initialization calls for
	 * readonly checkpoints, but the information doesn't persist.
	 */
	if (checkpoint) {
		ci = &_ci;
		WT_ERR(__wt_block_ckpt_init(session, ci, "checkpoint"));
	} else {
		/*
		 * We depend on the btree level for locking: things will go bad
		 * fast if we open the live system in two handles, or salvage,
		 * truncate or verify the live/running file.
		 */
#ifdef HAVE_DIAGNOSTIC
		__wt_spin_lock(session, &block->live_lock);
		WT_ASSERT(session, block->live_open == false);
		block->live_open = true;
		__wt_spin_unlock(session, &block->live_lock);
#endif
		ci = &block->live;
		WT_ERR(__wt_block_ckpt_init(session, ci, "live"));
	}

	/*
	 * If the checkpoint has an on-disk root page, load it.  Otherwise, size
	 * the file past the description information.
	 */
	if (addr == NULL || addr_size == 0)
		ci->file_size = block->allocsize;
	else {
		/* Crack the checkpoint cookie. */
		WT_ERR(__wt_block_buffer_to_ckpt(session, block, addr, ci));

		/* Verify sets up next. */
		if (block->verify)
			WT_ERR(__wt_verify_ckpt_load(session, block, ci));

		/* Read any root page. */
		if (ci->root_offset != WT_BLOCK_INVALID_OFFSET) {
			endp = root_addr;
			WT_ERR(__wt_block_addr_to_buffer(block, &endp,
			    ci->root_offset, ci->root_size, ci->root_checksum));
			*root_addr_sizep = WT_PTRDIFF(endp, root_addr);
		}

		/*
		 * Rolling a checkpoint forward requires the avail list, the
		 * blocks from which we can allocate.
		 */
		if (!checkpoint)
			WT_ERR(__wt_block_extlist_read_avail(
			    session, block, &ci->avail, ci->file_size));
	}

	/*
	 * If the checkpoint can be written, that means anything written after
	 * the checkpoint is no longer interesting, truncate the file.  Don't
	 * bother checking the avail list for a block at the end of the file,
	 * that was done when the checkpoint was first written (re-writing the
	 * checkpoint might possibly make it relevant here, but it's unlikely
	 * enough I don't bother).
	 */
	if (!checkpoint)
		WT_ERR(__wt_block_truncate(session, block, ci->file_size));

	if (0) {
err:		/*
		 * Don't call checkpoint-unload: unload does real work including
		 * file truncation.  If we fail early enough that the checkpoint
		 * information isn't correct, bad things would happen.  The only
		 * allocated memory was in the service of verify, clean that up.
		 */
		if (block->verify)
			WT_TRET(__wt_verify_ckpt_unload(session, block));
	}

	/* Checkpoints don't need the original information, discard it. */
	if (checkpoint && ci != NULL)
		__wt_block_ckpt_destroy(session, ci);

	__wt_scr_free(session, &tmp);
	return (ret);
}