コード例 #1
0
ファイル: os_alloc.c プロジェクト: Machyne/mongo
/*
 * __realloc_func --
 *	ANSI realloc function.
 */
static int
__realloc_func(WT_SESSION_IMPL *session,
    size_t *bytes_allocated_ret, size_t bytes_to_allocate, bool clear_memory,
    void *retp)
{
	void *p;
	size_t bytes_allocated;

	/*
	 * !!!
	 * This function MUST handle a NULL WT_SESSION_IMPL handle.
	 *
	 * Sometimes we're allocating memory and we don't care about the
	 * final length -- bytes_allocated_ret may be NULL.
	 */
	p = *(void **)retp;
	bytes_allocated =
	    (bytes_allocated_ret == NULL) ? 0 : *bytes_allocated_ret;
	WT_ASSERT(session,
	    (p == NULL && bytes_allocated == 0) ||
	    (p != NULL &&
	    (bytes_allocated_ret == NULL || bytes_allocated != 0)));
	WT_ASSERT(session, bytes_to_allocate != 0);
	WT_ASSERT(session, bytes_allocated < bytes_to_allocate);

	if (session != NULL) {
		if (p == NULL)
			WT_STAT_CONN_INCR(session, memory_allocation);
		else
			WT_STAT_CONN_INCR(session, memory_grow);
	}

	if ((p = realloc(p, bytes_to_allocate)) == NULL)
		WT_RET_MSG(session, __wt_errno(),
		    "memory allocation of %" WT_SIZET_FMT " bytes failed",
		    bytes_to_allocate);

	/*
	 * Clear the allocated memory, parts of WiredTiger depend on allocated
	 * memory being cleared.
	 */
	if (clear_memory)
		memset((uint8_t *)p + bytes_allocated,
		    0, bytes_to_allocate - bytes_allocated);

	/* Update caller's bytes allocated value. */
	if (bytes_allocated_ret != NULL)
		*bytes_allocated_ret = bytes_to_allocate;

	*(void **)retp = p;
	return (0);
}
コード例 #2
0
/*
 * __curlog_search --
 *	WT_CURSOR.search method for the log cursor type.
 */
static int
__curlog_search(WT_CURSOR *cursor)
{
	WT_CURSOR_LOG *cl;
	WT_DECL_RET;
	WT_LSN key;
	WT_SESSION_IMPL *session;
	uint32_t counter, key_file, key_offset, raw;

	cl = (WT_CURSOR_LOG *)cursor;
	/* Temporarily turn off raw so we can do direct cursor operations. */
	raw = F_MASK(cursor, WT_CURSTD_RAW);
	F_CLR(cursor, WT_CURSTD_RAW);

	CURSOR_API_CALL(cursor, session, search, NULL);

	/*
	 * !!! We are ignoring the counter and only searching based on the LSN.
	 */
	WT_ERR(__wt_cursor_get_key(cursor, &key_file, &key_offset, &counter));
	WT_SET_LSN(&key, key_file, key_offset);
	ret = __wt_log_scan(session, &key, WT_LOGSCAN_ONE,
	    __curlog_logrec, cl);
	if (ret == ENOENT)
		ret = WT_NOTFOUND;
	WT_ERR(ret);
	WT_ERR(__curlog_kv(session, cursor));
	WT_STAT_CONN_INCR(session, cursor_search);
	WT_STAT_DATA_INCR(session, cursor_search);

err:	F_SET(cursor, raw);
	API_END_RET(session, ret);
}
コード例 #3
0
ファイル: cur_ds.c プロジェクト: adityavs/wiredtiger
/*
 * __curds_remove --
 *	WT_CURSOR.remove method for the data-source cursor type.
 */
static int
__curds_remove(WT_CURSOR *cursor)
{
	WT_CURSOR *source;
	WT_DECL_RET;
	WT_SESSION_IMPL *session;

	source = ((WT_CURSOR_DATA_SOURCE *)cursor)->source;

	CURSOR_REMOVE_API_CALL(cursor, session, NULL);

	WT_STAT_CONN_INCR(session, cursor_remove);
	WT_STAT_DATA_INCR(session, cursor_remove);
	WT_STAT_DATA_INCRV(session, cursor_remove_bytes, cursor->key.size);

	WT_ERR(__curds_txn_enter(session, true));

	WT_ERR(__curds_key_set(cursor));
	ret = __curds_cursor_resolve(cursor, source->remove(source));

err:	__curds_txn_leave(session);

	CURSOR_UPDATE_API_END(session, ret);
	return (ret);
}
コード例 #4
0
ファイル: cur_log.c プロジェクト: Machyne/mongo
/*
 * __curlog_search --
 *	WT_CURSOR.search method for the log cursor type.
 */
static int
__curlog_search(WT_CURSOR *cursor)
{
	WT_CURSOR_LOG *cl;
	WT_DECL_RET;
	WT_LSN key;
	WT_SESSION_IMPL *session;
	uint32_t counter, key_file, key_offset;

	cl = (WT_CURSOR_LOG *)cursor;

	CURSOR_API_CALL(cursor, session, search, NULL);

	/*
	 * !!! We are ignoring the counter and only searching based on the LSN.
	 */
	WT_ERR(__wt_cursor_get_key((WT_CURSOR *)cl,
	    &key_file, &key_offset, &counter));
	WT_SET_LSN(&key, key_file, key_offset);
	ret = __wt_log_scan(session, &key, WT_LOGSCAN_ONE,
	    __curlog_logrec, cl);
	if (ret == ENOENT)
		ret = WT_NOTFOUND;
	WT_ERR(ret);
	WT_ERR(__curlog_kv(session, cursor));
	WT_STAT_CONN_INCR(session, cursor_search);
	WT_STAT_DATA_INCR(session, cursor_search);

err:	API_END_RET(session, ret);
}
コード例 #5
0
ファイル: cur_ds.c プロジェクト: adityavs/wiredtiger
/*
 * __curds_insert --
 *	WT_CURSOR.insert method for the data-source cursor type.
 */
static int
__curds_insert(WT_CURSOR *cursor)
{
	WT_CURSOR *source;
	WT_DECL_RET;
	WT_SESSION_IMPL *session;

	source = ((WT_CURSOR_DATA_SOURCE *)cursor)->source;

	CURSOR_UPDATE_API_CALL(cursor, session, insert);

	WT_ERR(__curds_txn_enter(session, true));

	WT_STAT_CONN_INCR(session, cursor_insert);
	WT_STAT_DATA_INCR(session, cursor_insert);
	WT_STAT_DATA_INCRV(session,
	    cursor_insert_bytes, cursor->key.size + cursor->value.size);

	if (!F_ISSET(cursor, WT_CURSTD_APPEND))
		WT_ERR(__curds_key_set(cursor));
	WT_ERR(__curds_value_set(cursor));
	ret = __curds_cursor_resolve(cursor, source->insert(source));

err:	__curds_txn_leave(session);

	CURSOR_UPDATE_API_END(session, ret);
	return (ret);
}
コード例 #6
0
ファイル: txn.c プロジェクト: bsamek/wiredtiger
/*
 * __wt_txn_global_shutdown --
 *	Shut down the global transaction state.
 */
int
__wt_txn_global_shutdown(WT_SESSION_IMPL *session)
{
	bool txn_active;

	/*
	 * We're shutting down.  Make sure everything gets freed.
	 *
	 * It's possible that the eviction server is in the middle of a long
	 * operation, with a transaction ID pinned.  In that case, we will loop
	 * here until the transaction ID is released, when the oldest
	 * transaction ID will catch up with the current ID.
	 */
	for (;;) {
		WT_RET(__wt_txn_activity_check(session, &txn_active));
		if (!txn_active)
			break;

		WT_STAT_CONN_INCR(session, txn_release_blocked);
		__wt_yield();
	}

#ifdef HAVE_TIMESTAMPS
	/*
	 * Now that all transactions have completed, no timestamps should be
	 * pinned.
	 */
	__wt_timestamp_set_inf(&S2C(session)->txn_global.pinned_timestamp);
#endif

	return (0);
}
コード例 #7
0
ファイル: os_alloc.c プロジェクト: Machyne/mongo
/*
 * __wt_malloc --
 *	ANSI malloc function.
 */
int
__wt_malloc(WT_SESSION_IMPL *session, size_t bytes_to_allocate, void *retp)
{
	void *p;

	/*
	 * Defensive: if our caller doesn't handle errors correctly, ensure a
	 * free won't fail.
	 */
	*(void **)retp = NULL;

	/*
	 * !!!
	 * This function MUST handle a NULL WT_SESSION_IMPL handle.
	 */
	WT_ASSERT(session, bytes_to_allocate != 0);

	if (session != NULL)
		WT_STAT_CONN_INCR(session, memory_allocation);

	if ((p = malloc(bytes_to_allocate)) == NULL)
		WT_RET_MSG(session, __wt_errno(),
		    "memory allocation of %" WT_SIZET_FMT " bytes failed",
		    bytes_to_allocate);

	*(void **)retp = p;
	return (0);
}
コード例 #8
0
ファイル: os_alloc.c プロジェクト: Machyne/mongo
/*
 * __wt_free_int --
 *	ANSI free function.
 */
void
__wt_free_int(WT_SESSION_IMPL *session, const void *p_arg)
{
	void *p;

	p = *(void **)p_arg;
	if (p == NULL)				/* ANSI C free semantics */
		return;

	/*
	 * If there's a serialization bug we might race with another thread.
	 * We can't avoid the race (and we aren't willing to flush memory),
	 * but we minimize the window by clearing the free address, hoping a
	 * racing thread will see, and won't free, a NULL pointer.
	 */
	*(void **)p_arg = NULL;

	/*
	 * !!!
	 * This function MUST handle a NULL WT_SESSION_IMPL handle.
	 */
	if (session != NULL)
		WT_STAT_CONN_INCR(session, memory_free);

	free(p);
}
コード例 #9
0
/*
 * __wt_log_slot_switch --
 *	Switch out the current slot and set up a new one.
 */
int
__wt_log_slot_switch(WT_SESSION_IMPL *session,
    WT_MYSLOT *myslot, bool retry, bool forced, bool *did_work)
{
	WT_DECL_RET;
	WT_LOG *log;

	log = S2C(session)->log;

	/*
	 * !!! Since the WT_WITH_SLOT_LOCK macro is a do-while loop, the
	 * compiler does not like it combined directly with the while loop
	 * here.
	 *
	 * The loop conditional is a bit complex.  We have to retry if we
	 * closed the slot but were unable to set up a new slot.  In that
	 * case the flag indicating we have closed the slot will still be set.
	 * We have to retry in that case regardless of the retry setting
	 * because we are responsible for setting up the new slot.
	 */
	do {
		WT_WITH_SLOT_LOCK(session, log,
		    ret = __log_slot_switch_internal(
		    session, myslot, forced, did_work));
		if (ret == EBUSY) {
			WT_STAT_CONN_INCR(session, log_slot_switch_busy);
			__wt_yield();
		}
		WT_RET(WT_SESSION_CHECK_PANIC(session));
		if (F_ISSET(S2C(session), WT_CONN_CLOSING))
			break;
	} while (F_ISSET(myslot, WT_MYSLOT_CLOSE) || (retry && ret == EBUSY));
	return (ret);
}
コード例 #10
0
ファイル: cur_log.c プロジェクト: Machyne/mongo
/*
 * __curlog_next --
 *	WT_CURSOR.next method for the step log cursor type.
 */
static int
__curlog_next(WT_CURSOR *cursor)
{
	WT_CURSOR_LOG *cl;
	WT_DECL_RET;
	WT_SESSION_IMPL *session;

	cl = (WT_CURSOR_LOG *)cursor;

	CURSOR_API_CALL(cursor, session, next, NULL);

	/*
	 * If we don't have a record, or went to the end of the record we
	 * have, or we are in the zero-fill portion of the record, get a
	 * new one.
	 */
	if (cl->stepp == NULL || cl->stepp >= cl->stepp_end || !*cl->stepp) {
		cl->txnid = 0;
		ret = __wt_log_scan(session, cl->next_lsn, WT_LOGSCAN_ONE,
		    __curlog_logrec, cl);
		if (ret == ENOENT)
			ret = WT_NOTFOUND;
		WT_ERR(ret);
	}
	WT_ASSERT(session, cl->logrec->data != NULL);
	WT_ERR(__curlog_kv(session, cursor));
	WT_STAT_CONN_INCR(session, cursor_next);
	WT_STAT_DATA_INCR(session, cursor_next);

err:	API_END_RET(session, ret);

}
コード例 #11
0
ファイル: bt_ovfl.c プロジェクト: adityavs/wiredtiger
/*
 * __ovfl_read --
 *	Read an overflow item from the disk.
 */
static int
__ovfl_read(WT_SESSION_IMPL *session,
    const uint8_t *addr, size_t addr_size, WT_ITEM *store)
{
	WT_BTREE *btree;
	const WT_PAGE_HEADER *dsk;

	btree = S2BT(session);

	/*
	 * Read the overflow item from the block manager, then reference the
	 * start of the data and set the data's length.
	 *
	 * Overflow reads are synchronous. That may bite me at some point, but
	 * WiredTiger supports large page sizes, overflow items should be rare.
	 */
	WT_RET(__wt_bt_read(session, store, addr, addr_size));
	dsk = store->data;
	store->data = WT_PAGE_HEADER_BYTE(btree, dsk);
	store->size = dsk->u.datalen;

	WT_STAT_CONN_INCR(session, cache_read_overflow);
	WT_STAT_DATA_INCR(session, cache_read_overflow);

	return (0);
}
コード例 #12
0
ファイル: conn_log.c プロジェクト: Machyne/mongo
/*
 * __log_wrlsn_server --
 *	The log wrlsn server thread.
 */
static WT_THREAD_RET
__log_wrlsn_server(void *arg)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_LOG *log;
	WT_LSN prev;
	WT_SESSION_IMPL *session;
	int yield;
	bool did_work;

	session = arg;
	conn = S2C(session);
	log = conn->log;
	yield = 0;
	WT_INIT_LSN(&prev);
	while (F_ISSET(conn, WT_CONN_LOG_SERVER_RUN)) {
		/*
		 * Write out any log record buffers if anything was done
		 * since last time.  Only call the function to walk the
		 * slots if the system is not idle.  On an idle system
		 * the alloc_lsn will not advance and the written lsn will
		 * match the alloc_lsn.
		 */
		if (__wt_log_cmp(&prev, &log->alloc_lsn) != 0 ||
		    __wt_log_cmp(&log->write_lsn, &log->alloc_lsn) != 0)
			__wt_log_wrlsn(session, &yield);
		else
			WT_STAT_CONN_INCR(session, log_write_lsn_skip);
		prev = log->alloc_lsn;
		did_work = yield == 0;

		/*
		 * If __wt_log_wrlsn did work we want to yield instead of sleep.
		 */
		if (yield++ < WT_THOUSAND)
			__wt_yield();
		else
			/*
			 * Send in false because if we did any work we would
			 * not be on this path.
			 */
			__wt_cond_auto_wait(
			    session, conn->log_wrlsn_cond, did_work);
	}
	/*
	 * On close we need to do this one more time because there could
	 * be straggling log writes that need to be written.
	 */
	WT_ERR(__wt_log_force_write(session, 1, NULL));
	__wt_log_wrlsn(session, NULL);
	if (0) {
err:		__wt_err(session, ret, "log wrlsn server error");
	}
	return (WT_THREAD_RET_VALUE);
}
コード例 #13
0
ファイル: conn_log.c プロジェクト: mpobrien/mongo
/*
 * __log_prealloc_once --
 *	Perform one iteration of log pre-allocation.
 */
static int
__log_prealloc_once(WT_SESSION_IMPL *session)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_LOG *log;
	u_int i, reccount;
	char **recfiles;

	conn = S2C(session);
	log = conn->log;
	reccount = 0;
	recfiles = NULL;

	/*
	 * Allocate up to the maximum number, accounting for any existing
	 * files that may not have been used yet.
	 */
	WT_ERR(__wt_fs_directory_list(
	    session, conn->log_path, WT_LOG_PREPNAME, &recfiles, &reccount));

	/*
	 * Adjust the number of files to pre-allocate if we find that
	 * the critical path had to allocate them since we last ran.
	 */
	if (log->prep_missed > 0) {
		conn->log_prealloc += log->prep_missed;
		__wt_verbose(session, WT_VERB_LOG,
		    "Missed %" PRIu32 ". Now pre-allocating up to %" PRIu32,
		    log->prep_missed, conn->log_prealloc);
	}
	WT_STAT_CONN_SET(session, log_prealloc_max, conn->log_prealloc);
	/*
	 * Allocate up to the maximum number that we just computed and detected.
	 */
	for (i = reccount; i < (u_int)conn->log_prealloc; i++) {
		WT_ERR(__wt_log_allocfile(
		    session, ++log->prep_fileid, WT_LOG_PREPNAME));
		WT_STAT_CONN_INCR(session, log_prealloc_files);
	}
	/*
	 * Reset the missed count now.  If we missed during pre-allocating
	 * the log files, it means the allocation is not keeping up, not that
	 * we didn't allocate enough.  So we don't just want to keep adding
	 * in more.
	 */
	log->prep_missed = 0;

	if (0)
err:		__wt_err(session, ret, "log pre-alloc server error");
	WT_TRET(__wt_fs_directory_list_free(session, &recfiles, reccount));
	return (ret);
}
コード例 #14
0
ファイル: bt_cursor.c プロジェクト: ksuarz/mongo
/*
 * __wt_btcur_reset --
 *	Invalidate the cursor position.
 */
int
__wt_btcur_reset(WT_CURSOR_BTREE *cbt)
{
	WT_SESSION_IMPL *session;

	session = (WT_SESSION_IMPL *)cbt->iface.session;

	WT_STAT_CONN_INCR(session, cursor_reset);
	WT_STAT_DATA_INCR(session, cursor_reset);

	return (__cursor_reset(cbt));
}
コード例 #15
0
/*
 * __wt_bm_read --
 *	Map or read address cookie referenced block into a buffer.
 */
int
__wt_bm_read(WT_BM *bm, WT_SESSION_IMPL *session,
    WT_ITEM *buf, const uint8_t *addr, size_t addr_size)
{
	WT_BLOCK *block;
	WT_DECL_RET;
	WT_FILE_HANDLE *handle;
	wt_off_t offset;
	uint32_t checksum, size;
	bool mapped;

	WT_UNUSED(addr_size);
	block = bm->block;

	/* Crack the cookie. */
	WT_RET(
	    __wt_block_buffer_to_addr(block, addr, &offset, &size, &checksum));

	/*
	 * Map the block if it's possible.
	 */
	handle = block->fh->handle;
	mapped = bm->map != NULL && offset + size <= (wt_off_t)bm->maplen;
	if (mapped && handle->fh_map_preload != NULL) {
		buf->data = (uint8_t *)bm->map + offset;
		buf->size = size;
		ret = handle->fh_map_preload(handle, (WT_SESSION *)session,
		    buf->data, buf->size,bm->mapped_cookie);

		WT_STAT_CONN_INCR(session, block_map_read);
		WT_STAT_CONN_INCRV(session, block_byte_map_read, size);
		return (ret);
	}

#ifdef HAVE_DIAGNOSTIC
	/*
	 * In diagnostic mode, verify the block we're about to read isn't on
	 * the available list, or for live systems, the discard list.
	 */
	WT_RET(__wt_block_misplaced(session,
	    block, "read", offset, size, bm->is_live, __func__, __LINE__));
#endif
	/* Read the block. */
	__wt_capacity_throttle(session, size, WT_THROTTLE_READ);
	WT_RET(
	    __wt_block_read_off(session, block, buf, offset, size, checksum));

	/* Optionally discard blocks from the system's buffer cache. */
	WT_RET(__wt_block_discard(session, block, (size_t)size));

	return (0);
}
コード例 #16
0
ファイル: async_op.c プロジェクト: ksuarz/mongo
/*
 * __async_search --
 *	WT_ASYNC_OP->search implementation for op handles.
 */
static int
__async_search(WT_ASYNC_OP *asyncop)
{
	WT_ASYNC_OP_IMPL *op;
	WT_DECL_RET;
	WT_SESSION_IMPL *session;

	op = (WT_ASYNC_OP_IMPL *)asyncop;
	ASYNCOP_API_CALL(O2C(op), session, search);
	WT_STAT_CONN_INCR(O2S(op), async_op_search);
	WT_ERR(__async_op_wrap(op, WT_AOP_SEARCH));
err:	API_END_RET(session, ret);
}
コード例 #17
0
ファイル: bt_cursor.c プロジェクト: ksuarz/mongo
/*
 * __cursor_truncate_fix --
 *	Discard a cursor range from fixed-width column-store tree.
 */
static int
__cursor_truncate_fix(WT_SESSION_IMPL *session,
    WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop,
    int (*rmfunc)(WT_SESSION_IMPL *, WT_CURSOR_BTREE *, bool))
{
	WT_DECL_RET;
	const uint8_t *value;

	/*
	 * Handle fixed-length column-store objects separately: for row-store
	 * and variable-length column-store objects we have "deleted" values
	 * and so returned objects actually exist: fixed-length column-store
	 * objects are filled-in if they don't exist, that is, if you create
	 * record 37, records 1-36 magically appear.  Those records can't be
	 * deleted, which means we have to ignore already "deleted" records.
	 *
	 * First, call the standard cursor remove method to do a full search and
	 * re-position the cursor because we don't have a saved copy of the
	 * page's write generation information, which we need to remove records.
	 * Once that's done, we can delete records without a full search, unless
	 * we encounter a restart error because the page was modified by some
	 * other thread of control; in that case, repeat the full search to
	 * refresh the page's modification information.
	 */
retry:	WT_RET(__wt_btcur_remove(start));
	/*
	 * Reset ret each time through so that we don't loop forever in
	 * the cursor equals case.
	 */
	for (ret = 0;;) {
		if (stop != NULL && __cursor_equals(start, stop))
			break;
		if ((ret = __wt_btcur_next(start, true)) != 0)
			break;
		start->compare = 0;	/* Exact match */
		value = (const uint8_t *)start->iface.value.data;
		if (*value != 0 &&
		    (ret = rmfunc(session, start, 1)) != 0)
			break;
	}

	if (ret == WT_RESTART) {
		WT_STAT_CONN_INCR(session, cursor_restart);
		WT_STAT_DATA_INCR(session, cursor_restart);
		goto retry;
	}

	WT_RET_NOTFOUND_OK(ret);
	return (0);
}
コード例 #18
0
ファイル: bt_cursor.c プロジェクト: ksuarz/mongo
/*
 * __cursor_truncate --
 *	Discard a cursor range from row-store or variable-width column-store
 * tree.
 */
static int
__cursor_truncate(WT_SESSION_IMPL *session,
    WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop,
    int (*rmfunc)(WT_SESSION_IMPL *, WT_CURSOR_BTREE *, bool))
{
	WT_DECL_RET;

	/*
	 * First, call the standard cursor remove method to do a full search and
	 * re-position the cursor because we don't have a saved copy of the
	 * page's write generation information, which we need to remove records.
	 * Once that's done, we can delete records without a full search, unless
	 * we encounter a restart error because the page was modified by some
	 * other thread of control; in that case, repeat the full search to
	 * refresh the page's modification information.
	 *
	 * If this is a row-store, we delete leaf pages having no overflow items
	 * without reading them; for that to work, we have to ensure we read the
	 * page referenced by the ending cursor, since we may be deleting only a
	 * partial page at the end of the truncation.  Our caller already fully
	 * instantiated the end cursor, so we know that page is pinned in memory
	 * and we can proceed without concern.
	 */
retry:	WT_RET(__wt_btcur_remove(start));

	/*
	 * Reset ret each time through so that we don't loop forever in
	 * the cursor equals case.
	 */
	for (ret = 0;;) {
		if (stop != NULL && __cursor_equals(start, stop))
			break;
		if ((ret = __wt_btcur_next(start, true)) != 0)
			break;
		start->compare = 0;	/* Exact match */
		if ((ret = rmfunc(session, start, 1)) != 0)
			break;
	}

	if (ret == WT_RESTART) {
		WT_STAT_CONN_INCR(session, cursor_restart);
		WT_STAT_DATA_INCR(session, cursor_restart);
		goto retry;
	}

	WT_RET_NOTFOUND_OK(ret);
	return (0);
}
コード例 #19
0
/*
 * __wt_session_compact_readonly --
 *	WT_SESSION.compact method; readonly version.
 */
int
__wt_session_compact_readonly(
    WT_SESSION *wt_session, const char *uri, const char *config)
{
	WT_DECL_RET;
	WT_SESSION_IMPL *session;

	WT_UNUSED(uri);
	WT_UNUSED(config);

	session = (WT_SESSION_IMPL *)wt_session;
	SESSION_API_CALL_NOCONF(session, compact);

	WT_STAT_CONN_INCR(session, session_table_compact_fail);
	ret = __wt_session_notsup(session);
err:	API_END_RET(session, ret);
}
コード例 #20
0
ファイル: cur_ds.c プロジェクト: adityavs/wiredtiger
/*
 * __curds_reset --
 *	WT_CURSOR.reset method for the data-source cursor type.
 */
static int
__curds_reset(WT_CURSOR *cursor)
{
	WT_CURSOR *source;
	WT_DECL_RET;
	WT_SESSION_IMPL *session;

	source = ((WT_CURSOR_DATA_SOURCE *)cursor)->source;

	CURSOR_API_CALL(cursor, session, reset, NULL);

	WT_STAT_CONN_INCR(session, cursor_reset);
	WT_STAT_DATA_INCR(session, cursor_reset);

	WT_ERR(source->reset(source));

	F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);

err:	API_END_RET(session, ret);
}
コード例 #21
0
ファイル: cur_ds.c プロジェクト: adityavs/wiredtiger
/*
 * __curds_prev --
 *	WT_CURSOR.prev method for the data-source cursor type.
 */
static int
__curds_prev(WT_CURSOR *cursor)
{
	WT_CURSOR *source;
	WT_DECL_RET;
	WT_SESSION_IMPL *session;

	source = ((WT_CURSOR_DATA_SOURCE *)cursor)->source;

	CURSOR_API_CALL(cursor, session, prev, NULL);

	WT_STAT_CONN_INCR(session, cursor_prev);
	WT_STAT_DATA_INCR(session, cursor_prev);

	WT_ERR(__curds_txn_enter(session, false));

	F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
	ret = __curds_cursor_resolve(cursor, source->prev(source));

err:	__curds_txn_leave(session);
	API_END_RET(session, ret);
}
コード例 #22
0
ファイル: bt_cursor.c プロジェクト: ksuarz/mongo
/*
 * __wt_btcur_update_check --
 *	Check whether an update would conflict.
 *
 *	This can be used to replace WT_CURSOR::insert or WT_CURSOR::update, so
 *	they only check for conflicts without updating the tree.  It is used to
 *	maintain snapshot isolation for transactions that span multiple chunks
 *	in an LSM tree.
 */
int
__wt_btcur_update_check(WT_CURSOR_BTREE *cbt)
{
	WT_BTREE *btree;
	WT_CURSOR *cursor;
	WT_DECL_RET;
	WT_SESSION_IMPL *session;

	cursor = &cbt->iface;
	btree = cbt->btree;
	session = (WT_SESSION_IMPL *)cursor->session;

retry:	WT_RET(__cursor_func_init(cbt, true));

	switch (btree->type) {
	case BTREE_ROW:
		WT_ERR(__cursor_row_search(session, cbt, NULL, true));

		/*
		 * Just check for conflicts.
		 */
		ret = __curfile_update_check(cbt);
		break;
	case BTREE_COL_FIX:
	case BTREE_COL_VAR:
		WT_ERR(__wt_illegal_value(session, NULL));
		break;
	}

err:	if (ret == WT_RESTART) {
		WT_STAT_CONN_INCR(session, cursor_restart);
		WT_STAT_DATA_INCR(session, cursor_restart);
		goto retry;
	}
	WT_TRET(__curfile_leave(cbt));
	if (ret != 0)
		WT_TRET(__cursor_reset(cbt));
	return (ret);
}
コード例 #23
0
ファイル: conn_log.c プロジェクト: mpobrien/mongo
/*
 * __logmgr_force_ckpt --
 *	Force a checkpoint out, waiting for the checkpoint LSN in the log
 *	is up to the given log number.
 */
static int
__logmgr_force_ckpt(WT_SESSION_IMPL *session, uint32_t lognum)
{
	WT_CONNECTION_IMPL *conn;
	WT_LOG *log;
	WT_SESSION_IMPL *tmp_session;
	int yield;

	conn = S2C(session);
	log = conn->log;
	yield = 0;
	WT_RET(__wt_open_internal_session(conn,
	    "compatibility-reconfig", true, 0, &tmp_session));
	while (log->ckpt_lsn.l.file < lognum) {
		/*
		 * Force a checkpoint to be written in the new log file and
		 * force the archiving of all previous log files.  We do the
		 * checkpoint in the loop because the checkpoint LSN in the
		 * log record could still reflect the previous log file in
		 * cases such as the write LSN has not yet advanced into the
		 * new log file due to another group of threads still in
		 * progress with their slot copies or writes.
		 */
		WT_RET(tmp_session->iface.checkpoint(
		    &tmp_session->iface, "force=1"));
		WT_RET(WT_SESSION_CHECK_PANIC(tmp_session));
		/*
		 * Only sleep in the rare case that we had to come through
		 * this loop more than once.
		 */
		if (yield++) {
			WT_STAT_CONN_INCR(session, log_force_ckpt_sleep);
			__wt_sleep(0, WT_THOUSAND);
		}
	}
	WT_RET(tmp_session->iface.close(&tmp_session->iface, NULL));
	return (0);
}
コード例 #24
0
ファイル: block_read.c プロジェクト: DINKIN/mongo
/*
 * __wt_bm_preload --
 *	Pre-load a page.
 */
int
__wt_bm_preload(
    WT_BM *bm, WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size)
{
	WT_BLOCK *block;
	WT_DECL_ITEM(tmp);
	WT_DECL_RET;
	WT_FILE_HANDLE *handle;
	wt_off_t offset;
	uint32_t checksum, size;
	bool mapped;

	block = bm->block;

	WT_STAT_CONN_INCR(session, block_preload);

	/* Crack the cookie. */
	WT_RET(
	    __wt_block_buffer_to_addr(block, addr, &offset, &size, &checksum));

	handle = block->fh->handle;
	mapped = bm->map != NULL && offset + size <= (wt_off_t)bm->maplen;
	if (mapped && handle->fh_map_preload != NULL)
		ret = handle->fh_map_preload(handle, (WT_SESSION *)session,
		    (uint8_t *)bm->map + offset, size, bm->mapped_cookie);
	if (!mapped && handle->fh_advise != NULL)
		ret = handle->fh_advise(handle, (WT_SESSION *)session,
		    offset, (wt_off_t)size, WT_FILE_HANDLE_WILLNEED);
	if (ret != EBUSY && ret != ENOTSUP)
		return (ret);

	/* If preload isn't supported, do it the slow way. */
	WT_RET(__wt_scr_alloc(session, 0, &tmp));
	ret = __wt_bm_read(bm, session, tmp, addr, addr_size);
	__wt_scr_free(session, &tmp);

	return (ret);
}
コード例 #25
0
ファイル: cur_ds.c プロジェクト: adityavs/wiredtiger
/*
 * __curds_search --
 *	WT_CURSOR.search method for the data-source cursor type.
 */
static int
__curds_search(WT_CURSOR *cursor)
{
	WT_CURSOR *source;
	WT_DECL_RET;
	WT_SESSION_IMPL *session;

	source = ((WT_CURSOR_DATA_SOURCE *)cursor)->source;

	CURSOR_API_CALL(cursor, session, search, NULL);

	WT_STAT_CONN_INCR(session, cursor_search);
	WT_STAT_DATA_INCR(session, cursor_search);

	WT_ERR(__curds_txn_enter(session, false));

	WT_ERR(__curds_key_set(cursor));
	ret = __curds_cursor_resolve(cursor, source->search(source));

err:	__curds_txn_leave(session);

	API_END_RET(session, ret);
}
コード例 #26
0
ファイル: bt_cursor.c プロジェクト: ksuarz/mongo
/*
 * __wt_btcur_next_random --
 *	Move to a random record in the tree. There are two algorithms, one
 *	where we select a record at random from the whole tree on each
 *	retrieval and one where we first select a record at random from the
 *	whole tree, and then subsequently sample forward from that location.
 *	The sampling approach allows us to select reasonably uniform random
 *	points from unbalanced trees.
 */
int
__wt_btcur_next_random(WT_CURSOR_BTREE *cbt)
{
	WT_BTREE *btree;
	WT_DECL_RET;
	WT_SESSION_IMPL *session;
	WT_UPDATE *upd;
	wt_off_t size;
	uint64_t skip;

	session = (WT_SESSION_IMPL *)cbt->iface.session;
	btree = cbt->btree;

	/*
	 * Only supports row-store: applications can trivially select a random
	 * value from a column-store, if there were any reason to do so.
	 */
	if (btree->type != BTREE_ROW)
		WT_RET_MSG(session, ENOTSUP,
		    "WT_CURSOR.next_random only supported by row-store tables");

	WT_STAT_CONN_INCR(session, cursor_next);
	WT_STAT_DATA_INCR(session, cursor_next);

	/*
	 * If retrieving random values without sampling, or we don't have a
	 * page reference, pick a roughly random leaf page in the tree.
	 */
	if (cbt->ref == NULL || cbt->next_random_sample_size == 0) {
		/*
		 * Skip past the sample size of the leaf pages in the tree
		 * between each random key return to compensate for unbalanced
		 * trees.
		 *
		 * Use the underlying file size divided by its block allocation
		 * size as our guess of leaf pages in the file (this can be
		 * entirely wrong, as it depends on how many pages are in this
		 * particular checkpoint, how large the leaf and internal pages
		 * really are, and other factors). Then, divide that value by
		 * the configured sample size and increment the final result to
		 * make sure tiny files don't leave us with a skip value of 0.
		 *
		 * !!!
		 * Ideally, the number would be prime to avoid restart issues.
		 */
		if (cbt->next_random_sample_size != 0) {
			WT_ERR(btree->bm->size(btree->bm, session, &size));
			cbt->next_random_leaf_skip = (uint64_t)
			    ((size / btree->allocsize) /
			    cbt->next_random_sample_size) + 1;
		}

		/*
		 * Choose a leaf page from the tree.
		 */
		WT_ERR(__cursor_func_init(cbt, true));
		WT_WITH_PAGE_INDEX(
		    session, ret = __wt_row_random_descent(session, cbt));
		WT_ERR(ret);
	} else {
		/*
		 * Read through the tree, skipping leaf pages. Be cautious about
		 * the skip count: if the last leaf page skipped was also the
		 * last leaf page in the tree, it may be set to zero on return
		 * with the end-of-walk condition.
		 *
		 * Pages read for data sampling aren't "useful"; don't update
		 * the read generation of pages already in memory, and if a page
		 * is read, set its generation to a low value so it is evicted
		 * quickly.
		 */
		for (skip =
		    cbt->next_random_leaf_skip; cbt->ref == NULL || skip > 0;)
			WT_ERR(__wt_tree_walk_skip(session, &cbt->ref, &skip,
			    WT_READ_NO_GEN |
			    WT_READ_SKIP_INTL | WT_READ_WONT_NEED));
	}

	/*
	 * Select a random entry from the leaf page. If it's not valid, move to
	 * the next entry, if that doesn't work, move to the previous entry.
	 */
	WT_ERR(__wt_row_random_leaf(session, cbt));
	if (__cursor_valid(cbt, &upd))
		WT_ERR(__wt_kv_return(session, cbt, upd));
	else {
		if ((ret = __wt_btcur_next(cbt, false)) == WT_NOTFOUND)
			ret = __wt_btcur_prev(cbt, false);
		WT_ERR(ret);
	}
	return (0);

err:	WT_TRET(__cursor_reset(cbt));
	return (ret);
}
コード例 #27
0
ファイル: bt_cursor.c プロジェクト: ksuarz/mongo
/*
 * __wt_btcur_update --
 *	Update a record in the tree.
 */
int
__wt_btcur_update(WT_CURSOR_BTREE *cbt)
{
	WT_BTREE *btree;
	WT_CURSOR *cursor;
	WT_DECL_RET;
	WT_SESSION_IMPL *session;

	btree = cbt->btree;
	cursor = &cbt->iface;
	session = (WT_SESSION_IMPL *)cursor->session;

	WT_STAT_CONN_INCR(session, cursor_update);
	WT_STAT_DATA_INCR(session, cursor_update);
	WT_STAT_DATA_INCRV(session, cursor_update_bytes, cursor->value.size);

	if (btree->type == BTREE_ROW)
		WT_RET(__cursor_size_chk(session, &cursor->key));
	WT_RET(__cursor_size_chk(session, &cursor->value));

	/*
	 * The tree is no longer empty: eviction should pay attention to it,
	 * and it's no longer possible to bulk-load into it.
	 */
	if (btree->bulk_load_ok) {
		btree->bulk_load_ok = false;
		__wt_btree_evictable(session, true);
	}

retry:	WT_RET(__cursor_func_init(cbt, true));

	switch (btree->type) {
	case BTREE_COL_FIX:
	case BTREE_COL_VAR:
		WT_ERR(__cursor_col_search(session, cbt, NULL));

		/*
		 * If not overwriting, fail if the key doesn't exist.  If we
		 * find an update for the key, check for conflicts.  Update the
		 * record if it exists.  Creating a record past the end of the
		 * tree in a fixed-length column-store implicitly fills the gap
		 * with empty records.  Update the record in that case, the
		 * record exists.
		 */
		if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE)) {
			WT_ERR(__curfile_update_check(cbt));
			if ((cbt->compare != 0 || !__cursor_valid(cbt, NULL)) &&
			    !__cursor_fix_implicit(btree, cbt))
				WT_ERR(WT_NOTFOUND);
		}
		ret = __cursor_col_modify(session, cbt, false);
		break;
	case BTREE_ROW:
		WT_ERR(__cursor_row_search(session, cbt, NULL, true));
		/*
		 * If not overwriting, check for conflicts and fail if the key
		 * does not exist.
		 */
		if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE)) {
			WT_ERR(__curfile_update_check(cbt));
			if (cbt->compare != 0 || !__cursor_valid(cbt, NULL))
				WT_ERR(WT_NOTFOUND);
		}
		ret = __cursor_row_modify(session, cbt, false);
		break;
	}

err:	if (ret == WT_RESTART) {
		WT_STAT_CONN_INCR(session, cursor_restart);
		WT_STAT_DATA_INCR(session, cursor_restart);
		goto retry;
	}

	/*
	 * If successful, point the cursor at internal copies of the data.  We
	 * could shuffle memory in the cursor so the key/value pair are in local
	 * buffer memory, but that's a data copy.  We don't want to do another
	 * search (and we might get a different update structure if we race).
	 * To make this work, we add a field to the btree cursor to pass back a
	 * pointer to the modify function's allocated update structure.
	 */
	if (ret == 0)
		WT_TRET(__wt_kv_return(session, cbt, cbt->modify_update));

	if (ret != 0)
		WT_TRET(__cursor_reset(cbt));
	return (ret);
}
コード例 #28
0
ファイル: bt_cursor.c プロジェクト: ksuarz/mongo
/*
 * __wt_btcur_remove --
 *	Remove a record from the tree.
 */
int
__wt_btcur_remove(WT_CURSOR_BTREE *cbt)
{
	WT_BTREE *btree;
	WT_CURSOR *cursor;
	WT_DECL_RET;
	WT_SESSION_IMPL *session;

	btree = cbt->btree;
	cursor = &cbt->iface;
	session = (WT_SESSION_IMPL *)cursor->session;

	WT_STAT_CONN_INCR(session, cursor_remove);
	WT_STAT_DATA_INCR(session, cursor_remove);
	WT_STAT_DATA_INCRV(session, cursor_remove_bytes, cursor->key.size);

retry:	WT_RET(__cursor_func_init(cbt, true));

	switch (btree->type) {
	case BTREE_COL_FIX:
	case BTREE_COL_VAR:
		WT_ERR(__cursor_col_search(session, cbt, NULL));

		/*
		 * If we find a matching record, check whether an update would
		 * conflict.  Do this before checking if the update is visible
		 * in __cursor_valid, or we can miss conflict.
		 */
		WT_ERR(__curfile_update_check(cbt));

		/* Remove the record if it exists. */
		if (cbt->compare != 0 || !__cursor_valid(cbt, NULL)) {
			if (!__cursor_fix_implicit(btree, cbt))
				WT_ERR(WT_NOTFOUND);
			/*
			 * Creating a record past the end of the tree in a
			 * fixed-length column-store implicitly fills the
			 * gap with empty records.  Return success in that
			 * case, the record was deleted successfully.
			 *
			 * Correct the btree cursor's location: the search
			 * will have pointed us at the previous/next item,
			 * and that's not correct.
			 */
			cbt->recno = cursor->recno;
		} else
			ret = __cursor_col_modify(session, cbt, true);
		break;
	case BTREE_ROW:
		/* Remove the record if it exists. */
		WT_ERR(__cursor_row_search(session, cbt, NULL, false));

		/* Check whether an update would conflict. */
		WT_ERR(__curfile_update_check(cbt));

		if (cbt->compare != 0 || !__cursor_valid(cbt, NULL))
			WT_ERR(WT_NOTFOUND);

		ret = __cursor_row_modify(session, cbt, true);
		break;
	}

err:	if (ret == WT_RESTART) {
		WT_STAT_CONN_INCR(session, cursor_restart);
		WT_STAT_DATA_INCR(session, cursor_restart);
		goto retry;
	}
	/*
	 * If the cursor is configured to overwrite and the record is not
	 * found, that is exactly what we want.
	 */
	if (F_ISSET(cursor, WT_CURSTD_OVERWRITE) && ret == WT_NOTFOUND)
		ret = 0;

	if (ret != 0)
		WT_TRET(__cursor_reset(cbt));

	return (ret);
}
コード例 #29
0
ファイル: bt_cursor.c プロジェクト: ksuarz/mongo
/*
 * __wt_btcur_insert --
 *	Insert a record into the tree.
 */
int
__wt_btcur_insert(WT_CURSOR_BTREE *cbt)
{
	WT_BTREE *btree;
	WT_CURSOR *cursor;
	WT_DECL_RET;
	WT_SESSION_IMPL *session;

	btree = cbt->btree;
	cursor = &cbt->iface;
	session = (WT_SESSION_IMPL *)cursor->session;

	WT_STAT_CONN_INCR(session, cursor_insert);
	WT_STAT_DATA_INCR(session, cursor_insert);
	WT_STAT_DATA_INCRV(session,
	    cursor_insert_bytes, cursor->key.size + cursor->value.size);

	if (btree->type == BTREE_ROW)
		WT_RET(__cursor_size_chk(session, &cursor->key));
	WT_RET(__cursor_size_chk(session, &cursor->value));

	/*
	 * The tree is no longer empty: eviction should pay attention to it,
	 * and it's no longer possible to bulk-load into it.
	 */
	if (btree->bulk_load_ok) {
		btree->bulk_load_ok = false;
		__wt_btree_evictable(session, true);
	}

retry:	WT_RET(__cursor_func_init(cbt, true));

	switch (btree->type) {
	case BTREE_COL_FIX:
	case BTREE_COL_VAR:
		/*
		 * If WT_CURSTD_APPEND is set, insert a new record (ignoring
		 * the application's record number). The real record number
		 * is assigned by the serialized append operation.
		 */
		if (F_ISSET(cursor, WT_CURSTD_APPEND))
			cbt->iface.recno = WT_RECNO_OOB;

		WT_ERR(__cursor_col_search(session, cbt, NULL));

		/*
		 * If not overwriting, fail if the key exists.  Creating a
		 * record past the end of the tree in a fixed-length
		 * column-store implicitly fills the gap with empty records.
		 * Fail in that case, the record exists.
		 */
		if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE) &&
		    ((cbt->compare == 0 && __cursor_valid(cbt, NULL)) ||
		    (cbt->compare != 0 && __cursor_fix_implicit(btree, cbt))))
			WT_ERR(WT_DUPLICATE_KEY);

		WT_ERR(__cursor_col_modify(session, cbt, false));
		if (F_ISSET(cursor, WT_CURSTD_APPEND))
			cbt->iface.recno = cbt->recno;
		break;
	case BTREE_ROW:
		WT_ERR(__cursor_row_search(session, cbt, NULL, true));
		/*
		 * If not overwriting, fail if the key exists, else insert the
		 * key/value pair.
		 */
		if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE) &&
		    cbt->compare == 0 && __cursor_valid(cbt, NULL))
			WT_ERR(WT_DUPLICATE_KEY);

		ret = __cursor_row_modify(session, cbt, false);
		break;
	}

err:	if (ret == WT_RESTART) {
		WT_STAT_CONN_INCR(session, cursor_restart);
		WT_STAT_DATA_INCR(session, cursor_restart);
		goto retry;
	}
	/* Insert doesn't maintain a position across calls, clear resources. */
	if (ret == 0)
		WT_TRET(__curfile_leave(cbt));
	if (ret != 0)
		WT_TRET(__cursor_reset(cbt));
	return (ret);
}
コード例 #30
0
ファイル: bt_cursor.c プロジェクト: ksuarz/mongo
/*
 * __wt_btcur_search_near --
 *	Search for a record in the tree.
 */
int
__wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp)
{
	WT_BTREE *btree;
	WT_CURSOR *cursor;
	WT_DECL_RET;
	WT_SESSION_IMPL *session;
	WT_UPDATE *upd;
	int exact;
	bool valid;

	btree = cbt->btree;
	cursor = &cbt->iface;
	session = (WT_SESSION_IMPL *)cursor->session;
	upd = NULL;					/* -Wuninitialized */
	exact = 0;

	WT_STAT_CONN_INCR(session, cursor_search_near);
	WT_STAT_DATA_INCR(session, cursor_search_near);

	/*
	 * If we have a row-store page pinned, search it; if we don't have a
	 * page pinned, or the search of the pinned page doesn't find an exact
	 * match, search from the root. Unlike WT_CURSOR.search, ignore pinned
	 * pages in the case of column-store, search-near isn't an interesting
	 * enough case for column-store to add the complexity needed to avoid
	 * the tree search.
	 *
	 * Set the "insert" flag for the btree row-store search; we may intend
	 * to position the cursor at the end of the tree, rather than match an
	 * existing record.
	 */
	valid = false;
	if (btree->type == BTREE_ROW &&
	    F_ISSET(cbt, WT_CBT_ACTIVE) &&
	    cbt->ref->page->read_gen != WT_READGEN_OLDEST) {
		__wt_txn_cursor_op(session);

		WT_ERR(__cursor_row_search(session, cbt, cbt->ref, true));

		/*
		 * Search-near is trickier than search when searching an already
		 * pinned page. If search returns the first or last page slots,
		 * discard the results and search the full tree as the neighbor
		 * pages might offer better matches. This test is simplistic as
		 * we're ignoring append lists (there may be no page slots or we
		 * might be legitimately positioned after the last page slot).
		 * Ignore those cases, it makes things too complicated.
		 */
		if (cbt->slot != 0 &&
		    cbt->slot != cbt->ref->page->pg_row_entries - 1)
			valid = __cursor_valid(cbt, &upd);
	}
	if (!valid) {
		WT_ERR(__cursor_func_init(cbt, true));
		WT_ERR(btree->type == BTREE_ROW ?
		    __cursor_row_search(session, cbt, NULL, true) :
		    __cursor_col_search(session, cbt, NULL));
		valid = __cursor_valid(cbt, &upd);
	}

	/*
	 * If we find a valid key, return it.
	 *
	 * Else, creating a record past the end of the tree in a fixed-length
	 * column-store implicitly fills the gap with empty records.  In this
	 * case, we instantiate the empty record, it's an exact match.
	 *
	 * Else, move to the next key in the tree (bias for prefix searches).
	 * Cursor next skips invalid rows, so we don't have to test for them
	 * again.
	 *
	 * Else, redo the search and move to the previous key in the tree.
	 * Cursor previous skips invalid rows, so we don't have to test for
	 * them again.
	 *
	 * If that fails, quit, there's no record to return.
	 */
	if (valid) {
		exact = cbt->compare;
		ret = __wt_kv_return(session, cbt, upd);
	} else if (__cursor_fix_implicit(btree, cbt)) {
		cbt->recno = cursor->recno;
		cbt->v = 0;
		cursor->value.data = &cbt->v;
		cursor->value.size = 1;
		exact = 0;
	} else if ((ret = __wt_btcur_next(cbt, false)) != WT_NOTFOUND)
		exact = 1;
	else {
		WT_ERR(__cursor_func_init(cbt, true));
		WT_ERR(btree->type == BTREE_ROW ?
		    __cursor_row_search(session, cbt, NULL, true) :
		    __cursor_col_search(session, cbt, NULL));
		if (__cursor_valid(cbt, &upd)) {
			exact = cbt->compare;
			ret = __wt_kv_return(session, cbt, upd);
		} else if ((ret = __wt_btcur_prev(cbt, false)) != WT_NOTFOUND)
			exact = -1;
	}

#ifdef HAVE_DIAGNOSTIC
	if (ret == 0)
		WT_ERR(__wt_cursor_key_order_init(session, cbt));
#endif

err:	if (ret != 0)
		WT_TRET(__cursor_reset(cbt));
	if (exactp != NULL && (ret == 0 || ret == WT_NOTFOUND))
		*exactp = exact;
	return (ret);
}