Example #1
/*
 * __wt_async_op_enqueue --
 *	Enqueue an operation onto the work queue.
 */
int
__wt_async_op_enqueue(WT_SESSION_IMPL *session, WT_ASYNC_OP_IMPL *op)
{
	WT_ASYNC *async;
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	uint64_t cur_head, cur_tail, my_alloc, my_slot;
#ifdef	HAVE_DIAGNOSTIC
	WT_ASYNC_OP_IMPL *my_op;
#endif

	conn = S2C(session);
	async = conn->async;

	/*
	 * If an application re-uses a WT_ASYNC_OP, we end up here with an
	 * invalid object.
	 */
	if (op->state != WT_ASYNCOP_READY)
		WT_RET_MSG(session, EINVAL,
		    "application error: WT_ASYNC_OP already in use");

	/*
	 * Enqueue op at the tail of the work queue.
	 * We get our slot in the ring buffer to use.
	 */
	my_alloc = WT_ATOMIC_ADD8(async->alloc_head, 1);
	my_slot = my_alloc % async->async_qsize;

	/*
	 * Make sure we haven't wrapped around the queue.
	 * If so, wait for the tail to advance off this slot.
	 */
	WT_ORDERED_READ(cur_tail, async->tail_slot);
	while (cur_tail == my_slot) {
		__wt_yield();
		WT_ORDERED_READ(cur_tail, async->tail_slot);
	}

#ifdef	HAVE_DIAGNOSTIC
	WT_ORDERED_READ(my_op, async->async_queue[my_slot]);
	if (my_op != NULL)
		return (__wt_panic(session));
#endif
	WT_PUBLISH(async->async_queue[my_slot], op);
	op->state = WT_ASYNCOP_ENQUEUED;
	if (WT_ATOMIC_ADD4(async->cur_queue, 1) > async->max_queue)
		WT_PUBLISH(async->max_queue, async->cur_queue);
	/*
	 * Multiple threads may be adding ops to the queue.  We need to wait
	 * our turn to make our slot visible to workers.
	 */
	WT_ORDERED_READ(cur_head, async->head);
	while (cur_head != (my_alloc - 1)) {
		__wt_yield();
		WT_ORDERED_READ(cur_head, async->head);
	}
	WT_PUBLISH(async->head, my_alloc);
	return (ret);
}
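
A note on the protocol above: the enqueue combines two yield loops, one waiting for ring space and one publishing slots in ticket order. Below is a minimal, self-contained sketch of the same idea using C11 atomics and sched_yield() (which is what __wt_yield() wraps on POSIX systems); all names and sizes are illustrative, not WiredTiger API.

#include <sched.h>
#include <stdatomic.h>
#include <stdint.h>

#define QSIZE 64                                /* hypothetical ring size */

static atomic_uint_fast64_t alloc_head;         /* next ticket to hand out */
static atomic_uint_fast64_t head;               /* last ticket made visible */
static atomic_uint_fast64_t tail_slot;          /* slot the consumer frees next */
static void *_Atomic ring[QSIZE];

static void
enqueue(void *op)
{
        uint64_t my_alloc, my_slot;

        /* Claim a unique ticket and derive our ring slot from it. */
        my_alloc = atomic_fetch_add(&alloc_head, 1) + 1;
        my_slot = my_alloc % QSIZE;

        /* If the ring has wrapped onto the consumer's slot, wait for it. */
        while (atomic_load(&tail_slot) == my_slot)
                sched_yield();

        /* Publish the payload with release semantics (cf. WT_PUBLISH). */
        atomic_store_explicit(&ring[my_slot], op, memory_order_release);

        /* Wait our turn so slots become visible in ticket order. */
        while (atomic_load(&head) != my_alloc - 1)
                sched_yield();
        atomic_store(&head, my_alloc);
}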
Example #2
File: fops.c Project: Asamaha/mongo
/*
 * fop --
 *	File operation function.
 */
static void *
fop(void *arg)
{
	STATS *s;
	uintptr_t id;
	WT_RAND_STATE rnd;
	u_int i;

	id = (uintptr_t)arg;
	__wt_yield();		/* Get all the threads created. */

	s = &run_stats[id];
	__wt_random_init(&rnd);

	for (i = 0; i < nops; ++i, __wt_yield())
		switch (__wt_random(&rnd) % 10) {
		case 0:
			++s->bulk;
			obj_bulk();
			break;
		case 1:
			++s->create;
			obj_create();
			break;
		case 2:
			++s->cursor;
			obj_cursor();
			break;
		case 3:
			++s->drop;
			obj_drop(__wt_random(&rnd) & 1);
			break;
		case 4:
			++s->ckpt;
			obj_checkpoint();
			break;
		case 5:
			++s->upgrade;
			obj_upgrade();
			break;
		case 6:
			++s->rebalance;
			obj_rebalance();
			break;
		case 7:
			++s->verify;
			obj_verify();
			break;
		case 8:
			++s->bulk_unique;
			obj_bulk_unique(__wt_random(&rnd) & 1);
			break;
		case 9:
			++s->create_unique;
			obj_create_unique(__wt_random(&rnd) & 1);
			break;
		}

	return (NULL);
}
Example #3
File: main.c Project: ajdavis/mongo
static void
op(WT_SESSION *session, WT_RAND_STATE *rnd, WT_CURSOR **cpp)
{
	WT_CURSOR *cursor;
	WT_DECL_RET;
	u_int i, key;
	char buf[128];
	bool readonly;

	/* Close any open cursor in the slot we're about to reuse. */
	if (*cpp != NULL) {
		testutil_check((*cpp)->close(*cpp));
		*cpp = NULL;
	}

	cursor = NULL;
	readonly = __wt_random(rnd) % 2 == 0;

	/* Loop to open an object handle. */
	for (i = __wt_random(rnd) % uris; !done; __wt_yield()) {
		/* Use a checkpoint handle for 50% of reads. */
		ret = session->open_cursor(session, uri_list[i], NULL,
		    readonly && (i % 2 == 0) ?
		    "checkpoint=WiredTigerCheckpoint" : NULL, &cursor);
		if (ret != EBUSY) {
			testutil_check(ret);
			break;
		}
		(void)__wt_atomic_add64(&worker_busy, 1);
	}
	if (cursor == NULL)
		return;

	/* Operate on some number of key/value pairs. */
	for (key = 1;
	    !done && key < MAXKEY; key += __wt_random(rnd) % 37, __wt_yield()) {
		testutil_check(
		    __wt_snprintf(buf, sizeof(buf), "key:%020u", key));
		cursor->set_key(cursor, buf);
		if (readonly)
			testutil_check(cursor->search(cursor));
		else {
			cursor->set_value(cursor, buf);
			testutil_check(cursor->insert(cursor));
		}
	}

	/* Close the cursor half the time, otherwise cache it. */
	if (__wt_random(rnd) % 2 == 0)
		testutil_check(cursor->close(cursor));
	else
		*cpp = cursor;

	(void)__wt_atomic_add64(&worker, 1);
}
Example #4
File: conn_log.c Project: alabid/mongo
/*
 * __log_wrlsn_server --
 *	The log wrlsn server thread.
 */
static WT_THREAD_RET
__log_wrlsn_server(void *arg)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_LOG *log;
	WT_SESSION_IMPL *session;
	int locked, yield;

	session = arg;
	conn = S2C(session);
	log = conn->log;
	locked = yield = 0;
	while (F_ISSET(conn, WT_CONN_LOG_SERVER_RUN)) {
		__wt_spin_lock(session, &log->log_slot_lock);
		locked = 1;
		WT_ERR(__wt_log_wrlsn(session, NULL, &yield));
		locked = 0;
		__wt_spin_unlock(session, &log->log_slot_lock);
		if (++yield < 1000)
			__wt_yield();
		else
			WT_ERR(__wt_cond_wait(session,
			    conn->log_wrlsn_cond, 100000));
	}
	if (0) {
err:		__wt_err(session, ret, "log wrlsn server error");
	}
	if (locked)
		__wt_spin_unlock(session, &log->log_slot_lock);
	return (WT_THREAD_RET_VALUE);
}
Example #5
File: txn.c Project: bsamek/wiredtiger
/*
 * __wt_txn_global_shutdown --
 *	Shut down the global transaction state.
 */
int
__wt_txn_global_shutdown(WT_SESSION_IMPL *session)
{
	bool txn_active;

	/*
	 * We're shutting down.  Make sure everything gets freed.
	 *
	 * It's possible that the eviction server is in the middle of a long
	 * operation, with a transaction ID pinned.  In that case, we will loop
	 * here until the transaction ID is released, when the oldest
	 * transaction ID will catch up with the current ID.
	 */
	for (;;) {
		WT_RET(__wt_txn_activity_check(session, &txn_active));
		if (!txn_active)
			break;

		WT_STAT_CONN_INCR(session, txn_release_blocked);
		__wt_yield();
	}

#ifdef HAVE_TIMESTAMPS
	/*
	 * Now that all transactions have completed, no timestamps should be
	 * pinned.
	 */
	__wt_timestamp_set_inf(&S2C(session)->txn_global.pinned_timestamp);
#endif

	return (0);
}
Example #6
/*
 * __lsm_tree_close --
 *	Close an LSM tree structure.
 */
static int
__lsm_tree_close(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
{
	WT_DECL_RET;
	int i;

	/* Stop any active merges. */
	F_CLR(lsm_tree, WT_LSM_TREE_ACTIVE);

	/*
	 * Wait for all LSM operations and work units that were in flight to
	 * finish.
	 */
	for (i = 0; lsm_tree->refcnt > 1 || lsm_tree->queue_ref > 0; ++i) {
		/*
		 * Remove any work units from the manager queues. Do this step
		 * repeatedly in case a work unit was in the process of being
		 * created when we cleared the active flag.
		 * !! Drop the schema lock whilst completing this step so that
		 * we don't block any operations that require the schema
		 * lock to complete. This is safe because any operation that
		 * is closing the tree should first have gotten exclusive
		 * access to the LSM tree via __wt_lsm_tree_get, so other
		 * schema level operations will return EBUSY, even though
		 * we're dropping the schema lock here.
		 */
		if (i % 1000 == 0) {
			WT_WITHOUT_SCHEMA_LOCK(session, ret =
			    __wt_lsm_manager_clear_tree(session, lsm_tree));
			WT_RET(ret);
		}
		__wt_yield();
	}
	return (0);
}
Example #7
/*
 * __spin_lock_next_id --
 *	Return the next spinlock caller ID.
 */
static int
__spin_lock_next_id(WT_SESSION_IMPL *session, int *idp)
{
	static int lock_id = 0, next_id = 0;
	WT_DECL_RET;

	/* If we've ever registered this location, we already have an ID. */
	if (*idp != WT_SPINLOCK_REGISTER)
		return (0);

	/*
	 * We can't use the global spinlock to lock the ID allocation (duh!),
	 * use a CAS instruction to serialize access to a local variable.
	 * This work only gets done once per library instantiation, there
	 * isn't a performance concern.
	 */
	while (!WT_ATOMIC_CAS(lock_id, 0, 1))
		__wt_yield();

	/* Allocate a blocking ID for this location. */
	if (*idp == WT_SPINLOCK_REGISTER) {
		if (next_id < WT_SPINLOCK_MAX_LOCATION_ID)
			*idp = next_id++;
		else
			WT_ERR_MSG(session, ENOMEM,
			    "spinlock caller location registry failed, "
			    "increase the connection's blocking matrix size");
	}

err:	WT_PUBLISH(lock_id, 0);
	return (ret);
}
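
The WT_ATOMIC_CAS/__wt_yield pair above is a test-and-set spinlock guarding a one-time registration step. Here is a standalone sketch of that lock with C11 atomics; the names are illustrative, and the release store plays the role of WT_PUBLISH.

#include <sched.h>
#include <stdatomic.h>

static atomic_int lock_word;            /* 0 == unlocked, 1 == locked */

static void
registry_lock(void)
{
        int expected;

        /* Spin until we swap 0 -> 1, yielding the CPU on each failure. */
        for (;;) {
                expected = 0;
                if (atomic_compare_exchange_weak(&lock_word, &expected, 1))
                        return;
                sched_yield();
        }
}

static void
registry_unlock(void)
{
        /* Release-store zero, the equivalent of WT_PUBLISH(lock_id, 0). */
        atomic_store_explicit(&lock_word, 0, memory_order_release);
}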
Example #8
File: file.c Project: DINKIN/mongo
void
obj_bulk(void)
{
	WT_CURSOR *c;
	WT_SESSION *session;
	int ret;

	if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
		testutil_die(ret, "conn.session");

	if ((ret = session->create(session, uri, config)) != 0)
		if (ret != EEXIST && ret != EBUSY)
			testutil_die(ret, "session.create");

	if (ret == 0) {
		__wt_yield();
		if ((ret = session->open_cursor(
		    session, uri, NULL, "bulk", &c)) == 0) {
			if ((ret = c->close(c)) != 0)
				testutil_die(ret, "cursor.close");
		} else if (ret != ENOENT && ret != EBUSY && ret != EINVAL)
			testutil_die(ret, "session.open_cursor bulk");
	}
	if ((ret = session->close(session, NULL)) != 0)
		testutil_die(ret, "session.close");
}
Example #9
File: file.c Project: DINKIN/mongo
void
obj_create_unique(int force)
{
	WT_SESSION *session;
	int ret;
	char new_uri[64];

	if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
		testutil_die(ret, "conn.session");

	/* Generate a unique object name. */
	if ((ret = pthread_rwlock_wrlock(&single)) != 0)
		testutil_die(ret, "pthread_rwlock_wrlock single");
	testutil_check(__wt_snprintf(
	    new_uri, sizeof(new_uri), "%s.%u", uri, ++uid));
	if ((ret = pthread_rwlock_unlock(&single)) != 0)
		testutil_die(ret, "pthread_rwlock_unlock single");

	if ((ret = session->create(session, new_uri, config)) != 0)
		testutil_die(ret, "session.create");

	__wt_yield();
	while ((ret = session->drop(
	    session, new_uri, force ? "force" : NULL)) != 0)
		if (ret != EBUSY)
			testutil_die(ret, "session.drop: %s", new_uri);

	if ((ret = session->close(session, NULL)) != 0)
		testutil_die(ret, "session.close");
}
Example #10
File: log_slot.c Project: alabid/mongo
/*
 * __log_slot_find_free --
 * 	Find and return a free log slot.
 */
static int
__log_slot_find_free(WT_SESSION_IMPL *session, WT_LOGSLOT **slot)
{
	WT_CONNECTION_IMPL *conn;
	WT_LOG *log;
	uint32_t pool_i;

	conn = S2C(session);
	log = conn->log;
	WT_ASSERT(session, slot != NULL);
	/*
	 * Encourage processing and moving the write LSN forward.
	 * That process has to walk the slots anyway, so do that
	 * work and let it give us the index of a free slot along
	 * the way.
	 */
	WT_RET(__wt_log_wrlsn(session, &pool_i, NULL));
	while (pool_i == WT_SLOT_POOL) {
		__wt_yield();
		WT_RET(__wt_log_wrlsn(session, &pool_i, NULL));
	}
	*slot = &log->slot_pool[pool_i];
	WT_ASSERT(session, (*slot)->slot_state == WT_LOG_SLOT_FREE);
	return (0);
}
Example #11
/*
 * __wt_log_slot_switch --
 *	Switch out the current slot and set up a new one.
 */
int
__wt_log_slot_switch(WT_SESSION_IMPL *session,
    WT_MYSLOT *myslot, bool retry, bool forced, bool *did_work)
{
	WT_DECL_RET;
	WT_LOG *log;

	log = S2C(session)->log;

	/*
	 * !!! Since the WT_WITH_SLOT_LOCK macro is a do-while loop, the
	 * compiler does not like it combined directly with the while loop
	 * here.
	 *
	 * The loop conditional is a bit complex.  We have to retry if we
	 * closed the slot but were unable to set up a new slot.  In that
	 * case the flag indicating we have closed the slot will still be set.
	 * We have to retry in that case regardless of the retry setting
	 * because we are responsible for setting up the new slot.
	 */
	do {
		WT_WITH_SLOT_LOCK(session, log,
		    ret = __log_slot_switch_internal(
		    session, myslot, forced, did_work));
		if (ret == EBUSY) {
			WT_STAT_CONN_INCR(session, log_slot_switch_busy);
			__wt_yield();
		}
		WT_RET(WT_SESSION_CHECK_PANIC(session));
		if (F_ISSET(S2C(session), WT_CONN_CLOSING))
			break;
	} while (F_ISSET(myslot, WT_MYSLOT_CLOSE) || (retry && ret == EBUSY));
	return (ret);
}
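
Stripped of the locking macro, the control flow above is a retry loop around an operation that can fail with EBUSY. A minimal sketch of that shape, where do_switch is a hypothetical callback standing in for __log_slot_switch_internal:

#include <errno.h>
#include <sched.h>

static int
retry_ebusy(int (*do_switch)(void *), void *arg)
{
        int ret;

        do {
                if ((ret = do_switch(arg)) == EBUSY)
                        sched_yield();  /* let the blocking thread run */
        } while (ret == EBUSY);
        return (ret);
}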
Example #12
File: thread.c Project: ajdavis/mongo
/*
 * Create a table and open a bulk cursor on it.
 */
void
op_bulk(void *arg)
{
	TEST_OPTS *opts;
	TEST_PER_THREAD_OPTS *args;
	WT_CURSOR *c;
	WT_SESSION *session;
	int ret;

	args = (TEST_PER_THREAD_OPTS *)arg;
	opts = args->testopts;

	testutil_check(
	    opts->conn->open_session(opts->conn, NULL, NULL, &session));

	if ((ret = session->create(session,
	    opts->uri, DEFAULT_TABLE_SCHEMA)) != 0)
		if (ret != EEXIST && ret != EBUSY)
			testutil_die(ret, "session.create");

	if (ret == 0) {
		__wt_yield();
		if ((ret = session->open_cursor(session,
		    opts->uri, NULL, "bulk,checkpoint_wait=false", &c)) == 0) {
                        testutil_check(c->close(c));
		} else if (ret != ENOENT && ret != EBUSY && ret != EINVAL)
			testutil_die(ret, "session.open_cursor bulk");
	}

	testutil_check(session->close(session, NULL));
	args->thread_counter++;
}
Example #13
/*
 * __log_wrlsn_server --
 *	The log wrlsn server thread.
 */
static WT_THREAD_RET
__log_wrlsn_server(void *arg)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_LOG *log;
	WT_LSN prev;
	WT_SESSION_IMPL *session;
	int yield;
	bool did_work;

	session = arg;
	conn = S2C(session);
	log = conn->log;
	yield = 0;
	WT_INIT_LSN(&prev);
	did_work = false;
	while (F_ISSET(conn, WT_CONN_LOG_SERVER_RUN)) {
		/*
		 * Write out any log record buffers if anything was done
		 * since last time.  Only call the function to walk the
		 * slots if the system is not idle.  On an idle system
		 * the alloc_lsn will not advance and the written lsn will
		 * match the alloc_lsn.
		 */
		if (__wt_log_cmp(&prev, &log->alloc_lsn) != 0 ||
		    __wt_log_cmp(&log->write_lsn, &log->alloc_lsn) != 0)
			WT_ERR(__wt_log_wrlsn(session, &yield));
		else
			WT_STAT_FAST_CONN_INCR(session, log_write_lsn_skip);
		prev = log->alloc_lsn;
		if (yield == 0)
			did_work = true;
		else
			did_work = false;
		/*
		 * If __wt_log_wrlsn did work we want to yield instead of sleep.
		 */
		if (yield++ < WT_THOUSAND)
			__wt_yield();
		else
			/*
			 * Send in false because if we did any work we would
			 * not be on this path.
			 */
			WT_ERR(__wt_cond_auto_wait(
			    session, conn->log_wrlsn_cond, did_work));
	}
	/*
	 * On close we need to do this one more time because there could
	 * be straggling log writes that need to be written.
	 */
	WT_ERR(__wt_log_force_write(session, 1, NULL));
	WT_ERR(__wt_log_wrlsn(session, NULL));
	if (0) {
err:		__wt_err(session, ret, "log wrlsn server error");
	}
	return (WT_THREAD_RET_VALUE);
}
Example #14
/*
 * __wt_delete_page_rollback --
 *	Abort pages that were deleted without being instantiated.
 */
void
__wt_delete_page_rollback(WT_SESSION_IMPL *session, WT_REF *ref)
{
	WT_UPDATE **upd;

	/*
	 * If the page is still "deleted", it's as we left it, reset the state
	 * to on-disk and we're done.  Otherwise, we expect the page is either
	 * instantiated or being instantiated.  Loop because it's possible for
	 * the page to return to the deleted state if instantiation fails.
	 */
	for (;; __wt_yield())
		switch (ref->state) {
		case WT_REF_DISK:
		case WT_REF_READING:
			WT_ASSERT(session, 0);		/* Impossible, assert */
			break;
		case WT_REF_DELETED:
			/*
			 * If the page is still "deleted", it's as we left it,
			 * reset the state.
			 */
			if (__wt_atomic_casv32(
			    &ref->state, WT_REF_DELETED, WT_REF_DISK))
				return;
			break;
		case WT_REF_LOCKED:
			/*
			 * A possible state, the page is being instantiated.
			 */
			break;
		case WT_REF_MEM:
		case WT_REF_SPLIT:
			/*
			 * We can't use the normal read path to get a copy of
			 * the page because the session may have closed the
			 * cursor, we no longer have the reference to the tree
			 * required for a hazard pointer.  We're safe because
			 * with unresolved transactions, the page isn't going
			 * anywhere.
			 *
			 * The page is in an in-memory state, walk the list of
			 * update structures and abort them.
			 */
			for (upd =
			    ref->page_del->update_list; *upd != NULL; ++upd)
				(*upd)->txnid = WT_TXN_ABORTED;

			/*
			 * Discard the memory, the transaction can't abort
			 * twice.
			 */
			__wt_free(session, ref->page_del->update_list);
			__wt_free(session, ref->page_del);
			return;
		}
}
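
The for (;; __wt_yield()) idiom above polls a shared state machine, yielding between polls until it reaches a state it can act on. A stripped-down sketch of the same shape with a hypothetical atomic state variable (not the real WT_REF state set):

#include <sched.h>
#include <stdatomic.h>
#include <stdbool.h>

enum state { S_PENDING, S_BUSY, S_DONE };

/* Resolve a pending item: whoever moves it PENDING -> DONE wins. */
static bool
resolve(atomic_int *state)
{
        int expected;

        for (;; sched_yield())
                switch (atomic_load(state)) {
                case S_PENDING:
                        expected = S_PENDING;
                        if (atomic_compare_exchange_strong(
                            state, &expected, S_DONE))
                                return (true);  /* we resolved it */
                        break;                  /* lost a race, re-poll */
                case S_BUSY:
                        break;                  /* transient, wait it out */
                case S_DONE:
                        return (false);         /* someone else did it */
                }
}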
Example #15
File: log_slot.c Project: Jaryli/mongo
/*
 * __wt_log_slot_new --
 *	Find a free slot and switch it as the new active slot.
 *	Must be called holding the slot lock.
 */
int
__wt_log_slot_new(WT_SESSION_IMPL *session)
{
	WT_CONNECTION_IMPL *conn;
	WT_LOG *log;
	WT_LOGSLOT *slot;
	int32_t i;

	WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SLOT));
	conn = S2C(session);
	log = conn->log;
	/*
	 * Although this function is single threaded, multiple threads could
	 * be trying to set a new active slot sequentially.  If we find an
	 * active slot that is valid, return.
	 */
	if ((slot = log->active_slot) != NULL &&
	    WT_LOG_SLOT_OPEN(slot->slot_state))
		return (0);

	/*
	 * Keep trying until we can find a free slot.
	 */
	for (;;) {
		/*
		 * For now just restart at 0.  We could use log->pool_index
		 * if that is inefficient.
		 */
		for (i = 0; i < WT_SLOT_POOL; i++) {
			slot = &log->slot_pool[i];
			if (slot->slot_state == WT_LOG_SLOT_FREE) {
				/*
				 * Acquire our starting position in the
				 * log file.  Assume the full buffer size.
				 */
				WT_RET(__wt_log_acquire(session,
				    log->slot_buf_size, slot));
				/*
				 * We have a new, initialized slot to use.
				 * Set it as the active slot.
				 */
				WT_STAT_FAST_CONN_INCR(session,
				    log_slot_transitions);
				log->active_slot = slot;
				return (0);
			}
		}
		/*
		 * If we didn't find any free slots signal the worker thread.
		 */
		(void)__wt_cond_signal(session, conn->log_wrlsn_cond);
		__wt_yield();
	}
	/* NOTREACHED */
}
Example #16
/*
 * __page_refp --
 *      Return the page's index and slot for a reference.
 */
static inline void
__page_refp(WT_SESSION_IMPL *session,
    WT_REF *ref, WT_PAGE_INDEX **pindexp, uint32_t *slotp)
{
	WT_PAGE_INDEX *pindex;
	uint32_t i;

	/*
	 * Copy the parent page's index value: the page can split at any time,
	 * but the index's value is always valid, even if it's not up-to-date.
	 */
retry:	WT_INTL_INDEX_GET(session, ref->home, pindex);

	/*
	 * Use the page's reference hint: it should be correct unless the page
	 * split before our slot.  If the page splits after our slot, the hint
	 * will point earlier in the array than our actual slot, so the first
	 * loop is from the hint to the end of the list, and the second loop
	 * is from the start of the list to the end of the list.  (The second
         * loop overlaps the first, but that only happens in cases where we've
	 * deepened the tree and aren't going to find our slot at all, that's
	 * not worth optimizing.)
	 *
	 * It's not an error for the reference hint to be wrong, it just means
	 * the first retrieval (which sets the hint for subsequent retrievals),
	 * is slower.
	 */
	i = ref->pindex_hint;
	if (i < pindex->entries && pindex->index[i]->page == ref->page) {
		*pindexp = pindex;
		*slotp = i;
		return;
	}
	while (++i < pindex->entries)
		if (pindex->index[i]->page == ref->page) {
			*pindexp = pindex;
			*slotp = ref->pindex_hint = i;
			return;
		}
	for (i = 0; i < pindex->entries; ++i)
		if (pindex->index[i]->page == ref->page) {
			*pindexp = pindex;
			*slotp = ref->pindex_hint = i;
			return;
		}

	/*
	 * If we don't find our reference, the page split into a new level and
	 * our home pointer references the wrong page.  After internal pages
	 * deepen, their reference structure home value are updated; yield and
	 * wait for that to happen.
	 */
	__wt_yield();
	goto retry;
}
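
The retry label above is a copy-on-write read pattern: take a snapshot of a published index, search it, and if the snapshot proves stale, yield and re-read. A self-contained sketch with hypothetical types; the acquire load plays the role of WT_INTL_INDEX_GET.

#include <sched.h>
#include <stdatomic.h>
#include <stdint.h>

struct pindex {
        uint32_t entries;
        void **index;                   /* hypothetical slot array */
};

static uint32_t
index_slot(_Atomic(struct pindex *) *home, void *page)
{
        struct pindex *pindex;
        uint32_t i;

        for (;;) {
                /* Snapshot the currently published index. */
                pindex = atomic_load_explicit(home, memory_order_acquire);
                for (i = 0; i < pindex->entries; ++i)
                        if (pindex->index[i] == page)
                                return (i);
                /* Stale snapshot: wait for the split to complete. */
                sched_yield();
        }
}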
Example #17
File: thread.c Project: ajdavis/mongo
/*
 * Create a guaranteed unique table and open and close a bulk cursor on it.
 */
void
op_bulk_unique(void *arg)
{
	TEST_OPTS *opts;
	TEST_PER_THREAD_OPTS *args;
	WT_CURSOR *c;
	WT_RAND_STATE rnd;
	WT_SESSION *session;
	int ret;
	char new_uri[64];

	args = (TEST_PER_THREAD_OPTS *)arg;
	opts = args->testopts;
	__wt_random_init_seed(NULL, &rnd);

	testutil_check(
	    opts->conn->open_session(opts->conn, NULL, NULL, &session));

	/* Generate a unique object name. */
	testutil_check(__wt_snprintf(
	    new_uri, sizeof(new_uri), "%s.%" PRIu64,
	    opts->uri, __wt_atomic_add64(&opts->unique_id, 1)));
	testutil_check(session->create(session, new_uri, DEFAULT_TABLE_SCHEMA));

	__wt_yield();

	/*
	 * Opening a bulk cursor may have raced with a forced checkpoint
	 * which created a checkpoint of the empty file, and triggers an EINVAL.
	 */
	if ((ret = session->open_cursor(
	    session, new_uri, NULL, "bulk,checkpoint_wait=false", &c)) == 0) {
		testutil_check(c->close(c));
	} else if (ret != EINVAL && ret != EBUSY)
		testutil_die(ret,
		    "session.open_cursor bulk unique: %s", new_uri);

	while ((ret = session->drop(session, new_uri, __wt_random(&rnd) & 1 ?
	    "force,checkpoint_wait=false" : "checkpoint_wait=false")) != 0)
		if (ret != EBUSY)
			testutil_die(ret, "session.drop: %s", new_uri);
		else
			/*
			 * The EBUSY is expected when we run with
			 * checkpoint_wait set to false, so we increment the
			 * counter while in this loop to avoid false positives.
			 */
			args->thread_counter++;

	testutil_check(session->close(session, NULL));
	args->thread_counter++;
}
Example #18
/*
 * __wt_log_slot_wait --
 *	Wait for slot leader to allocate log area and tell us our log offset.
 */
int
__wt_log_slot_wait(WT_SESSION_IMPL *session, WT_LOGSLOT *slot)
{
	int yield_count;

	yield_count = 0;
	WT_UNUSED(session);

	while (slot->slot_state > WT_LOG_SLOT_DONE)
		if (++yield_count < 1000)
			__wt_yield();
		else
			__wt_sleep(0, 200);
	return (0);
}
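
This is the simplest form of the two-phase wait used throughout these examples: yield while the wait is expected to be short, then fall back to sleeping so a long wait stops burning CPU. A generic sketch, where the ready predicate is a hypothetical stand-in for the slot-state check:

#include <sched.h>
#include <stdbool.h>
#include <time.h>

static void
wait_until(bool (*ready)(void *), void *arg)
{
        struct timespec ts = { 0, 200 * 1000 };  /* 200 microseconds */
        int yield_count;

        for (yield_count = 0; !ready(arg);)
                if (++yield_count < 1000)
                        sched_yield();           /* short wait: stay hot */
                else
                        (void)nanosleep(&ts, NULL); /* long wait: back off */
}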
Example #19
File: file.c Project: DINKIN/mongo
void
obj_bulk_unique(int force)
{
	WT_CURSOR *c;
	WT_SESSION *session;
	int ret;
	char new_uri[64];

	if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
		testutil_die(ret, "conn.session");

	/* Generate a unique object name. */
	if ((ret = pthread_rwlock_wrlock(&single)) != 0)
		testutil_die(ret, "pthread_rwlock_wrlock single");
	testutil_check(__wt_snprintf(
	    new_uri, sizeof(new_uri), "%s.%u", uri, ++uid));
	if ((ret = pthread_rwlock_unlock(&single)) != 0)
		testutil_die(ret, "pthread_rwlock_unlock single");

	if ((ret = session->create(session, new_uri, config)) != 0)
		testutil_die(ret, "session.create: %s", new_uri);

	__wt_yield();
	/*
	 * Opening a bulk cursor may have raced with a forced checkpoint
         * which created a checkpoint of the empty file, and triggers an EINVAL.
	 */
	if ((ret = session->open_cursor(
	    session, new_uri, NULL, "bulk", &c)) == 0) {
		if ((ret = c->close(c)) != 0)
			testutil_die(ret, "cursor.close");
	} else if (ret != EINVAL)
		testutil_die(ret,
		    "session.open_cursor bulk unique: %s, new_uri");

	while ((ret = session->drop(
	    session, new_uri, force ? "force" : NULL)) != 0)
		if (ret != EBUSY)
			testutil_die(ret, "session.drop: %s", new_uri);

	if ((ret = session->close(session, NULL)) != 0)
		testutil_die(ret, "session.close");
}
Example #20
File: bt_sync.c Project: ajdavis/mongo
/*
 * __sync_dup_walk --
 *	Duplicate a tree walk point.
 */
static inline int
__sync_dup_walk(
    WT_SESSION_IMPL *session, WT_REF *walk, uint32_t flags, WT_REF **dupp)
{
	WT_REF *old;
	bool busy;

	if ((old = *dupp) != NULL) {
		*dupp = NULL;
		WT_RET(__wt_page_release(session, old, flags));
	}

	/* It is okay to duplicate a walk before it starts. */
	if (walk == NULL || __wt_ref_is_root(walk)) {
		*dupp = walk;
		return (0);
	}

	/* Get a duplicate hazard pointer. */
	for (;;) {
#ifdef HAVE_DIAGNOSTIC
		WT_RET(
		    __wt_hazard_set(session, walk, &busy, __func__, __LINE__));
#else
		WT_RET(__wt_hazard_set(session, walk, &busy));
#endif
		/*
		 * We already have a hazard pointer, we should generally be able
		 * to get another one. We can get spurious busy errors (e.g., if
                 * eviction is attempting to lock the page). Keep trying: we have
		 * one hazard pointer so we should be able to get another one.
		 */
		if (!busy)
			break;
		__wt_yield();
	}

	*dupp = walk;
	return (0);
}
Example #21
File: thread.c Project: ajdavis/mongo
/*
 * Create and drop a unique guaranteed table.
 */
void
op_create_unique(void *arg)
{
	TEST_OPTS *opts;
	TEST_PER_THREAD_OPTS *args;
	WT_RAND_STATE rnd;
	WT_SESSION *session;
	int ret;
	char new_uri[64];

	args = (TEST_PER_THREAD_OPTS *)arg;
	opts = args->testopts;
	__wt_random_init_seed(NULL, &rnd);

	testutil_check(
	    opts->conn->open_session(opts->conn, NULL, NULL, &session));

	/* Generate a unique object name. */
	testutil_check(__wt_snprintf(
	    new_uri, sizeof(new_uri), "%s.%" PRIu64,
	    opts->uri, __wt_atomic_add64(&opts->unique_id, 1)));
	testutil_check(session->create(session, new_uri, DEFAULT_TABLE_SCHEMA));

	__wt_yield();
	while ((ret = session->drop(session, new_uri, __wt_random(&rnd) & 1 ?
	    "force,checkpoint_wait=false" : "checkpoint_wait=false")) != 0)
		if (ret != EBUSY)
			testutil_die(ret, "session.drop: %s", new_uri);
		else
			/*
			 * The EBUSY is expected when we run with
			 * checkpoint_wait set to false, so we increment the
			 * counter while in this loop to avoid false positives.
			 */
			args->thread_counter++;

	testutil_check(session->close(session, NULL));
	args->thread_counter++;
}
Example #22
File: txn.c Project: sylvanchil/mongo
/*
 * __wt_txn_update_oldest --
 *	Sweep the running transactions to update the oldest ID required.
 * !!!
 * If a data-source is calling the WT_EXTENSION_API.transaction_oldest
 * method (for the oldest transaction ID not yet visible to a running
 * transaction), and then comparing that oldest ID against committed
 * transactions to see if updates for a committed transaction are still
 * visible to running transactions, the oldest transaction ID may be
 * the same as the last committed transaction ID, if the transaction
 * state wasn't refreshed after the last transaction committed.  Push
 * past the last committed transaction.
 */
void
__wt_txn_update_oldest(WT_SESSION_IMPL *session, bool force)
{
	WT_CONNECTION_IMPL *conn;
	WT_SESSION_IMPL *oldest_session;
	WT_TXN_GLOBAL *txn_global;
	WT_TXN_STATE *s;
	uint64_t current_id, id, last_running, oldest_id, prev_oldest_id;
	uint32_t i, session_cnt;
	int32_t count;
	bool last_running_moved;

	conn = S2C(session);
	txn_global = &conn->txn_global;

retry:
	current_id = last_running = txn_global->current;
	oldest_session = NULL;
	prev_oldest_id = txn_global->oldest_id;

	/*
	 * For pure read-only workloads, or if the update isn't forced and the
	 * oldest ID isn't too far behind, avoid scanning.
	 */
	if (prev_oldest_id == current_id ||
	    (!force && WT_TXNID_LT(current_id, prev_oldest_id + 100)))
		return;

	/*
	 * We're going to scan.  Increment the count of scanners to prevent the
	 * oldest ID from moving forwards.  Spin if the count is negative,
	 * which indicates that some thread is moving the oldest ID forwards.
	 */
	do {
		if ((count = txn_global->scan_count) < 0)
			WT_PAUSE();
	} while (count < 0 ||
	    !__wt_atomic_casiv32(&txn_global->scan_count, count, count + 1));

	/* The oldest ID cannot change until the scan count goes to zero. */
	prev_oldest_id = txn_global->oldest_id;
	current_id = oldest_id = last_running = txn_global->current;

	/* Walk the array of concurrent transactions. */
	WT_ORDERED_READ(session_cnt, conn->session_cnt);
	for (i = 0, s = txn_global->states; i < session_cnt; i++, s++) {
		/*
		 * Update the oldest ID.
		 *
		 * Ignore: IDs older than the oldest ID we saw. This can happen
		 * if we race with a thread that is allocating an ID -- the ID
		 * will not be used because the thread will keep spinning until
		 * it gets a valid one.
		 */
		if ((id = s->id) != WT_TXN_NONE &&
		    WT_TXNID_LE(prev_oldest_id, id) &&
		    WT_TXNID_LT(id, last_running))
			last_running = id;

		/*
		 * !!!
		 * Note: Don't ignore snap_min values older than the previous
		 * oldest ID.  Read-uncommitted operations publish snap_min
		 * values without incrementing scan_count to protect the global
		 * table.  See the comment in __wt_txn_cursor_op for
		 * more details.
		 */
		if ((id = s->snap_min) != WT_TXN_NONE &&
		    WT_TXNID_LT(id, oldest_id)) {
			oldest_id = id;
			oldest_session = &conn->sessions[i];
		}
	}

	if (WT_TXNID_LT(last_running, oldest_id))
		oldest_id = last_running;

	/* The oldest ID can't move past any named snapshots. */
	if ((id = txn_global->nsnap_oldest_id) != WT_TXN_NONE &&
	    WT_TXNID_LT(id, oldest_id))
		oldest_id = id;

	/* Update the last running ID. */
	last_running_moved =
	    WT_TXNID_LT(txn_global->last_running, last_running);

	/* Update the oldest ID. */
	if (WT_TXNID_LT(prev_oldest_id, oldest_id) || last_running_moved) {
		/*
		 * We know we want to update.  Check if we're racing.
		 */
		if (__wt_atomic_casiv32(&txn_global->scan_count, 1, -1)) {
			WT_ORDERED_READ(session_cnt, conn->session_cnt);
			for (i = 0, s = txn_global->states;
			    i < session_cnt; i++, s++) {
				if ((id = s->id) != WT_TXN_NONE &&
				    WT_TXNID_LT(id, last_running))
					last_running = id;
				if ((id = s->snap_min) != WT_TXN_NONE &&
				    WT_TXNID_LT(id, oldest_id))
					oldest_id = id;
			}

			if (WT_TXNID_LT(last_running, oldest_id))
				oldest_id = last_running;

#ifdef HAVE_DIAGNOSTIC
			/*
			 * Make sure the ID doesn't move past any named
			 * snapshots.
			 *
			 * Don't include the read/assignment in the assert
			 * statement.  Coverity complains if there are
			 * assignments only done in diagnostic builds, and
			 * when the read is from a volatile.
			 */
			id = txn_global->nsnap_oldest_id;
			WT_ASSERT(session,
			    id == WT_TXN_NONE || !WT_TXNID_LT(id, oldest_id));
#endif
			if (WT_TXNID_LT(txn_global->last_running, last_running))
				txn_global->last_running = last_running;
			if (WT_TXNID_LT(txn_global->oldest_id, oldest_id))
				txn_global->oldest_id = oldest_id;
			WT_ASSERT(session, txn_global->scan_count == -1);
			txn_global->scan_count = 0;
		} else {
			/*
			 * We wanted to update the oldest ID but we're racing
			 * another thread.  Retry if this is a forced update.
			 */
			WT_ASSERT(session, txn_global->scan_count > 0);
			(void)__wt_atomic_subiv32(&txn_global->scan_count, 1);
			if (force) {
				__wt_yield();
				goto retry;
			}
		}
	} else {
		if (WT_VERBOSE_ISSET(session, WT_VERB_TRANSACTION) &&
		    current_id - oldest_id > 10000 && oldest_session != NULL) {
			(void)__wt_verbose(session, WT_VERB_TRANSACTION,
			    "old snapshot %" PRIu64
			    " pinned in session %d [%s]"
			    " with snap_min %" PRIu64 "\n",
			    oldest_id, oldest_session->id,
			    oldest_session->lastop,
			    oldest_session->txn.snap_min);
		}
		WT_ASSERT(session, txn_global->scan_count > 0);
		(void)__wt_atomic_subiv32(&txn_global->scan_count, 1);
	}
}
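
The scan_count handshake above is worth isolating: readers atomically increment the count while it is non-negative, and the thread that wants to publish new IDs must CAS the count from 1 to -1, locking out everyone else. A minimal sketch with C11 atomics; all names are illustrative.

#include <sched.h>
#include <stdatomic.h>
#include <stdbool.h>

static atomic_int scan_count;   /* > 0: readers scanning, -1: publisher */

static void
scan_enter(void)
{
        int count;

        for (;;) {
                count = atomic_load(&scan_count);
                if (count >= 0 && atomic_compare_exchange_weak(
                    &scan_count, &count, count + 1))
                        return;
                sched_yield();  /* a publisher holds exclusive access */
        }
}

static void
scan_leave(void)
{
        (void)atomic_fetch_sub(&scan_count, 1);
}

/* Only the last remaining scanner can become the publisher. */
static bool
publish_try_exclusive(void)
{
        int one = 1;

        return (atomic_compare_exchange_strong(&scan_count, &one, -1));
}

static void
publish_done(void)
{
        atomic_store(&scan_count, 0);   /* reopen scanning */
}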
Example #23
/*
 * __ref_descend_prev --
 *	Descend the tree one level, during a previous-cursor walk.
 */
static inline void
__ref_descend_prev(
    WT_SESSION_IMPL *session, WT_REF *ref, WT_PAGE_INDEX **pindexp)
{
	WT_PAGE_INDEX *pindex;
	uint64_t yield_count;

	/*
	 * We're passed a child page into which we're descending, and on which
	 * we have a hazard pointer.
	 */
	for (yield_count = 0;; yield_count++, __wt_yield()) {
		/*
		 * There's a split race when a cursor moving backwards through
		 * the tree descends the tree. If we're splitting an internal
		 * page into its parent, we move the WT_REF structures and
		 * update the parent's page index before updating the split
		 * page's page index, and it's not an atomic update. A thread
		 * can read the parent page's replacement page index and then
		 * read the split page's original index.
		 *
		 * This can create a race for previous-cursor movements.
		 *
		 * For example, imagine an internal page with 3 child pages,
		 * with the namespaces a-f, g-h and i-j; the first child page
		 * splits. The parent starts out with the following page-index:
		 *
		 *	| ... | a | g | i | ... |
		 *
		 * The split page starts out with the following page-index:
		 *
		 *	| a | b | c | d | e | f |
		 *
		 * The first step is to move the c-f ranges into a new subtree,
		 * so, for example we might have two new internal pages 'c' and
		 * 'e', where the new 'c' page references the c-d namespace and
		 * the new 'e' page references the e-f namespace. The top of the
		 * subtree references the parent page, but until the parent's
		 * page index is updated, any threads in the subtree won't be
		 * able to ascend out of the subtree. However, once the parent
		 * page's page index is updated to this:
		 *
		 *	| ... | a | c | e | g | i | ... |
		 *
		 * threads in the subtree can ascend into the parent. Imagine a
		 * cursor in the c-d part of the namespace that ascends to the
		 * parent's 'c' slot. It would then decrement to the slot before
		 * the 'c' slot, the 'a' slot.
		 *
		 * The previous-cursor movement selects the last slot in the 'a'
		 * page; if the split page's page-index hasn't been updated yet,
		 * it will select the 'f' slot, which is incorrect. Once the
		 * split page's page index is updated to this:
		 *
		 *	| a | b |
		 *
		 * the previous-cursor movement will select the 'b' slot, which
		 * is correct.
		 *
		 * This function takes an argument which is the internal page
		 * from which we're descending. If the last slot on the page no
		 * longer points to the current page as its "home", the page is
		 * being split and part of its namespace moved. We have the
		 * correct page and we don't have to move, all we have to do is
		 * wait until the split page's page index is updated.
		 */
		WT_INTL_INDEX_GET(session, ref->page, pindex);
		if (pindex->index[pindex->entries - 1]->home == ref->page)
			break;
	}
	*pindexp = pindex;
	WT_STAT_CONN_INCRV(session, tree_descend_blocked, yield_count);
}
Example #24
File: bt_read.c Project: qihsh/mongo
/*
 * __wt_page_in_func --
 *	Acquire a hazard pointer to a page; if the page is not in-memory,
 *	read it from the disk and build an in-memory version.
 */
int
__wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags
#ifdef HAVE_DIAGNOSTIC
    , const char *file, int line
#endif
    )
{
	WT_BTREE *btree;
	WT_DECL_RET;
	WT_PAGE *page;
	u_int sleep_cnt, wait_cnt;
	int busy, cache_work, force_attempts, oldgen, stalled;

	btree = S2BT(session);
	stalled = 0;

	for (force_attempts = oldgen = 0, sleep_cnt = wait_cnt = 0;;) {
		switch (ref->state) {
		case WT_REF_DISK:
		case WT_REF_DELETED:
			if (LF_ISSET(WT_READ_CACHE))
				return (WT_NOTFOUND);

			/*
			 * The page isn't in memory, read it. If this thread is
			 * allowed to do eviction work, check for space in the
			 * cache.
			 */
			if (!LF_ISSET(WT_READ_NO_EVICT))
				WT_RET(__wt_cache_eviction_check(
				    session, 1, NULL));
			WT_RET(__page_read(session, ref));
			oldgen = LF_ISSET(WT_READ_WONT_NEED) ||
			    F_ISSET(session, WT_SESSION_NO_CACHE);
			continue;
		case WT_REF_READING:
			if (LF_ISSET(WT_READ_CACHE))
				return (WT_NOTFOUND);
			if (LF_ISSET(WT_READ_NO_WAIT))
				return (WT_NOTFOUND);

			/* Waiting on another thread's read, stall. */
			WT_STAT_FAST_CONN_INCR(session, page_read_blocked);
			stalled = 1;
			break;
		case WT_REF_LOCKED:
			if (LF_ISSET(WT_READ_NO_WAIT))
				return (WT_NOTFOUND);

			/* Waiting on eviction, stall. */
			WT_STAT_FAST_CONN_INCR(session, page_locked_blocked);
			stalled = 1;
			break;
		case WT_REF_SPLIT:
			return (WT_RESTART);
		case WT_REF_MEM:
			/*
			 * The page is in memory.
			 *
			 * Get a hazard pointer if one is required. We cannot
			 * be evicting if no hazard pointer is required, we're
			 * done.
			 */
			if (F_ISSET(btree, WT_BTREE_IN_MEMORY))
				goto skip_evict;

			/*
			 * The expected reason we can't get a hazard pointer is
			 * because the page is being evicted, yield, try again.
			 */
#ifdef HAVE_DIAGNOSTIC
			WT_RET(
			    __wt_hazard_set(session, ref, &busy, file, line));
#else
			WT_RET(__wt_hazard_set(session, ref, &busy));
#endif
			if (busy) {
				WT_STAT_FAST_CONN_INCR(
				    session, page_busy_blocked);
				break;
			}

			/*
			 * If eviction is configured for this file, check to see
			 * if the page qualifies for forced eviction and update
			 * the page's generation number. If eviction isn't being
			 * done on this file, we're done.
			 */
			if (LF_ISSET(WT_READ_NO_EVICT) ||
			    F_ISSET(session, WT_SESSION_NO_EVICTION) ||
			    F_ISSET(btree, WT_BTREE_NO_EVICTION))
				goto skip_evict;

			/*
			 * Forcibly evict pages that are too big.
			 */
			page = ref->page;
			if (force_attempts < 10 &&
			    __evict_force_check(session, page)) {
				++force_attempts;
				ret = __wt_page_release_evict(session, ref);
				/* If forced eviction fails, stall. */
				if (ret == EBUSY) {
					ret = 0;
					WT_STAT_FAST_CONN_INCR(session,
					    page_forcible_evict_blocked);
					stalled = 1;
					break;
				}
				WT_RET(ret);

				/*
				 * The result of a successful forced eviction
				 * is a page-state transition (potentially to
				 * an in-memory page we can use, or a restart
				 * return for our caller), continue the outer
				 * page-acquisition loop.
				 */
				continue;
			}

			/*
			 * If we read the page and we are configured to not
			 * trash the cache, set the oldest read generation so
			 * the page is forcibly evicted as soon as possible.
			 *
			 * Otherwise, update the page's read generation.
			 */
			if (oldgen && page->read_gen == WT_READGEN_NOTSET)
				__wt_page_evict_soon(page);
			else if (!LF_ISSET(WT_READ_NO_GEN) &&
			    page->read_gen != WT_READGEN_OLDEST &&
			    page->read_gen < __wt_cache_read_gen(session))
				page->read_gen =
				    __wt_cache_read_gen_bump(session);
skip_evict:
			/*
			 * Check if we need an autocommit transaction.
			 * Starting a transaction can trigger eviction, so skip
			 * it if eviction isn't permitted.
			 */
			return (LF_ISSET(WT_READ_NO_EVICT) ? 0 :
			    __wt_txn_autocommit_check(session));
		WT_ILLEGAL_VALUE(session);
		}

		/*
		 * We failed to get the page -- yield before retrying, and if
		 * we've yielded enough times, start sleeping so we don't burn
		 * CPU to no purpose.
		 */
		if (stalled)
			wait_cnt += 1000;
		else if (++wait_cnt < 1000) {
			__wt_yield();
			continue;
		}

		/*
		 * If stalling and this thread is allowed to do eviction work,
		 * check if the cache needs help. If we do work for the cache,
		 * substitute that for a sleep.
		 */
		if (!LF_ISSET(WT_READ_NO_EVICT)) {
			WT_RET(
			    __wt_cache_eviction_check(session, 1, &cache_work));
			if (cache_work)
				continue;
		}
		sleep_cnt = WT_MIN(sleep_cnt + 1000, 10000);
		WT_STAT_FAST_CONN_INCRV(session, page_sleep, sleep_cnt);
		__wt_sleep(0, sleep_cnt);
	}
}
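
The stall handling at the bottom of the loop generalizes the yield-then-sleep idea: a stalled waiter skips the yield phase, and the sleep time grows by a fixed step up to a cap. A small helper sketching that policy; the constants mirror the ones above but the helper itself is illustrative.

#include <sched.h>
#include <time.h>

#define YIELD_LIMIT     1000            /* yields before sleeping */
#define SLEEP_STEP      1000            /* microseconds added per retry */
#define SLEEP_MAX       10000           /* microsecond cap */

static void
backoff(unsigned int *wait_cntp, unsigned int *sleep_cntp, int stalled)
{
        struct timespec ts;

        if (stalled)                    /* skip straight to sleeping */
                *wait_cntp += YIELD_LIMIT;
        if (++*wait_cntp < YIELD_LIMIT) {
                sched_yield();
                return;
        }
        *sleep_cntp += SLEEP_STEP;
        if (*sleep_cntp > SLEEP_MAX)
                *sleep_cntp = SLEEP_MAX;
        ts.tv_sec = 0;
        ts.tv_nsec = (long)*sleep_cntp * 1000;  /* us -> ns */
        (void)nanosleep(&ts, NULL);
}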
Example #25
File: conn_log.c Project: christkv/mongo
/*
 * __log_wrlsn_server --
 *	The log wrlsn server thread.
 */
static WT_THREAD_RET
__log_wrlsn_server(void *arg)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_LOG *log;
	WT_LOG_WRLSN_ENTRY written[WT_SLOT_POOL];
	WT_LOGSLOT *slot;
	WT_SESSION_IMPL *session;
	size_t written_i;
	uint32_t i, save_i;
	int yield;

	session = arg;
	conn = S2C(session);
	log = conn->log;
	yield = 0;
	while (F_ISSET(conn, WT_CONN_LOG_SERVER_RUN)) {
		/*
		 * No need to use the log_slot_lock because the slot pool
		 * is statically allocated and any slot in the
		 * WT_LOG_SLOT_WRITTEN state is exclusively ours for now.
		 */
		i = 0;
		written_i = 0;
		/*
		 * Walk the array once saving any slots that are in the
		 * WT_LOG_SLOT_WRITTEN state.
		 */
		while (i < WT_SLOT_POOL) {
			save_i = i;
			slot = &log->slot_pool[i++];
			if (slot->slot_state != WT_LOG_SLOT_WRITTEN)
				continue;
			written[written_i].slot_index = save_i;
			written[written_i++].lsn = slot->slot_release_lsn;
		}
		/*
		 * If we found any written slots process them.  We sort them
		 * based on the release LSN, and then look for them in order.
		 */
		if (written_i > 0) {
			yield = 0;
			WT_INSERTION_SORT(written, written_i,
			    WT_LOG_WRLSN_ENTRY, WT_WRLSN_ENTRY_CMP_LT);

			/*
			 * We know the written array is sorted by LSN.  Go
			 * through them either advancing write_lsn or stop
			 * as soon as one is not in order.
			 */
			for (i = 0; i < written_i; i++) {
				if (WT_LOG_CMP(&log->write_lsn,
				    &written[i].lsn) != 0)
					break;
				/*
				 * If we get here we have a slot to process.
				 * Advance the LSN and process the slot.
				 */
				slot = &log->slot_pool[written[i].slot_index];
				WT_ASSERT(session, WT_LOG_CMP(&written[i].lsn,
				    &slot->slot_release_lsn) == 0);
				log->write_start_lsn = slot->slot_start_lsn;
				log->write_lsn = slot->slot_end_lsn;
				WT_ERR(__wt_cond_signal(session,
				    log->log_write_cond));
				WT_STAT_FAST_CONN_INCR(session, log_write_lsn);

				/*
				 * Signal the close thread if needed.
				 */
				if (F_ISSET(slot, WT_SLOT_CLOSEFH))
					WT_ERR(__wt_cond_signal(session,
					    conn->log_file_cond));
				WT_ERR(__wt_log_slot_free(session, slot));
			}
		}
		/*
		 * If we saw a later write, we always want to yield because
		 * we know something is in progress.
		 */
		if (yield++ < 1000)
			__wt_yield();
		else
			/* Wait until the next event. */
			WT_ERR(__wt_cond_wait(session,
			    conn->log_wrlsn_cond, 100000));
	}

	if (0) {
err:		__wt_err(session, ret, "log wrlsn server error");
	}
	return (WT_THREAD_RET_VALUE);
}
Example #26
/*
 * __wt_connection_close --
 *	Close a connection handle.
 */
int
__wt_connection_close(WT_CONNECTION_IMPL *conn)
{
	WT_CONNECTION *wt_conn;
	WT_DECL_RET;
	WT_DLH *dlh;
	WT_SESSION_IMPL *s, *session;
	WT_TXN_GLOBAL *txn_global;
	u_int i;

	wt_conn = &conn->iface;
	txn_global = &conn->txn_global;
	session = conn->default_session;

	/*
	 * We're shutting down.  Make sure everything gets freed.
	 *
	 * It's possible that the eviction server is in the middle of a long
	 * operation, with a transaction ID pinned.  In that case, we will loop
	 * here until the transaction ID is released, when the oldest
	 * transaction ID will catch up with the current ID.
	 */
	for (;;) {
		WT_TRET(__wt_txn_update_oldest(session,
		    WT_TXN_OLDEST_STRICT | WT_TXN_OLDEST_WAIT));
		if (txn_global->oldest_id == txn_global->current)
			break;
		__wt_yield();
	}

	/* Clear any pending async ops. */
	WT_TRET(__wt_async_flush(session));

	/*
	 * Shut down server threads other than the eviction server, which is
	 * needed later to close btree handles.  Some of these threads access
	 * btree handles, so take care in ordering shutdown to make sure they
	 * exit before files are closed.
	 */
	F_CLR(conn, WT_CONN_SERVER_RUN);
	WT_TRET(__wt_async_destroy(session));
	WT_TRET(__wt_lsm_manager_destroy(session));
	WT_TRET(__wt_sweep_destroy(session));

	F_SET(conn, WT_CONN_CLOSING);

	WT_TRET(__wt_checkpoint_server_destroy(session));
	WT_TRET(__wt_statlog_destroy(session, true));
	WT_TRET(__wt_evict_destroy(session));

	/* Shut down the lookaside table, after all eviction is complete. */
	WT_TRET(__wt_las_destroy(session));

	/* Close open data handles. */
	WT_TRET(__wt_conn_dhandle_discard(session));

	/* Shut down metadata tracking, required before creating tables. */
	WT_TRET(__wt_meta_track_destroy(session));

	/*
	 * Now that all data handles are closed, tell logging that a checkpoint
	 * has completed then shut down the log manager (only after closing
	 * data handles).  The call to destroy the log manager is outside the
	 * conditional because we allocate the log path so that printlog can
	 * run without running logging or recovery.
	 */
	if (FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED) &&
	    FLD_ISSET(conn->log_flags, WT_CONN_LOG_RECOVER_DONE))
		WT_TRET(__wt_txn_checkpoint_log(
		    session, true, WT_TXN_LOG_CKPT_STOP, NULL));
	F_CLR(conn, WT_CONN_LOG_SERVER_RUN);
	WT_TRET(__wt_logmgr_destroy(session));

	/* Free memory for collators, compressors, data sources. */
	WT_TRET(__wt_conn_remove_collator(session));
	WT_TRET(__wt_conn_remove_compressor(session));
	WT_TRET(__wt_conn_remove_data_source(session));
	WT_TRET(__wt_conn_remove_encryptor(session));
	WT_TRET(__wt_conn_remove_extractor(session));

	/* Disconnect from shared cache - must be before cache destroy. */
	WT_TRET(__wt_conn_cache_pool_destroy(session));

	/* Discard the cache. */
	WT_TRET(__wt_cache_destroy(session));

	/* Discard transaction state. */
	WT_TRET(__wt_txn_global_destroy(session));

	/* Close extensions, first calling any unload entry point. */
	while ((dlh = TAILQ_FIRST(&conn->dlhqh)) != NULL) {
		TAILQ_REMOVE(&conn->dlhqh, dlh, q);

		if (dlh->terminate != NULL)
			WT_TRET(dlh->terminate(wt_conn));
		WT_TRET(__wt_dlclose(session, dlh));
	}

	/* Close the lock file, opening up the database to other connections. */
	if (conn->lock_fh != NULL)
		WT_TRET(__wt_close(session, &conn->lock_fh));

	/* Close any file handles left open. */
	WT_TRET(__wt_close_connection_close(session));

	/*
	 * Close the internal (default) session, and switch back to the dummy
	 * session in case of any error messages from the remaining operations
	 * while destroying the connection handle.
	 */
	if (session != &conn->dummy_session) {
		WT_TRET(session->iface.close(&session->iface, NULL));
		session = conn->default_session = &conn->dummy_session;
	}

	/*
	 * The session's split stash isn't discarded during normal session close
	 * because it may persist past the life of the session.  Discard it now.
	 */
	if ((s = conn->sessions) != NULL)
		for (i = 0; i < conn->session_size; ++s, ++i)
			__wt_split_stash_discard_all(session, s);

	/*
	 * The session's hazard pointer memory isn't discarded during normal
	 * session close because access to it isn't serialized.  Discard it
	 * now.
	 */
	if ((s = conn->sessions) != NULL)
		for (i = 0; i < conn->session_size; ++s, ++i) {
			/*
			 * If hash arrays were allocated, free them now.
			 */
			__wt_free(session, s->dhhash);
			__wt_free(session, s->tablehash);
			__wt_free(session, s->hazard);
		}

	/* Destroy the handle. */
	WT_TRET(__wt_connection_destroy(conn));

	return (ret);
}
Example #27
File: log.c Project: EaseTech/wiredtiger
/*
 * __log_release --
 *	Release a log slot.
 */
static int
__log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_FH *close_fh;
	WT_LOG *log;
	WT_LSN sync_lsn;
	size_t write_size;
	WT_DECL_SPINLOCK_ID(id);			/* Must appear last */

	conn = S2C(session);
	log = conn->log;
	/*
	 * If we're going to have to close our log file, make a local copy
	 * of the file handle structure.
	 */
	close_fh = NULL;
	if (F_ISSET(slot, SLOT_CLOSEFH)) {
		close_fh = log->log_close_fh;
		log->log_close_fh = NULL;
		F_CLR(slot, SLOT_CLOSEFH);
	}

	/* Write the buffered records */
	if (F_ISSET(slot, SLOT_BUFFERED)) {
		write_size = (size_t)
		    (slot->slot_end_lsn.offset - slot->slot_start_offset);
		WT_ERR(__wt_write(session, slot->slot_fh,
		    slot->slot_start_offset, write_size, slot->slot_buf.mem));
	}

	/*
	 * Wait for earlier groups to finish, otherwise there could be holes
	 * in the log file.
	 */
	while (LOG_CMP(&log->write_lsn, &slot->slot_release_lsn) != 0)
		__wt_yield();
	log->write_lsn = slot->slot_end_lsn;
	/*
	 * Try to consolidate calls to fsync to wait less.  Acquire a spin lock
	 * so that threads finishing writing to the log will wait while the
	 * current fsync completes and advance log->write_lsn.
	 */
	while (F_ISSET(slot, SLOT_SYNC) &&
	    LOG_CMP(&log->sync_lsn, &slot->slot_end_lsn) < 0) {
		if (__wt_spin_trylock(session, &log->log_sync_lock, &id) != 0) {
			(void)__wt_cond_wait(
			    session, log->log_sync_cond, 10000);
			continue;
		}
		/*
		 * Record the current end of log after we grabbed the lock.
	 * That is how far our fsync call will guarantee.
		 */
		sync_lsn = log->write_lsn;
		if (LOG_CMP(&log->sync_lsn, &slot->slot_end_lsn) < 0) {
			WT_STAT_FAST_CONN_INCR(session, log_sync);
			ret = __wt_fsync(session, log->log_fh);
			if (ret == 0) {
				F_CLR(slot, SLOT_SYNC);
				log->sync_lsn = sync_lsn;
				ret = __wt_cond_signal(
				    session, log->log_sync_cond);
			}
		}
		__wt_spin_unlock(session, &log->log_sync_lock);
		WT_ERR(ret);
	}
	if (F_ISSET(slot, SLOT_BUF_GROW)) {
		WT_STAT_FAST_CONN_INCR(session, log_buffer_grow);
		F_CLR(slot, SLOT_BUF_GROW);
		WT_STAT_FAST_CONN_INCRV(session,
		    log_buffer_size, slot->slot_buf.memsize);
		WT_ERR(__wt_buf_grow(session,
		    &slot->slot_buf, slot->slot_buf.memsize * 2));
	}
	/*
	 * If we have a file to close, close it now.
	 */
	if (close_fh)
		WT_ERR(__wt_close(session, close_fh));

err:	if (ret != 0 && slot->slot_error == 0)
		slot->slot_error = ret;
	return (ret);
}
Example #28
/*
 * Child process creates the database and table, and then writes data into
 * the table until it is killed by the parent.
 */
static void
fill_db(void)
{
	FILE *fp;
	WT_CONNECTION *conn;
	WT_CURSOR *cursor;
	WT_ITEM data;
	WT_RAND_STATE rnd;
	WT_SESSION *session;
	uint64_t i;
	int ret;
	uint8_t buf[MAX_VAL];

	__wt_random_init(&rnd);
	memset(buf, 0, sizeof(buf));
	/*
	 * Initialize the first 25% to random values.  Leave a bunch of data
	 * space at the end to emphasize zero data.
	 */
	for (i = 0; i < MAX_VAL/4; i++)
		buf[i] = (uint8_t)__wt_random(&rnd);

	/*
	 * Run in the home directory so that the records file is in there too.
	 */
	if (chdir(home) != 0)
		testutil_die(errno, "chdir: %s", home);
	if ((ret = wiredtiger_open(NULL, NULL, ENV_CONFIG, &conn)) != 0)
		testutil_die(ret, "wiredtiger_open");
	if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
		testutil_die(ret, "WT_CONNECTION:open_session");
	if ((ret = session->create(session,
	    uri, "key_format=Q,value_format=u")) != 0)
		testutil_die(ret, "WT_SESSION.create: %s", uri);
	if ((ret =
	    session->open_cursor(session, uri, NULL, NULL, &cursor)) != 0)
		testutil_die(ret, "WT_SESSION.open_cursor: %s", uri);

	/*
	 * Keep a separate file with the records we wrote for checking.
	 */
	(void)unlink(RECORDS_FILE);
	if ((fp = fopen(RECORDS_FILE, "w")) == NULL)
		testutil_die(errno, "fopen");
	/*
	 * Set to no buffering.
	 */
	setvbuf(fp, NULL, _IONBF, 0);

	/*
	 * Write data into the table until we are killed by the parent.
	 * The data in the buffer is already set to random content.
	 */
	data.data = buf;
	for (i = 0;; ++i) {
		data.size = __wt_random(&rnd) % MAX_VAL;
		cursor->set_key(cursor, i);
		cursor->set_value(cursor, &data);
		if ((ret = cursor->insert(cursor)) != 0)
			testutil_die(ret, "WT_CURSOR.insert");
		/*
		 * Save the key separately for checking later.
		 */
		if (fprintf(fp, "%" PRIu64 "\n", i) == -1)
			testutil_die(errno, "fprintf");
		if (i % 5000)
			__wt_yield();
	}
}
Example #29
/*
 * __log_file_server --
 *	The log file server thread.  This worker thread manages
 *	log file operations such as closing and syncing.
 */
static WT_THREAD_RET
__log_file_server(void *arg)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_FH *close_fh;
	WT_LOG *log;
	WT_LSN close_end_lsn, min_lsn;
	WT_SESSION_IMPL *session;
	uint32_t filenum;
	int locked;

	session = arg;
	conn = S2C(session);
	log = conn->log;
	locked = 0;
	while (F_ISSET(conn, WT_CONN_LOG_SERVER_RUN)) {
		/*
		 * If there is a log file to close, make sure any outstanding
		 * write operations have completed, then fsync and close it.
		 */
		if ((close_fh = log->log_close_fh) != NULL) {
			WT_ERR(__wt_log_extract_lognum(session, close_fh->name,
			    &filenum));
			/*
			 * We update the close file handle before updating the
			 * close LSN when changing files.  It is possible we
			 * could see mismatched settings.  If we do, yield
			 * until it is set.  This should rarely happen.
			 */
			while (log->log_close_lsn.file < filenum)
				__wt_yield();

			if (__wt_log_cmp(
			    &log->write_lsn, &log->log_close_lsn) >= 0) {
				/*
				 * We've copied the file handle, clear out the
				 * one in the log structure to allow it to be
				 * set again.  Copy the LSN before clearing
				 * the file handle.
				 * Use a barrier to make sure the compiler does
				 * not reorder the following two statements.
				 */
				close_end_lsn = log->log_close_lsn;
				WT_FULL_BARRIER();
				log->log_close_fh = NULL;
				/*
				 * Set the close_end_lsn to the LSN immediately
				 * after ours.  That is, the beginning of the
				 * next log file.   We need to know the LSN
				 * file number of our own close in case earlier
				 * calls are still in progress and the next one
				 * to move the sync_lsn into the next file for
				 * later syncs.
				 */
				close_end_lsn.file++;
				close_end_lsn.offset = 0;
				WT_ERR(__wt_fsync(session, close_fh));
				__wt_spin_lock(session, &log->log_sync_lock);
				locked = 1;
				WT_ERR(__wt_close(session, &close_fh));
				WT_ASSERT(session, __wt_log_cmp(
				    &close_end_lsn, &log->sync_lsn) >= 0);
				log->sync_lsn = close_end_lsn;
				WT_ERR(__wt_cond_signal(
				    session, log->log_sync_cond));
				locked = 0;
				__wt_spin_unlock(session, &log->log_sync_lock);
			}
		}
		/*
		 * If a later thread asked for a background sync, do it now.
		 */
		if (__wt_log_cmp(&log->bg_sync_lsn, &log->sync_lsn) > 0) {
			/*
			 * Save the latest write LSN which is the minimum
			 * we will have written to disk.
			 */
			min_lsn = log->write_lsn;
			/*
			 * We have to wait until the LSN we asked for is
			 * written.  If it isn't signal the wrlsn thread
			 * to get it written.
			 */
			if (__wt_log_cmp(&log->bg_sync_lsn, &min_lsn) <= 0) {
				WT_ERR(__wt_fsync(session, log->log_fh));
				__wt_spin_lock(session, &log->log_sync_lock);
				locked = 1;
				/*
				 * The sync LSN could have advanced while we
				 * were writing to disk.
				 */
				if (__wt_log_cmp(
				    &log->sync_lsn, &min_lsn) <= 0) {
					log->sync_lsn = min_lsn;
					WT_ERR(__wt_cond_signal(
					    session, log->log_sync_cond));
				}
				locked = 0;
				__wt_spin_unlock(session, &log->log_sync_lock);
			} else {
				WT_ERR(__wt_cond_signal(
				    session, conn->log_wrlsn_cond));
				/*
				 * We do not want to wait potentially a second
				 * to process this.  Yield to give the wrlsn
				 * thread a chance to run and try again in
				 * this case.
				 */
				__wt_yield();
				continue;
			}
		}
		/* Wait until the next event. */
		WT_ERR(__wt_cond_wait(
		    session, conn->log_file_cond, WT_MILLION));
	}

	if (0) {
err:		__wt_err(session, ret, "log close server error");
	}
	if (locked)
		__wt_spin_unlock(session, &log->log_sync_lock);
	return (WT_THREAD_RET_VALUE);
}
Example #30
File: bt_page.c Project: 3rf/mongo
/*
 * __wt_page_in_func --
 *	Acquire a hazard pointer to a page; if the page is not in-memory,
 *	read it from the disk and build an in-memory version.
 */
int
__wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags
#ifdef HAVE_DIAGNOSTIC
    , const char *file, int line
#endif
    )
{
	WT_DECL_RET;
	WT_PAGE *page;
	int busy, force_attempts, oldgen;

	for (force_attempts = oldgen = 0;;) {
		switch (ref->state) {
		case WT_REF_DISK:
		case WT_REF_DELETED:
			if (LF_ISSET(WT_READ_CACHE))
				return (WT_NOTFOUND);

			/*
			 * The page isn't in memory, attempt to read it.
			 * Make sure there is space in the cache.
			 */
			WT_RET(__wt_cache_full_check(session));
			WT_RET(__wt_cache_read(session, ref));
			oldgen = LF_ISSET(WT_READ_WONT_NEED) ||
			    F_ISSET(session, WT_SESSION_NO_CACHE);
			continue;
		case WT_REF_READING:
			if (LF_ISSET(WT_READ_CACHE))
				return (WT_NOTFOUND);
			/* FALLTHROUGH */
		case WT_REF_LOCKED:
			if (LF_ISSET(WT_READ_NO_WAIT))
				return (WT_NOTFOUND);
			/* The page is busy -- wait. */
			break;
		case WT_REF_SPLIT:
			return (WT_RESTART);
		case WT_REF_MEM:
			/*
			 * The page is in memory: get a hazard pointer, update
			 * the page's LRU and return.  The expected reason we
			 * can't get a hazard pointer is because the page is
			 * being evicted; yield and try again.
			 */
#ifdef HAVE_DIAGNOSTIC
			WT_RET(
			    __wt_hazard_set(session, ref, &busy, file, line));
#else
			WT_RET(__wt_hazard_set(session, ref, &busy));
#endif
			if (busy)
				break;

			page = ref->page;
			WT_ASSERT(session, page != NULL);

			/* Forcibly evict pages that are too big. */
			if (!LF_ISSET(WT_READ_NO_EVICT) &&
			    force_attempts < 10 &&
			    __evict_force_check(session, page)) {
				++force_attempts;
				WT_RET(__wt_page_release(session, ref, flags));
				break;
			}

			/* Check if we need an autocommit transaction. */
			if ((ret = __wt_txn_autocommit_check(session)) != 0) {
				WT_TRET(__wt_hazard_clear(session, page));
				return (ret);
			}

			/*
			 * If we read the page and we are configured to not
			 * trash the cache, set the oldest read generation so
			 * the page is forcibly evicted as soon as possible.
			 *
			 * Otherwise, update the page's read generation.
			 */
			if (oldgen && page->read_gen == WT_READGEN_NOTSET)
				__wt_page_evict_soon(page);
			else if (!LF_ISSET(WT_READ_NO_GEN) &&
			    page->read_gen < __wt_cache_read_gen(session))
				page->read_gen =
				    __wt_cache_read_gen_set(session);

			return (0);
		WT_ILLEGAL_VALUE(session);
		}

		/* We failed to get the page -- yield before retrying. */
		__wt_yield();
	}
}