Beispiel #1
0
/*
 * fop --
 *	File operation function.
 */
static void *
fop(void *arg)
{
	STATS *s;
	uintptr_t id;
	WT_RAND_STATE rnd;
	u_int i;

	id = (uintptr_t)arg;
	__wt_yield();		/* Get all the threads created. */

	s = &run_stats[id];
	__wt_random_init(&rnd);

	for (i = 0; i < nops; ++i, __wt_yield())
		switch (__wt_random(&rnd) % 10) {
		case 0:
			++s->bulk;
			obj_bulk();
			break;
		case 1:
			++s->create;
			obj_create();
			break;
		case 2:
			++s->cursor;
			obj_cursor();
			break;
		case 3:
			++s->drop;
			obj_drop(__wt_random(&rnd) & 1);
			break;
		case 4:
			++s->ckpt;
			obj_checkpoint();
			break;
		case 5:
			++s->upgrade;
			obj_upgrade();
			break;
		case 6:
			++s->rebalance;
			obj_rebalance();
			break;
		case 7:
			++s->verify;
			obj_verify();
			break;
		case 8:
			++s->bulk_unique;
			obj_bulk_unique(__wt_random(&rnd) & 1);
			break;
		case 9:
			++s->create_unique;
			obj_create_unique(__wt_random(&rnd) & 1);
			break;
		}

	return (NULL);
}
Beispiel #2
0
/*
 * thread_ckpt_run --
 *	Runner function for the checkpoint thread.
 */
static WT_THREAD_RET
thread_ckpt_run(void *arg)
{
	FILE *fp;
	WT_RAND_STATE rnd;
	WT_SESSION *session;
	WT_THREAD_DATA *td;
	uint64_t ts;
	uint32_t sleep_time;
	int i, ret;
	bool first_ckpt;

	__wt_random_init(&rnd);

	td = (WT_THREAD_DATA *)arg;
	/*
	 * Keep a separate file with the records we wrote for checking.
	 */
	(void)unlink(ckpt_file);
	if ((ret = td->conn->open_session(td->conn, NULL, NULL, &session)) != 0)
		testutil_die(ret, "WT_CONNECTION:open_session");
	first_ckpt = true;
	ts = 0;
	for (i = 0; ;++i) {
		sleep_time = __wt_random(&rnd) % MAX_CKPT_INTERVAL;
		sleep(sleep_time);
		if (use_ts)
			ts = global_ts;
		/*
		 * Since this is the default, send in this string even if
		 * running without timestamps.
		 */
		testutil_check(session->checkpoint(
		    session, "use_timestamp=true"));
		printf("Checkpoint %d complete.  Minimum ts %" PRIu64 "\n",
		    i, ts);
		fflush(stdout);
		/*
		 * Create the checkpoint file so that the parent process knows
		 * at least one checkpoint has finished and can start its
		 * timer.
		 */
		if (first_ckpt) {
			testutil_checksys((fp = fopen(ckpt_file, "w")) == NULL);
			first_ckpt = false;
			testutil_checksys(fclose(fp) != 0);
		}
	}
	/* NOTREACHED */
}
Beispiel #3
0
/*
 * thread_ckpt_run --
 *	Runner function for the checkpoint thread.
 */
static WT_THREAD_RET
thread_ckpt_run(void *arg)
{
	FILE *fp;
	WT_RAND_STATE rnd;
	WT_SESSION *session;
	THREAD_DATA *td;
	uint64_t stable;
	uint32_t sleep_time;
	int i;
	bool first_ckpt;
	char ts_string[WT_TS_HEX_STRING_SIZE];

	__wt_random_init(&rnd);

	td = (THREAD_DATA *)arg;
	/*
	 * Keep a separate file with the records we wrote for checking.
	 */
	(void)unlink(ckpt_file);
	testutil_check(td->conn->open_session(td->conn, NULL, NULL, &session));
	first_ckpt = true;
	for (i = 0; ;++i) {
		sleep_time = __wt_random(&rnd) % MAX_CKPT_INVL;
		sleep(sleep_time);
		/*
		 * Since this is the default, send in this string even if
		 * running without timestamps.
		 */
		testutil_check(session->checkpoint(
		    session, "use_timestamp=true"));
		testutil_check(td->conn->query_timestamp(
		    td->conn, ts_string, "get=last_checkpoint"));
		testutil_assert(sscanf(ts_string, "%" SCNx64, &stable) == 1);
		printf("Checkpoint %d complete at stable %"
		    PRIu64 ".\n", i, stable);
		fflush(stdout);
		/*
		 * Create the checkpoint file so that the parent process knows
		 * at least one checkpoint has finished and can start its
		 * timer.
		 */
		if (first_ckpt) {
			testutil_checksys((fp = fopen(ckpt_file, "w")) == NULL);
			first_ckpt = false;
			testutil_checksys(fclose(fp) != 0);
		}
	}
	/* NOTREACHED */
}
Beispiel #4
0
static void *
ops(void *arg)
{
	TINFO *tinfo;
	WT_CONNECTION *conn;
	WT_CURSOR *cursor, *cursor_insert;
	WT_SESSION *session;
	WT_ITEM key, value;
	uint64_t keyno, ckpt_op, session_op;
	uint32_t op;
	uint8_t *keybuf, *valbuf;
	u_int np;
	int ckpt_available, dir, insert, intxn, notfound, readonly, ret;
	char *ckpt_config, ckpt_name[64];

	tinfo = arg;

	/* Initialize the per-thread random number generator. */
	__wt_random_init(&tinfo->rnd);

	conn = g.wts_conn;
	keybuf = valbuf = NULL;
	readonly = 0;			/* -Wconditional-uninitialized */

	/* Set up the default key and value buffers. */
	key_gen_setup(&keybuf);
	val_gen_setup(&tinfo->rnd, &valbuf);

	/* Set the first operation where we'll create sessions and cursors. */
	session_op = 0;
	session = NULL;
	cursor = cursor_insert = NULL;

	/* Set the first operation where we'll perform checkpoint operations. */
	ckpt_op = g.c_checkpoints ? mmrand(&tinfo->rnd, 100, 10000) : 0;
	ckpt_available = 0;

	for (intxn = 0; !tinfo->quit; ++tinfo->ops) {
		/*
		 * We can't checkpoint or swap sessions/cursors while in a
		 * transaction, resolve any running transaction.
		 */
		if (intxn &&
		    (tinfo->ops == ckpt_op || tinfo->ops == session_op)) {
			if ((ret = session->commit_transaction(
			    session, NULL)) != 0)
				die(ret, "session.commit_transaction");
			++tinfo->commit;
			intxn = 0;
		}

		/* Open up a new session and cursors. */
		if (tinfo->ops == session_op ||
		    session == NULL || cursor == NULL) {
			if (session != NULL &&
			    (ret = session->close(session, NULL)) != 0)
				die(ret, "session.close");

			if ((ret = conn->open_session(conn, NULL,
			    ops_session_config(&tinfo->rnd), &session)) != 0)
				die(ret, "connection.open_session");

			/*
			 * 10% of the time, perform some read-only operations
			 * from a checkpoint.
			 *
			 * Skip that if we single-threaded and doing checks
			 * against a Berkeley DB database, because that won't
			 * work because the Berkeley DB database records won't
			 * match the checkpoint.  Also skip if we are using
			 * LSM, because it doesn't support reads from
			 * checkpoints.
			 */
			if (!SINGLETHREADED && !DATASOURCE("lsm") &&
			    ckpt_available && mmrand(&tinfo->rnd, 1, 10) == 1) {
				if ((ret = session->open_cursor(session,
				    g.uri, NULL, ckpt_name, &cursor)) != 0)
					die(ret, "session.open_cursor");

				/* Pick the next session/cursor close/open. */
				session_op += 250;

				/* Checkpoints are read-only. */
				readonly = 1;
			} else {
				/*
				 * Open two cursors: one for overwriting and one
				 * for append (if it's a column-store).
				 *
				 * The reason is when testing with existing
				 * records, we don't track if a record was
				 * deleted or not, which means we must use
				 * cursor->insert with overwriting configured.
				 * But, in column-store files where we're
				 * testing with new, appended records, we don't
				 * want to have to specify the record number,
				 * which requires an append configuration.
				 */
				if ((ret = session->open_cursor(session, g.uri,
				    NULL, "overwrite", &cursor)) != 0)
					die(ret, "session.open_cursor");
				if ((g.type == FIX || g.type == VAR) &&
				    (ret = session->open_cursor(session, g.uri,
				    NULL, "append", &cursor_insert)) != 0)
					die(ret, "session.open_cursor");

				/* Pick the next session/cursor close/open. */
				session_op += mmrand(&tinfo->rnd, 100, 5000);

				/* Updates supported. */
				readonly = 0;
			}
		}

		/* Checkpoint the database. */
		if (tinfo->ops == ckpt_op && g.c_checkpoints) {
			/*
			 * LSM and data-sources don't support named checkpoints,
			 * and we can't drop a named checkpoint while there's a
			 * cursor open on it, otherwise 20% of the time name the
			 * checkpoint.
			 */
			if (DATASOURCE("helium") || DATASOURCE("kvsbdb") ||
			    DATASOURCE("lsm") ||
			    readonly || mmrand(&tinfo->rnd, 1, 5) == 1)
				ckpt_config = NULL;
			else {
				(void)snprintf(ckpt_name, sizeof(ckpt_name),
				    "name=thread-%d", tinfo->id);
				ckpt_config = ckpt_name;
			}

			/* Named checkpoints lock out backups */
			if (ckpt_config != NULL &&
			    (ret = pthread_rwlock_wrlock(&g.backup_lock)) != 0)
				die(ret,
				    "pthread_rwlock_wrlock: backup lock");

			if ((ret =
			    session->checkpoint(session, ckpt_config)) != 0)
				die(ret, "session.checkpoint%s%s",
				    ckpt_config == NULL ? "" : ": ",
				    ckpt_config == NULL ? "" : ckpt_config);

			if (ckpt_config != NULL &&
			    (ret = pthread_rwlock_unlock(&g.backup_lock)) != 0)
				die(ret,
				    "pthread_rwlock_wrlock: backup lock");

			/* Rephrase the checkpoint name for cursor open. */
			if (ckpt_config == NULL)
				strcpy(ckpt_name,
				    "checkpoint=WiredTigerCheckpoint");
			else
				(void)snprintf(ckpt_name, sizeof(ckpt_name),
				    "checkpoint=thread-%d", tinfo->id);
			ckpt_available = 1;

			/* Pick the next checkpoint operation. */
			ckpt_op += mmrand(&tinfo->rnd, 5000, 20000);
		}

		/*
		 * If we're not single-threaded and we're not in a transaction,
		 * start a transaction 20% of the time.
		 */
		if (!SINGLETHREADED &&
		    !intxn && mmrand(&tinfo->rnd, 1, 10) >= 8) {
			if ((ret =
			    session->begin_transaction(session, NULL)) != 0)
				die(ret, "session.begin_transaction");
			intxn = 1;
		}

		insert = notfound = 0;

		keyno = mmrand(&tinfo->rnd, 1, (u_int)g.rows);
		key.data = keybuf;
		value.data = valbuf;

		/*
		 * Perform some number of operations: the percentage of deletes,
		 * inserts and writes are specified, reads are the rest.  The
		 * percentages don't have to add up to 100, a high percentage
		 * of deletes will mean fewer inserts and writes.  Modifications
		 * are always followed by a read to confirm it worked.
		 */
		op = readonly ? UINT32_MAX : mmrand(&tinfo->rnd, 1, 100);
		if (op < g.c_delete_pct) {
			++tinfo->remove;
			switch (g.type) {
			case ROW:
				/*
				 * If deleting a non-existent record, the cursor
				 * won't be positioned, and so can't do a next.
				 */
				if (row_remove(cursor, &key, keyno, &notfound))
					goto deadlock;
				break;
			case FIX:
			case VAR:
				if (col_remove(cursor, &key, keyno, &notfound))
					goto deadlock;
				break;
			}
		} else if (op < g.c_delete_pct + g.c_insert_pct) {
			++tinfo->insert;
			switch (g.type) {
			case ROW:
				if (row_insert(
				    tinfo, cursor, &key, &value, keyno))
					goto deadlock;
				insert = 1;
				break;
			case FIX:
			case VAR:
				/*
				 * We can only append so many new records, if
				 * we've reached that limit, update a record
				 * instead of doing an insert.
				 */
				if (g.append_cnt >= g.append_max)
					goto skip_insert;

				/* Insert, then reset the insert cursor. */
				if (col_insert(tinfo,
				    cursor_insert, &key, &value, &keyno))
					goto deadlock;
				if ((ret =
				    cursor_insert->reset(cursor_insert)) != 0)
					die(ret, "cursor.reset");

				insert = 1;
				break;
			}
		} else if (
		    op < g.c_delete_pct + g.c_insert_pct + g.c_write_pct) {
			++tinfo->update;
			switch (g.type) {
			case ROW:
				if (row_update(
				    tinfo, cursor, &key, &value, keyno))
					goto deadlock;
				break;
			case FIX:
			case VAR:
skip_insert:			if (col_update(tinfo,
				    cursor, &key, &value, keyno))
					goto deadlock;
				break;
			}
		} else {
			++tinfo->search;
			if (read_row(cursor, &key, keyno))
				if (intxn)
					goto deadlock;
			continue;
		}

		/*
		 * The cursor is positioned if we did any operation other than
		 * insert, do a small number of next/prev cursor operations in
		 * a random direction.
		 */
		if (!insert) {
			dir = (int)mmrand(&tinfo->rnd, 0, 1);
			for (np = 0; np < mmrand(&tinfo->rnd, 1, 8); ++np) {
				if (notfound)
					break;
				if (nextprev(cursor, dir, &notfound))
					goto deadlock;
			}
		}

		/* Read to confirm the operation. */
		++tinfo->search;
		if (read_row(cursor, &key, keyno))
			goto deadlock;

		/* Reset the cursor: there is no reason to keep pages pinned. */
		if ((ret = cursor->reset(cursor)) != 0)
			die(ret, "cursor.reset");

		/*
		 * If we're in the transaction, commit 40% of the time and
		 * rollback 10% of the time.
		 */
		if (intxn)
			switch (mmrand(&tinfo->rnd, 1, 10)) {
			case 1: case 2: case 3: case 4:		/* 40% */
				if ((ret = session->commit_transaction(
				    session, NULL)) != 0)
					die(ret, "session.commit_transaction");
				++tinfo->commit;
				intxn = 0;
				break;
			case 5:					/* 10% */
				if (0) {
deadlock:				++tinfo->deadlock;
				}
				if ((ret = session->rollback_transaction(
				    session, NULL)) != 0)
					die(ret,
					    "session.rollback_transaction");
				++tinfo->rollback;
				intxn = 0;
				break;
			default:
				break;
			}
	}

	if (session != NULL && (ret = session->close(session, NULL)) != 0)
		die(ret, "session.close");

	free(keybuf);
	free(valbuf);

	tinfo->state = TINFO_COMPLETE;
	return (NULL);
}
Beispiel #5
0
/*
 * __wt_connection_init --
 *	Structure initialization for a just-created WT_CONNECTION_IMPL handle.
 */
int
__wt_connection_init(WT_CONNECTION_IMPL *conn)
{
	WT_SESSION_IMPL *session;
	u_int i;

	session = conn->default_session;

	for (i = 0; i < WT_HASH_ARRAY_SIZE; i++) {
		TAILQ_INIT(&conn->dhhash[i]);	/* Data handle hash lists */
		TAILQ_INIT(&conn->fhhash[i]);	/* File handle hash lists */
	}

	TAILQ_INIT(&conn->dhqh);		/* Data handle list */
	TAILQ_INIT(&conn->dlhqh);		/* Library list */
	TAILQ_INIT(&conn->dsrcqh);		/* Data source list */
	TAILQ_INIT(&conn->fhqh);		/* File list */
	TAILQ_INIT(&conn->collqh);		/* Collator list */
	TAILQ_INIT(&conn->compqh);		/* Compressor list */
	TAILQ_INIT(&conn->encryptqh);		/* Encryptor list */
	TAILQ_INIT(&conn->extractorqh);		/* Extractor list */

	TAILQ_INIT(&conn->lsmqh);		/* WT_LSM_TREE list */

	/* Setup the LSM work queues. */
	TAILQ_INIT(&conn->lsm_manager.switchqh);
	TAILQ_INIT(&conn->lsm_manager.appqh);
	TAILQ_INIT(&conn->lsm_manager.managerqh);

	/* Random numbers. */
	__wt_random_init(&session->rnd);

	/* Configuration. */
	WT_RET(__wt_conn_config_init(session));

	/* Statistics. */
	WT_RET(__wt_stat_connection_init(session, conn));

	/* Spinlocks. */
	WT_RET(__wt_spin_init(session, &conn->api_lock, "api"));
	WT_SPIN_INIT_TRACKED(session, &conn->checkpoint_lock, checkpoint);
	WT_SPIN_INIT_TRACKED(session, &conn->dhandle_lock, handle_list);
	WT_RET(__wt_spin_init(session, &conn->encryptor_lock, "encryptor"));
	WT_RET(__wt_spin_init(session, &conn->fh_lock, "file list"));
	WT_RET(__wt_spin_init(session, &conn->las_lock, "lookaside table"));
	WT_SPIN_INIT_TRACKED(session, &conn->metadata_lock, metadata);
	WT_RET(__wt_spin_init(session, &conn->reconfig_lock, "reconfigure"));
	WT_SPIN_INIT_TRACKED(session, &conn->schema_lock, schema);
	WT_SPIN_INIT_TRACKED(session, &conn->table_lock, table);
	WT_RET(__wt_spin_init(session, &conn->turtle_lock, "turtle file"));

	/* Read-write locks */
	WT_RET(__wt_rwlock_alloc(
	    session, &conn->hot_backup_lock, "hot backup"));

	WT_RET(__wt_calloc_def(session, WT_PAGE_LOCKS, &conn->page_lock));
	WT_CACHE_LINE_ALIGNMENT_VERIFY(session, conn->page_lock);
	for (i = 0; i < WT_PAGE_LOCKS; ++i)
		WT_RET(
		    __wt_spin_init(session, &conn->page_lock[i], "btree page"));

	/* Setup the spin locks for the LSM manager queues. */
	WT_RET(__wt_spin_init(session,
	    &conn->lsm_manager.app_lock, "LSM application queue lock"));
	WT_RET(__wt_spin_init(session,
	    &conn->lsm_manager.manager_lock, "LSM manager queue lock"));
	WT_RET(__wt_spin_init(
	    session, &conn->lsm_manager.switch_lock, "LSM switch queue lock"));
	WT_RET(__wt_cond_alloc(
	    session, "LSM worker cond", false, &conn->lsm_manager.work_cond));

	/*
	 * Generation numbers.
	 *
	 * Start split generations at one.  Threads publish this generation
	 * number before examining tree structures, and zero when they leave.
	 * We need to distinguish between threads that are in a tree before the
	 * first split has happened, and threads that are not in a tree.
	 */
	conn->split_gen = 1;

	/*
	 * Block manager.
	 * XXX
	 * If there's ever a second block manager, we'll want to make this
	 * more opaque, but for now this is simpler.
	 */
	WT_RET(__wt_spin_init(session, &conn->block_lock, "block manager"));
	for (i = 0; i < WT_HASH_ARRAY_SIZE; i++)
		TAILQ_INIT(&conn->blockhash[i]);/* Block handle hash lists */
	TAILQ_INIT(&conn->blockqh);		/* Block manager list */

	return (0);
}
Beispiel #6
0
/*
 * thread_run --
 *	Runner function for the worker threads.
 */
static WT_THREAD_RET
thread_run(void *arg)
{
	FILE *fp;
	WT_CURSOR *cur_coll, *cur_local, *cur_oplog, *cur_stable;
	WT_ITEM data;
	WT_RAND_STATE rnd;
	WT_SESSION *session;
	WT_THREAD_DATA *td;
	uint64_t i, stable_ts;
	int ret;
	char cbuf[MAX_VAL], lbuf[MAX_VAL], obuf[MAX_VAL];
	char kname[64], tscfg[64];

	__wt_random_init(&rnd);
	memset(cbuf, 0, sizeof(cbuf));
	memset(lbuf, 0, sizeof(lbuf));
	memset(obuf, 0, sizeof(obuf));
	memset(kname, 0, sizeof(kname));

	td = (WT_THREAD_DATA *)arg;
	/*
	 * Set up the separate file for checking.
	 */
	testutil_check(__wt_snprintf(cbuf, sizeof(cbuf), RECORDS_FILE, td->id));
	(void)unlink(cbuf);
	testutil_checksys((fp = fopen(cbuf, "w")) == NULL);
	/*
	 * Set to line buffering.  But that is advisory only.  We've seen
	 * cases where the result files end up with partial lines.
	 */
	__wt_stream_set_line_buffer(fp);
	if ((ret = td->conn->open_session(td->conn, NULL, NULL, &session)) != 0)
		testutil_die(ret, "WT_CONNECTION:open_session");
	/*
	 * Open a cursor to each table.
	 */
	if ((ret = session->open_cursor(session,
	    uri_collection, NULL, NULL, &cur_coll)) != 0)
		testutil_die(ret, "WT_SESSION.open_cursor: %s", uri_collection);
	if ((ret = session->open_cursor(session,
	    uri_local, NULL, NULL, &cur_local)) != 0)
		testutil_die(ret, "WT_SESSION.open_cursor: %s", uri_local);
	if ((ret = session->open_cursor(session,
	    uri_oplog, NULL, NULL, &cur_oplog)) != 0)
		testutil_die(ret, "WT_SESSION.open_cursor: %s", uri_oplog);

	if ((ret = session->open_cursor(
	    session, stable_store, NULL, NULL, &cur_stable)) != 0)
		testutil_die(ret, "WT_SESSION.open_cursor: %s", stable_store);

	/*
	 * Write our portion of the key space until we're killed.
	 */
	printf("Thread %" PRIu32 " starts at %" PRIu64 "\n", td->id, td->start);
	for (i = td->start; ; ++i) {
		if (use_ts)
			stable_ts = global_ts++;
		else
			stable_ts = 0;
		testutil_check(__wt_snprintf(
		    kname, sizeof(kname), "%" PRIu64, i));

		testutil_check(session->begin_transaction(session, NULL));
		cur_coll->set_key(cur_coll, kname);
		cur_local->set_key(cur_local, kname);
		cur_oplog->set_key(cur_oplog, kname);
		/*
		 * Put an informative string into the value so that it
		 * can be viewed well in a binary dump.
		 */
		testutil_check(__wt_snprintf(cbuf, sizeof(cbuf),
		    "COLL: thread:%" PRIu64 " ts:%" PRIu64 " key: %" PRIu64,
		    td->id, stable_ts, i));
		testutil_check(__wt_snprintf(lbuf, sizeof(lbuf),
		    "LOCAL: thread:%" PRIu64 " ts:%" PRIu64 " key: %" PRIu64,
		    td->id, stable_ts, i));
		testutil_check(__wt_snprintf(obuf, sizeof(obuf),
		    "OPLOG: thread:%" PRIu64 " ts:%" PRIu64 " key: %" PRIu64,
		    td->id, stable_ts, i));
		data.size = __wt_random(&rnd) % MAX_VAL;
		data.data = cbuf;
		cur_coll->set_value(cur_coll, &data);
		if ((ret = cur_coll->insert(cur_coll)) != 0)
			testutil_die(ret, "WT_CURSOR.insert");
		data.size = __wt_random(&rnd) % MAX_VAL;
		data.data = obuf;
		cur_oplog->set_value(cur_oplog, &data);
		if ((ret = cur_oplog->insert(cur_oplog)) != 0)
			testutil_die(ret, "WT_CURSOR.insert");
		if (use_ts) {
			testutil_check(__wt_snprintf(tscfg, sizeof(tscfg),
			    "commit_timestamp=%" PRIx64, stable_ts));
			testutil_check(
			    session->commit_transaction(session, tscfg));
		} else
			testutil_check(
			    session->commit_transaction(session, NULL));
		/*
		 * Insert into the local table outside the timestamp txn.
		 */
		data.size = __wt_random(&rnd) % MAX_VAL;
		data.data = lbuf;
		cur_local->set_value(cur_local, &data);
		if ((ret = cur_local->insert(cur_local)) != 0)
			testutil_die(ret, "WT_CURSOR.insert");

		/*
		 * Every N records we will record our stable timestamp into the
		 * stable table.  That will define our threshold where we
		 * expect to find records after recovery.
		 */
		if (i % STABLE_PERIOD == 0) {
			if (use_ts) {
				/*
				 * Set both the oldest and stable timestamp
				 * so that we don't need to maintain read
				 * availability at older timestamps.
				 */
				testutil_check(__wt_snprintf(
				    tscfg, sizeof(tscfg),
				    "oldest_timestamp=%" PRIx64
				    ",stable_timestamp=%" PRIx64,
				    stable_ts, stable_ts));
				testutil_check(
				    td->conn->set_timestamp(td->conn, tscfg));
			}
			cur_stable->set_key(cur_stable, td->id);
			cur_stable->set_value(cur_stable, stable_ts);
			testutil_check(cur_stable->insert(cur_stable));
		}
		/*
		 * Save the timestamp and key separately for checking later.
		 */
		if (fprintf(fp,
		    "%" PRIu64 " %" PRIu64 "\n", stable_ts, i) < 0)
			testutil_die(EIO, "fprintf");
	}
	/* NOTREACHED */
}
Beispiel #7
0
/*
 * Child process creates the database and table, and then writes data into
 * the table until it is killed by the parent.
 */
static void
fill_db(void)
{
	FILE *fp;
	WT_CONNECTION *conn;
	WT_CURSOR *cursor;
	WT_ITEM data;
	WT_RAND_STATE rnd;
	WT_SESSION *session;
	uint64_t i;
	int ret;
	uint8_t buf[MAX_VAL];

	__wt_random_init(&rnd);
	memset(buf, 0, sizeof(buf));
	/*
	 * Initialize the first 25% to random values.  Leave a bunch of data
	 * space at the end to emphasize zero data.
	 */
	for (i = 0; i < MAX_VAL/4; i++)
		buf[i] = (uint8_t)__wt_random(&rnd);

	/*
	 * Run in the home directory so that the records file is in there too.
	 */
	chdir(home);
	if ((ret = wiredtiger_open(NULL, NULL, ENV_CONFIG, &conn)) != 0)
		testutil_die(ret, "wiredtiger_open");
	if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
		testutil_die(ret, "WT_CONNECTION:open_session");
	if ((ret = session->create(session,
	    uri, "key_format=Q,value_format=u")) != 0)
		testutil_die(ret, "WT_SESSION.create: %s", uri);
	if ((ret =
	    session->open_cursor(session, uri, NULL, NULL, &cursor)) != 0)
		testutil_die(ret, "WT_SESSION.open_cursor: %s", uri);

	/*
	 * Keep a separate file with the records we wrote for checking.
	 */
	(void)unlink(RECORDS_FILE);
	if ((fp = fopen(RECORDS_FILE, "w")) == NULL)
		testutil_die(errno, "fopen");
	/*
	 * Set to no buffering.
	 */
	setvbuf(fp, NULL, _IONBF, 0);

	/*
	 * Write data into the table until we are killed by the parent.
	 * The data in the buffer is already set to random content.
	 */
	data.data = buf;
	for (i = 0;; ++i) {
		data.size = __wt_random(&rnd) % MAX_VAL;
		cursor->set_key(cursor, i);
		cursor->set_value(cursor, &data);
		if ((ret = cursor->insert(cursor)) != 0)
			testutil_die(ret, "WT_CURSOR.insert");
		/*
		 * Save the key separately for checking later.
		 */
		if (fprintf(fp, "%" PRIu64 "\n", i) == -1)
			testutil_die(errno, "fprintf");
		if (i % 5000)
			__wt_yield();
	}
}
Beispiel #8
0
int
main(int argc, char *argv[])
{
	FILE *fp;
	WT_CONNECTION *conn;
	WT_CURSOR *cursor;
	WT_SESSION *session;
	WT_RAND_STATE rnd;
	uint64_t key;
	uint32_t absent, count, timeout;
	int ch, status, ret;
	pid_t pid;
	char *working_dir;

	if ((progname = strrchr(argv[0], DIR_DELIM)) == NULL)
		progname = argv[0];
	else
		++progname;

	working_dir = NULL;
	timeout = 10;
	while ((ch = __wt_getopt(progname, argc, argv, "h:t:")) != EOF)
		switch (ch) {
		case 'h':
			working_dir = __wt_optarg;
			break;
		case 't':
			timeout = (uint32_t)atoi(__wt_optarg);
			break;
		default:
			usage();
		}
	argc -= __wt_optind;
	argv += __wt_optind;
	if (argc != 0)
		usage();

	testutil_work_dir_from_path(home, 512, working_dir);
	testutil_make_work_dir(home);

	/*
	 * Fork a child to insert as many items.  We will then randomly
	 * kill the child, run recovery and make sure all items we wrote
	 * exist after recovery runs.
	 */
	if ((pid = fork()) < 0)
		testutil_die(errno, "fork");

	if (pid == 0) { /* child */
		fill_db();
		return (EXIT_SUCCESS);
	}

	/* parent */
	__wt_random_init(&rnd);
	/* Sleep for the configured amount of time before killing the child. */
	printf("Parent: sleep %" PRIu32 " seconds, then kill child\n", timeout);
	sleep(timeout);

	/*
	 * !!! It should be plenty long enough to make sure more than one
	 * log file exists.  If wanted, that check would be added here.
	 */
	printf("Kill child\n");
	if (kill(pid, SIGKILL) != 0)
		testutil_die(errno, "kill");
	waitpid(pid, &status, 0);

	/*
	 * !!! If we wanted to take a copy of the directory before recovery,
	 * this is the place to do it.
	 */
	chdir(home);
	printf("Open database, run recovery and verify content\n");
	if ((ret = wiredtiger_open(NULL, NULL, ENV_CONFIG_REC, &conn)) != 0)
		testutil_die(ret, "wiredtiger_open");
	if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
		testutil_die(ret, "WT_CONNECTION:open_session");
	if ((ret =
	    session->open_cursor(session, uri, NULL, NULL, &cursor)) != 0)
		testutil_die(ret, "WT_SESSION.open_cursor: %s", uri);

	if ((fp = fopen(RECORDS_FILE, "r")) == NULL)
		testutil_die(errno, "fopen");

	/*
	 * For every key in the saved file, verify that the key exists
	 * in the table after recovery.  Since we did write-no-sync, we
	 * expect every key to have been recovered.
	 */
	for (absent = count = 0;; ++count) {
		ret = fscanf(fp, "%" SCNu64 "\n", &key);
		if (ret != EOF && ret != 1)
			testutil_die(errno, "fscanf");
		if (ret == EOF)
			break;
		cursor->set_key(cursor, key);
		if ((ret = cursor->search(cursor)) != 0) {
			if (ret != WT_NOTFOUND)
				testutil_die(ret, "search");
			printf("no record with key %" PRIu64 "\n", key);
			++absent;
		}
	}
	fclose(fp);
	if ((ret = conn->close(conn, NULL)) != 0)
		testutil_die(ret, "WT_CONNECTION:close");
	if (absent) {
		printf("%u record(s) absent from %u\n", absent, count);
		return (EXIT_FAILURE);
	}
	printf("%u records verified\n", count);
	return (EXIT_SUCCESS);
}
Beispiel #9
0
/*
 * thread_run --
 *	Runner function for the worker threads.
 */
static WT_THREAD_RET
thread_run(void *arg)
{
	FILE *fp;
	WT_CURSOR *cur_coll, *cur_local, *cur_oplog;
	WT_ITEM data;
	WT_RAND_STATE rnd;
	WT_SESSION *prepared_session, *session;
	THREAD_DATA *td;
	uint64_t i, active_ts;
	char cbuf[MAX_VAL], lbuf[MAX_VAL], obuf[MAX_VAL];
	char kname[64], tscfg[64], uri[128];
	bool use_prep;

	__wt_random_init(&rnd);
	memset(cbuf, 0, sizeof(cbuf));
	memset(lbuf, 0, sizeof(lbuf));
	memset(obuf, 0, sizeof(obuf));
	memset(kname, 0, sizeof(kname));

	prepared_session = NULL;
	td = (THREAD_DATA *)arg;
	/*
	 * Set up the separate file for checking.
	 */
	testutil_check(__wt_snprintf(
	    cbuf, sizeof(cbuf), RECORDS_FILE, td->info));
	(void)unlink(cbuf);
	testutil_checksys((fp = fopen(cbuf, "w")) == NULL);
	/*
	 * Set to line buffering.  But that is advisory only.  We've seen
	 * cases where the result files end up with partial lines.
	 */
	__wt_stream_set_line_buffer(fp);

	/*
	 * Have 10% of the threads use prepared transactions if timestamps
	 * are in use. Thread numbers start at 0 so we're always guaranteed
	 * that at least one thread is using prepared transactions.
	 */
	use_prep = (use_ts && td->info % PREPARE_PCT == 0) ? true : false;

	/*
	 * For the prepared case we have two sessions so that the oplog session
	 * can have its own transaction in parallel with the collection session
	 * We need this because prepared transactions cannot have any operations
	 * that modify a table that is logged. But we also want to test mixed
	 * logged and not-logged transactions.
	 */
	testutil_check(td->conn->open_session(td->conn, NULL, NULL, &session));
	if (use_prep)
		testutil_check(td->conn->open_session(
		    td->conn, NULL, NULL, &prepared_session));
	/*
	 * Open a cursor to each table.
	 */
	testutil_check(__wt_snprintf(
	    uri, sizeof(uri), "%s:%s", table_pfx, uri_collection));
	if (use_prep)
		testutil_check(prepared_session->open_cursor(prepared_session,
		    uri, NULL, NULL, &cur_coll));
	else
		testutil_check(session->open_cursor(session,
		    uri, NULL, NULL, &cur_coll));
	testutil_check(__wt_snprintf(
	    uri, sizeof(uri), "%s:%s", table_pfx, uri_local));
	if (use_prep)
		testutil_check(prepared_session->open_cursor(prepared_session,
		    uri, NULL, NULL, &cur_local));
	else
		testutil_check(session->open_cursor(session,
		    uri, NULL, NULL, &cur_local));
	testutil_check(__wt_snprintf(
	    uri, sizeof(uri), "%s:%s", table_pfx, uri_oplog));
	testutil_check(session->open_cursor(session,
	    uri, NULL, NULL, &cur_oplog));

	/*
	 * Write our portion of the key space until we're killed.
	 */
	printf("Thread %" PRIu32 " starts at %" PRIu64 "\n",
	    td->info, td->start);
	active_ts = 0;
	for (i = td->start;; ++i) {
		testutil_check(__wt_snprintf(
		    kname, sizeof(kname), "%" PRIu64, i));

		testutil_check(session->begin_transaction(session, NULL));
		if (use_prep)
			testutil_check(prepared_session->begin_transaction(
			    prepared_session, NULL));

		if (use_ts) {
			testutil_check(pthread_rwlock_rdlock(&ts_lock));
			active_ts = __wt_atomic_addv64(&global_ts, 1);
			testutil_check(__wt_snprintf(tscfg,
			    sizeof(tscfg), "commit_timestamp=%" PRIx64,
			    active_ts));
			/*
			 * Set the transaction's timestamp now before performing
			 * the operation. If we are using prepared transactions,
			 * set the timestamp for the session used for oplog. The
			 * collection session in that case would continue to use
			 * this timestamp.
			 */
			testutil_check(session->timestamp_transaction(
			    session, tscfg));
			testutil_check(pthread_rwlock_unlock(&ts_lock));
		}

		cur_coll->set_key(cur_coll, kname);
		cur_local->set_key(cur_local, kname);
		cur_oplog->set_key(cur_oplog, kname);
		/*
		 * Put an informative string into the value so that it
		 * can be viewed well in a binary dump.
		 */
		testutil_check(__wt_snprintf(cbuf, sizeof(cbuf),
		    "COLL: thread:%" PRIu64 " ts:%" PRIu64 " key: %" PRIu64,
		    td->info, active_ts, i));
		testutil_check(__wt_snprintf(lbuf, sizeof(lbuf),
		    "LOCAL: thread:%" PRIu64 " ts:%" PRIu64 " key: %" PRIu64,
		    td->info, active_ts, i));
		testutil_check(__wt_snprintf(obuf, sizeof(obuf),
		    "OPLOG: thread:%" PRIu64 " ts:%" PRIu64 " key: %" PRIu64,
		    td->info, active_ts, i));
		data.size = __wt_random(&rnd) % MAX_VAL;
		data.data = cbuf;
		cur_coll->set_value(cur_coll, &data);
		testutil_check(cur_coll->insert(cur_coll));
		data.size = __wt_random(&rnd) % MAX_VAL;
		data.data = obuf;
		cur_oplog->set_value(cur_oplog, &data);
		testutil_check(cur_oplog->insert(cur_oplog));
		if (use_prep) {
			/*
			 * Run with prepare every once in a while. And also
			 * yield after prepare sometimes too. This is only done
			 * on the collection session.
			 */
			if (i % PREPARE_FREQ == 0) {
				testutil_check(__wt_snprintf(tscfg,
				    sizeof(tscfg), "prepare_timestamp=%"
				    PRIx64, active_ts));
				testutil_check(
				    prepared_session->prepare_transaction(
				    prepared_session, tscfg));
				if (i % PREPARE_YIELD == 0)
					__wt_yield();
				testutil_check(
				    __wt_snprintf(tscfg, sizeof(tscfg),
				    "commit_timestamp=%" PRIx64
				    ",durable_timestamp=%" PRIx64,
				    active_ts, active_ts));
			} else
				testutil_check(
				    __wt_snprintf(tscfg, sizeof(tscfg),
				    "commit_timestamp=%" PRIx64, active_ts));

			testutil_check(
			    prepared_session->commit_transaction(
			    prepared_session, tscfg));
		}
		testutil_check(
		    session->commit_transaction(session, NULL));
		/*
		 * Insert into the local table outside the timestamp txn.
		 */
		data.size = __wt_random(&rnd) % MAX_VAL;
		data.data = lbuf;
		cur_local->set_value(cur_local, &data);
		testutil_check(cur_local->insert(cur_local));

		/*
		 * Save the timestamp and key separately for checking later.
		 */
		if (fprintf(fp,
		    "%" PRIu64 " %" PRIu64 "\n", active_ts, i) < 0)
			testutil_die(EIO, "fprintf");
	}
	/* NOTREACHED */
}
Beispiel #10
0
/*
 * __wt_connection_init --
 *	Structure initialization for a just-created WT_CONNECTION_IMPL handle.
 */
int
__wt_connection_init(WT_CONNECTION_IMPL *conn)
{
	WT_SESSION_IMPL *session;
	u_int i;

	session = conn->default_session;

	for (i = 0; i < WT_HASH_ARRAY_SIZE; i++) {
		TAILQ_INIT(&conn->dhhash[i]);	/* Data handle hash lists */
		TAILQ_INIT(&conn->fhhash[i]);	/* File handle hash lists */
	}

	TAILQ_INIT(&conn->dhqh);		/* Data handle list */
	TAILQ_INIT(&conn->dlhqh);		/* Library list */
	TAILQ_INIT(&conn->dsrcqh);		/* Data source list */
	TAILQ_INIT(&conn->fhqh);		/* File list */
	TAILQ_INIT(&conn->collqh);		/* Collator list */
	TAILQ_INIT(&conn->compqh);		/* Compressor list */
	TAILQ_INIT(&conn->encryptqh);		/* Encryptor list */
	TAILQ_INIT(&conn->extractorqh);		/* Extractor list */

	TAILQ_INIT(&conn->lsmqh);		/* WT_LSM_TREE list */

	/* Setup the LSM work queues. */
	TAILQ_INIT(&conn->lsm_manager.switchqh);
	TAILQ_INIT(&conn->lsm_manager.appqh);
	TAILQ_INIT(&conn->lsm_manager.managerqh);

	/* Random numbers. */
	__wt_random_init(&session->rnd);

	/* Configuration. */
	WT_RET(__wt_conn_config_init(session));

	/* Statistics. */
	WT_RET(__wt_stat_connection_init(session, conn));

	/* Spinlocks. */
	WT_RET(__wt_spin_init(session, &conn->api_lock, "api"));
	WT_SPIN_INIT_TRACKED(session, &conn->checkpoint_lock, checkpoint);
	WT_RET(__wt_spin_init(session, &conn->encryptor_lock, "encryptor"));
	WT_RET(__wt_spin_init(session, &conn->fh_lock, "file list"));
	WT_SPIN_INIT_TRACKED(session, &conn->metadata_lock, metadata);
	WT_RET(__wt_spin_init(session, &conn->reconfig_lock, "reconfigure"));
	WT_SPIN_INIT_SESSION_TRACKED(session, &conn->schema_lock, schema);
	WT_RET(__wt_spin_init(session, &conn->turtle_lock, "turtle file"));

	/* Read-write locks */
	WT_RWLOCK_INIT_SESSION_TRACKED(session, &conn->dhandle_lock, dhandle);
	WT_RET(__wt_rwlock_init(session, &conn->hot_backup_lock));
	WT_RWLOCK_INIT_TRACKED(session, &conn->table_lock, table);

	/* Setup the spin locks for the LSM manager queues. */
	WT_RET(__wt_spin_init(session,
	    &conn->lsm_manager.app_lock, "LSM application queue lock"));
	WT_RET(__wt_spin_init(session,
	    &conn->lsm_manager.manager_lock, "LSM manager queue lock"));
	WT_RET(__wt_spin_init(
	    session, &conn->lsm_manager.switch_lock, "LSM switch queue lock"));
	WT_RET(__wt_cond_alloc(
	    session, "LSM worker cond", &conn->lsm_manager.work_cond));

	/* Initialize the generation manager. */
	__wt_gen_init(session);

	/*
	 * Block manager.
	 * XXX
	 * If there's ever a second block manager, we'll want to make this
	 * more opaque, but for now this is simpler.
	 */
	WT_RET(__wt_spin_init(session, &conn->block_lock, "block manager"));
	for (i = 0; i < WT_HASH_ARRAY_SIZE; i++)
		TAILQ_INIT(&conn->blockhash[i]);/* Block handle hash lists */
	TAILQ_INIT(&conn->blockqh);		/* Block manager list */

	return (0);
}
Beispiel #11
0
int
main(int argc, char *argv[])
{
	time_t start;
	int ch, i, onerun, reps, ret;
	const char *config, *home;

	config = NULL;

	if ((g.progname = strrchr(argv[0], DIR_DELIM)) == NULL)
		g.progname = argv[0];
	else
		++g.progname;

#if 0
	/* Configure the GNU malloc for debugging. */
	(void)setenv("MALLOC_CHECK_", "2", 1);
#endif
#if 0
	/* Configure the FreeBSD malloc for debugging. */
	(void)setenv("MALLOC_OPTIONS", "AJ", 1);
#endif

	/* Track progress unless we're re-directing output to a file. */
	g.track = isatty(1) ? 1 : 0;

	/* Set values from the command line. */
	home = NULL;
	onerun = 0;
	while ((ch = __wt_getopt(
	    g.progname, argc, argv, "1C:c:H:h:Llqrt:")) != EOF)
		switch (ch) {
		case '1':			/* One run */
			onerun = 1;
			break;
		case 'C':			/* wiredtiger_open config */
			g.config_open = __wt_optarg;
			break;
		case 'c':			/* Configuration from a file */
			config = __wt_optarg;
			break;
		case 'H':
			g.helium_mount = __wt_optarg;
			break;
		case 'h':
			home = __wt_optarg;
			break;
		case 'L':			/* Re-direct output to a log */
			/*
			 * The -l option is a superset of -L, ignore -L if we
			 * have already configured logging for operations.
			 */
			if (g.logging == 0)
				g.logging = LOG_FILE;
			break;
		case 'l':			/* Turn on operation logging */
			g.logging = LOG_OPS;
			break;
		case 'q':			/* Quiet */
			g.track = 0;
			break;
		case 'r':			/* Replay a run */
			g.replay = 1;
			break;
		default:
			usage();
		}
	argc -= __wt_optind;
	argv += __wt_optind;

	/*
	 * Initialize the global RNG. Start with the standard seeds, and then
	 * use seconds since the Epoch modulo a prime to run the RNG for some
	 * number of steps, so we don't start with the same values every time.
	 */
	__wt_random_init(&g.rnd);
	for (i = (int)time(NULL) % 10007; i > 0; --i)
		(void)__wt_random(&g.rnd);

	/* Set up paths. */
	path_setup(home);

	/* If it's a replay, use the home directory's CONFIG file. */
	if (g.replay) {
		if (config != NULL)
			die(EINVAL, "-c incompatible with -r");
		if (access(g.home_config, R_OK) != 0)
			die(ENOENT, "%s", g.home_config);
		config = g.home_config;
	}

	/*
	 * If we weren't given a configuration file, set values from "CONFIG",
	 * if it exists.
	 *
	 * Small hack to ignore any CONFIG file named ".", that just makes it
	 * possible to ignore any local CONFIG file, used when running checks.
	 */
	if (config == NULL && access("CONFIG", R_OK) == 0)
		config = "CONFIG";
	if (config != NULL && strcmp(config, ".") != 0)
		config_file(config);

	/*
	 * The rest of the arguments are individual configurations that modify
	 * the base configuration.
	 */
	for (; *argv != NULL; ++argv)
		config_single(*argv, 1);

	/*
	 * Multithreaded runs can be replayed: it's useful and we'll get the
	 * configuration correct.  Obviously the order of operations changes,
	 * warn the user.
	 */
	if (g.replay && !SINGLETHREADED)
		printf("Warning: replaying a threaded run\n");

	/*
	 * Single-threaded runs historically exited after a single replay, which
	 * makes sense when you're debugging, leave that semantic in place.
	 */
	if (g.replay && SINGLETHREADED)
		g.c_runs = 1;

	/*
	 * Let the command line -1 flag override runs configured from other
	 * sources.
	 */
	if (onerun)
		g.c_runs = 1;

	/*
	 * Initialize locks to single-thread named checkpoints and backups, last
	 * last-record updates, and failures.
	 */
	if ((ret = pthread_rwlock_init(&g.append_lock, NULL)) != 0)
		die(ret, "pthread_rwlock_init: append lock");
	if ((ret = pthread_rwlock_init(&g.backup_lock, NULL)) != 0)
		die(ret, "pthread_rwlock_init: backup lock");
	if ((ret = pthread_rwlock_init(&g.death_lock, NULL)) != 0)
		die(ret, "pthread_rwlock_init: death lock");

	printf("%s: process %" PRIdMAX "\n", g.progname, (intmax_t)getpid());
	while (++g.run_cnt <= g.c_runs || g.c_runs == 0 ) {
		startup();			/* Start a run */

		config_setup();			/* Run configuration */
		config_print(0);		/* Dump run configuration */
		key_len_setup();		/* Setup keys */

		start = time(NULL);
		track("starting up", 0ULL, NULL);

#ifdef HAVE_BERKELEY_DB
		if (SINGLETHREADED)
			bdb_open();		/* Initial file config */
#endif
		wts_open(g.home, 1, &g.wts_conn);
		wts_create();

		wts_load();			/* Load initial records */
		wts_verify("post-bulk verify");	/* Verify */

		/*
		 * If we're not doing any operations, scan the bulk-load, copy
		 * the statistics and we're done. Otherwise, loop reading and
		 * operations, with a verify after each set.
		 */
		if (g.c_timer == 0 && g.c_ops == 0) {
			wts_read_scan();		/* Read scan */
			wts_stats();			/* Statistics */
		} else
			for (reps = 1; reps <= FORMAT_OPERATION_REPS; ++reps) {
				wts_read_scan();	/* Read scan */

							/* Operations */
				wts_ops(reps == FORMAT_OPERATION_REPS);

				/*
				 * Copy out the run's statistics after the last
				 * set of operations.
				 *
				 * XXX
				 * Verify closes the underlying handle and
				 * discards the statistics, read them first.
				 */
				if (reps == FORMAT_OPERATION_REPS)
					wts_stats();

							/* Verify */
				wts_verify("post-ops verify");
			}

		track("shutting down", 0ULL, NULL);
#ifdef HAVE_BERKELEY_DB
		if (SINGLETHREADED)
			bdb_close();
#endif
		wts_close();

		/*
		 * If single-threaded, we can dump and compare the WiredTiger
		 * and Berkeley DB data sets.
		 */
		if (SINGLETHREADED)
			wts_dump("standard", 1);

		/*
		 * Salvage testing.
		 */
		wts_salvage();

		/* Overwrite the progress line with a completion line. */
		if (g.track)
			printf("\r%78s\r", " ");
		printf("%4d: %s, %s (%.0f seconds)\n",
		    g.run_cnt, g.c_data_source,
		    g.c_file_type, difftime(time(NULL), start));
		fflush(stdout);
	}

	/* Flush/close any logging information. */
	fclose_and_clear(&g.logfp);
	fclose_and_clear(&g.randfp);

	config_print(0);

	if ((ret = pthread_rwlock_destroy(&g.append_lock)) != 0)
		die(ret, "pthread_rwlock_destroy: append lock");
	if ((ret = pthread_rwlock_destroy(&g.backup_lock)) != 0)
		die(ret, "pthread_rwlock_destroy: backup lock");

	config_clear();

	return (EXIT_SUCCESS);
}
Beispiel #12
0
/*
 * real_worker --
 *     A single worker thread that transactionally updates all tables with
 *     consistent values.
 */
static int
real_worker(void)
{
	WT_CURSOR **cursors;
	WT_RAND_STATE rnd;
	WT_SESSION *session;
	u_int i, keyno;
	int j, ret, t_ret;

	ret = t_ret = 0;

	__wt_random_init(&rnd);

	if ((cursors = calloc(
	    (size_t)(g.ntables), sizeof(WT_CURSOR *))) == NULL)
		return (log_print_err("malloc", ENOMEM, 1));

	if ((ret = g.conn->open_session(
	    g.conn, NULL, "isolation=snapshot", &session)) != 0) {
		(void)log_print_err("conn.open_session", ret, 1);
		goto err;
	}

	for (j = 0; j < g.ntables; j++)
		if ((ret = session->open_cursor(session,
		    g.cookies[j].uri, NULL, NULL, &cursors[j])) != 0) {
			(void)log_print_err("session.open_cursor", ret, 1);
			goto err;
		}

	for (i = 0; i < g.nops && g.running; ++i, __wt_yield()) {
		if ((ret = session->begin_transaction(session, NULL)) != 0) {
			(void)log_print_err(
			    "real_worker:begin_transaction", ret, 1);
			goto err;
		}
		keyno = __wt_random(&rnd) % g.nkeys + 1;
		for (j = 0; j < g.ntables; j++) {
			if ((ret = worker_op(cursors[j], keyno, i)) != 0)
				break;
		}
		if (ret == 0) {
			if ((ret = session->commit_transaction(
			    session, NULL)) != 0) {
				(void)log_print_err(
				    "real_worker:commit_transaction", ret, 1);
				goto err;
			}
		} else if (ret == WT_ROLLBACK) {
			if ((ret = session->rollback_transaction(
			    session, NULL)) != 0) {
				(void)log_print_err(
				    "real_worker:rollback_transaction", ret, 1);
				goto err;
			    }
		} else {
			(void)log_print_err("worker op failed", ret, 1);
			goto err;
		}
	}

err:	if ((t_ret = session->close(session, NULL)) != 0 && ret == 0) {
		ret = t_ret;
		(void)log_print_err("session.close", ret, 1);
	}
	free(cursors);

	return (ret);
}