/* * fop -- * File operation function. */ static void * fop(void *arg) { STATS *s; uintptr_t id; WT_RAND_STATE rnd; u_int i; id = (uintptr_t)arg; __wt_yield(); /* Get all the threads created. */ s = &run_stats[id]; __wt_random_init(&rnd); for (i = 0; i < nops; ++i, __wt_yield()) switch (__wt_random(&rnd) % 10) { case 0: ++s->bulk; obj_bulk(); break; case 1: ++s->create; obj_create(); break; case 2: ++s->cursor; obj_cursor(); break; case 3: ++s->drop; obj_drop(__wt_random(&rnd) & 1); break; case 4: ++s->ckpt; obj_checkpoint(); break; case 5: ++s->upgrade; obj_upgrade(); break; case 6: ++s->rebalance; obj_rebalance(); break; case 7: ++s->verify; obj_verify(); break; case 8: ++s->bulk_unique; obj_bulk_unique(__wt_random(&rnd) & 1); break; case 9: ++s->create_unique; obj_create_unique(__wt_random(&rnd) & 1); break; } return (NULL); }
/* * thread_ckpt_run -- * Runner function for the checkpoint thread. */ static WT_THREAD_RET thread_ckpt_run(void *arg) { FILE *fp; WT_RAND_STATE rnd; WT_SESSION *session; WT_THREAD_DATA *td; uint64_t ts; uint32_t sleep_time; int i, ret; bool first_ckpt; __wt_random_init(&rnd); td = (WT_THREAD_DATA *)arg; /* * Keep a separate file with the records we wrote for checking. */ (void)unlink(ckpt_file); if ((ret = td->conn->open_session(td->conn, NULL, NULL, &session)) != 0) testutil_die(ret, "WT_CONNECTION:open_session"); first_ckpt = true; ts = 0; for (i = 0; ;++i) { sleep_time = __wt_random(&rnd) % MAX_CKPT_INTERVAL; sleep(sleep_time); if (use_ts) ts = global_ts; /* * Since this is the default, send in this string even if * running without timestamps. */ testutil_check(session->checkpoint( session, "use_timestamp=true")); printf("Checkpoint %d complete. Minimum ts %" PRIu64 "\n", i, ts); fflush(stdout); /* * Create the checkpoint file so that the parent process knows * at least one checkpoint has finished and can start its * timer. */ if (first_ckpt) { testutil_checksys((fp = fopen(ckpt_file, "w")) == NULL); first_ckpt = false; testutil_checksys(fclose(fp) != 0); } } /* NOTREACHED */ }
/* * thread_ckpt_run -- * Runner function for the checkpoint thread. */ static WT_THREAD_RET thread_ckpt_run(void *arg) { FILE *fp; WT_RAND_STATE rnd; WT_SESSION *session; THREAD_DATA *td; uint64_t stable; uint32_t sleep_time; int i; bool first_ckpt; char ts_string[WT_TS_HEX_STRING_SIZE]; __wt_random_init(&rnd); td = (THREAD_DATA *)arg; /* * Keep a separate file with the records we wrote for checking. */ (void)unlink(ckpt_file); testutil_check(td->conn->open_session(td->conn, NULL, NULL, &session)); first_ckpt = true; for (i = 0; ;++i) { sleep_time = __wt_random(&rnd) % MAX_CKPT_INVL; sleep(sleep_time); /* * Since this is the default, send in this string even if * running without timestamps. */ testutil_check(session->checkpoint( session, "use_timestamp=true")); testutil_check(td->conn->query_timestamp( td->conn, ts_string, "get=last_checkpoint")); testutil_assert(sscanf(ts_string, "%" SCNx64, &stable) == 1); printf("Checkpoint %d complete at stable %" PRIu64 ".\n", i, stable); fflush(stdout); /* * Create the checkpoint file so that the parent process knows * at least one checkpoint has finished and can start its * timer. */ if (first_ckpt) { testutil_checksys((fp = fopen(ckpt_file, "w")) == NULL); first_ckpt = false; testutil_checksys(fclose(fp) != 0); } } /* NOTREACHED */ }
static void * ops(void *arg) { TINFO *tinfo; WT_CONNECTION *conn; WT_CURSOR *cursor, *cursor_insert; WT_SESSION *session; WT_ITEM key, value; uint64_t keyno, ckpt_op, session_op; uint32_t op; uint8_t *keybuf, *valbuf; u_int np; int ckpt_available, dir, insert, intxn, notfound, readonly, ret; char *ckpt_config, ckpt_name[64]; tinfo = arg; /* Initialize the per-thread random number generator. */ __wt_random_init(&tinfo->rnd); conn = g.wts_conn; keybuf = valbuf = NULL; readonly = 0; /* -Wconditional-uninitialized */ /* Set up the default key and value buffers. */ key_gen_setup(&keybuf); val_gen_setup(&tinfo->rnd, &valbuf); /* Set the first operation where we'll create sessions and cursors. */ session_op = 0; session = NULL; cursor = cursor_insert = NULL; /* Set the first operation where we'll perform checkpoint operations. */ ckpt_op = g.c_checkpoints ? mmrand(&tinfo->rnd, 100, 10000) : 0; ckpt_available = 0; for (intxn = 0; !tinfo->quit; ++tinfo->ops) { /* * We can't checkpoint or swap sessions/cursors while in a * transaction, resolve any running transaction. */ if (intxn && (tinfo->ops == ckpt_op || tinfo->ops == session_op)) { if ((ret = session->commit_transaction( session, NULL)) != 0) die(ret, "session.commit_transaction"); ++tinfo->commit; intxn = 0; } /* Open up a new session and cursors. */ if (tinfo->ops == session_op || session == NULL || cursor == NULL) { if (session != NULL && (ret = session->close(session, NULL)) != 0) die(ret, "session.close"); if ((ret = conn->open_session(conn, NULL, ops_session_config(&tinfo->rnd), &session)) != 0) die(ret, "connection.open_session"); /* * 10% of the time, perform some read-only operations * from a checkpoint. * * Skip that if we single-threaded and doing checks * against a Berkeley DB database, because that won't * work because the Berkeley DB database records won't * match the checkpoint. Also skip if we are using * LSM, because it doesn't support reads from * checkpoints. 
*/ if (!SINGLETHREADED && !DATASOURCE("lsm") && ckpt_available && mmrand(&tinfo->rnd, 1, 10) == 1) { if ((ret = session->open_cursor(session, g.uri, NULL, ckpt_name, &cursor)) != 0) die(ret, "session.open_cursor"); /* Pick the next session/cursor close/open. */ session_op += 250; /* Checkpoints are read-only. */ readonly = 1; } else { /* * Open two cursors: one for overwriting and one * for append (if it's a column-store). * * The reason is when testing with existing * records, we don't track if a record was * deleted or not, which means we must use * cursor->insert with overwriting configured. * But, in column-store files where we're * testing with new, appended records, we don't * want to have to specify the record number, * which requires an append configuration. */ if ((ret = session->open_cursor(session, g.uri, NULL, "overwrite", &cursor)) != 0) die(ret, "session.open_cursor"); if ((g.type == FIX || g.type == VAR) && (ret = session->open_cursor(session, g.uri, NULL, "append", &cursor_insert)) != 0) die(ret, "session.open_cursor"); /* Pick the next session/cursor close/open. */ session_op += mmrand(&tinfo->rnd, 100, 5000); /* Updates supported. */ readonly = 0; } } /* Checkpoint the database. */ if (tinfo->ops == ckpt_op && g.c_checkpoints) { /* * LSM and data-sources don't support named checkpoints, * and we can't drop a named checkpoint while there's a * cursor open on it, otherwise 20% of the time name the * checkpoint. */ if (DATASOURCE("helium") || DATASOURCE("kvsbdb") || DATASOURCE("lsm") || readonly || mmrand(&tinfo->rnd, 1, 5) == 1) ckpt_config = NULL; else { (void)snprintf(ckpt_name, sizeof(ckpt_name), "name=thread-%d", tinfo->id); ckpt_config = ckpt_name; } /* Named checkpoints lock out backups */ if (ckpt_config != NULL && (ret = pthread_rwlock_wrlock(&g.backup_lock)) != 0) die(ret, "pthread_rwlock_wrlock: backup lock"); if ((ret = session->checkpoint(session, ckpt_config)) != 0) die(ret, "session.checkpoint%s%s", ckpt_config == NULL ? 
"" : ": ", ckpt_config == NULL ? "" : ckpt_config); if (ckpt_config != NULL && (ret = pthread_rwlock_unlock(&g.backup_lock)) != 0) die(ret, "pthread_rwlock_wrlock: backup lock"); /* Rephrase the checkpoint name for cursor open. */ if (ckpt_config == NULL) strcpy(ckpt_name, "checkpoint=WiredTigerCheckpoint"); else (void)snprintf(ckpt_name, sizeof(ckpt_name), "checkpoint=thread-%d", tinfo->id); ckpt_available = 1; /* Pick the next checkpoint operation. */ ckpt_op += mmrand(&tinfo->rnd, 5000, 20000); } /* * If we're not single-threaded and we're not in a transaction, * start a transaction 20% of the time. */ if (!SINGLETHREADED && !intxn && mmrand(&tinfo->rnd, 1, 10) >= 8) { if ((ret = session->begin_transaction(session, NULL)) != 0) die(ret, "session.begin_transaction"); intxn = 1; } insert = notfound = 0; keyno = mmrand(&tinfo->rnd, 1, (u_int)g.rows); key.data = keybuf; value.data = valbuf; /* * Perform some number of operations: the percentage of deletes, * inserts and writes are specified, reads are the rest. The * percentages don't have to add up to 100, a high percentage * of deletes will mean fewer inserts and writes. Modifications * are always followed by a read to confirm it worked. */ op = readonly ? UINT32_MAX : mmrand(&tinfo->rnd, 1, 100); if (op < g.c_delete_pct) { ++tinfo->remove; switch (g.type) { case ROW: /* * If deleting a non-existent record, the cursor * won't be positioned, and so can't do a next. */ if (row_remove(cursor, &key, keyno, ¬found)) goto deadlock; break; case FIX: case VAR: if (col_remove(cursor, &key, keyno, ¬found)) goto deadlock; break; } } else if (op < g.c_delete_pct + g.c_insert_pct) { ++tinfo->insert; switch (g.type) { case ROW: if (row_insert( tinfo, cursor, &key, &value, keyno)) goto deadlock; insert = 1; break; case FIX: case VAR: /* * We can only append so many new records, if * we've reached that limit, update a record * instead of doing an insert. 
*/ if (g.append_cnt >= g.append_max) goto skip_insert; /* Insert, then reset the insert cursor. */ if (col_insert(tinfo, cursor_insert, &key, &value, &keyno)) goto deadlock; if ((ret = cursor_insert->reset(cursor_insert)) != 0) die(ret, "cursor.reset"); insert = 1; break; } } else if ( op < g.c_delete_pct + g.c_insert_pct + g.c_write_pct) { ++tinfo->update; switch (g.type) { case ROW: if (row_update( tinfo, cursor, &key, &value, keyno)) goto deadlock; break; case FIX: case VAR: skip_insert: if (col_update(tinfo, cursor, &key, &value, keyno)) goto deadlock; break; } } else { ++tinfo->search; if (read_row(cursor, &key, keyno)) if (intxn) goto deadlock; continue; } /* * The cursor is positioned if we did any operation other than * insert, do a small number of next/prev cursor operations in * a random direction. */ if (!insert) { dir = (int)mmrand(&tinfo->rnd, 0, 1); for (np = 0; np < mmrand(&tinfo->rnd, 1, 8); ++np) { if (notfound) break; if (nextprev(cursor, dir, ¬found)) goto deadlock; } } /* Read to confirm the operation. */ ++tinfo->search; if (read_row(cursor, &key, keyno)) goto deadlock; /* Reset the cursor: there is no reason to keep pages pinned. */ if ((ret = cursor->reset(cursor)) != 0) die(ret, "cursor.reset"); /* * If we're in the transaction, commit 40% of the time and * rollback 10% of the time. */ if (intxn) switch (mmrand(&tinfo->rnd, 1, 10)) { case 1: case 2: case 3: case 4: /* 40% */ if ((ret = session->commit_transaction( session, NULL)) != 0) die(ret, "session.commit_transaction"); ++tinfo->commit; intxn = 0; break; case 5: /* 10% */ if (0) { deadlock: ++tinfo->deadlock; } if ((ret = session->rollback_transaction( session, NULL)) != 0) die(ret, "session.rollback_transaction"); ++tinfo->rollback; intxn = 0; break; default: break; } } if (session != NULL && (ret = session->close(session, NULL)) != 0) die(ret, "session.close"); free(keybuf); free(valbuf); tinfo->state = TINFO_COMPLETE; return (NULL); }
/*
 * __wt_connection_init --
 *	Initialize the lists, locks and related fields of a just-created
 *	WT_CONNECTION_IMPL handle. Returns 0 on success; any failing step
 *	returns its error immediately through WT_RET.
 */
int
__wt_connection_init(WT_CONNECTION_IMPL *conn)
{
	WT_SESSION_IMPL *session;
	u_int slot;

	session = conn->default_session;

	/* Per-bucket hash lists for data handles and file handles. */
	for (slot = 0; slot < WT_HASH_ARRAY_SIZE; ++slot) {
		TAILQ_INIT(&conn->dhhash[slot]);
		TAILQ_INIT(&conn->fhhash[slot]);
	}

	/* Connection-wide lists. */
	TAILQ_INIT(&conn->dhqh);		/* Data handles */
	TAILQ_INIT(&conn->dlhqh);		/* Libraries */
	TAILQ_INIT(&conn->dsrcqh);		/* Data sources */
	TAILQ_INIT(&conn->fhqh);		/* Files */
	TAILQ_INIT(&conn->collqh);		/* Collators */
	TAILQ_INIT(&conn->compqh);		/* Compressors */
	TAILQ_INIT(&conn->encryptqh);		/* Encryptors */
	TAILQ_INIT(&conn->extractorqh);		/* Extractors */
	TAILQ_INIT(&conn->lsmqh);		/* WT_LSM_TREE handles */

	/* LSM work queues. */
	TAILQ_INIT(&conn->lsm_manager.switchqh);
	TAILQ_INIT(&conn->lsm_manager.appqh);
	TAILQ_INIT(&conn->lsm_manager.managerqh);

	/* Seed the default session's random number state. */
	__wt_random_init(&session->rnd);

	/* Configuration, then statistics. */
	WT_RET(__wt_conn_config_init(session));
	WT_RET(__wt_stat_connection_init(session, conn));

	/* Spinlocks. */
	WT_RET(__wt_spin_init(session, &conn->api_lock, "api"));
	WT_SPIN_INIT_TRACKED(session, &conn->checkpoint_lock, checkpoint);
	WT_SPIN_INIT_TRACKED(session, &conn->dhandle_lock, handle_list);
	WT_RET(__wt_spin_init(session, &conn->encryptor_lock, "encryptor"));
	WT_RET(__wt_spin_init(session, &conn->fh_lock, "file list"));
	WT_RET(__wt_spin_init(session, &conn->las_lock, "lookaside table"));
	WT_SPIN_INIT_TRACKED(session, &conn->metadata_lock, metadata);
	WT_RET(__wt_spin_init(session, &conn->reconfig_lock, "reconfigure"));
	WT_SPIN_INIT_TRACKED(session, &conn->schema_lock, schema);
	WT_SPIN_INIT_TRACKED(session, &conn->table_lock, table);
	WT_RET(__wt_spin_init(session, &conn->turtle_lock, "turtle file"));

	/* Read-write locks. */
	WT_RET(__wt_rwlock_alloc(
	    session, &conn->hot_backup_lock, "hot backup"));

	/* The array of page locks is allocated here, then each is set up. */
	WT_RET(__wt_calloc_def(session, WT_PAGE_LOCKS, &conn->page_lock));
	WT_CACHE_LINE_ALIGNMENT_VERIFY(session, conn->page_lock);
	for (slot = 0; slot < WT_PAGE_LOCKS; ++slot)
		WT_RET(__wt_spin_init(
		    session, &conn->page_lock[slot], "btree page"));

	/* Spinlocks and condition variable for the LSM manager queues. */
	WT_RET(__wt_spin_init(session,
	    &conn->lsm_manager.app_lock, "LSM application queue lock"));
	WT_RET(__wt_spin_init(session,
	    &conn->lsm_manager.manager_lock, "LSM manager queue lock"));
	WT_RET(__wt_spin_init(session,
	    &conn->lsm_manager.switch_lock, "LSM switch queue lock"));
	WT_RET(__wt_cond_alloc(session,
	    "LSM worker cond", false, &conn->lsm_manager.work_cond));

	/*
	 * Generation numbers.
	 *
	 * Split generations start at one. Threads publish this generation
	 * number before examining tree structures, and zero when they leave,
	 * so a thread that is in a tree before the first split must be
	 * distinguishable from a thread that is not in a tree at all.
	 */
	conn->split_gen = 1;

	/*
	 * Block manager.
	 * XXX
	 * If there's ever a second block manager, we'll want to make this
	 * more opaque, but for now this is simpler.
	 */
	WT_RET(__wt_spin_init(session, &conn->block_lock, "block manager"));
	for (slot = 0; slot < WT_HASH_ARRAY_SIZE; ++slot)
		TAILQ_INIT(&conn->blockhash[slot]);	/* Block handle hash */
	TAILQ_INIT(&conn->blockqh);			/* Block manager list */

	return (0);
}
/* * thread_run -- * Runner function for the worker threads. */ static WT_THREAD_RET thread_run(void *arg) { FILE *fp; WT_CURSOR *cur_coll, *cur_local, *cur_oplog, *cur_stable; WT_ITEM data; WT_RAND_STATE rnd; WT_SESSION *session; WT_THREAD_DATA *td; uint64_t i, stable_ts; int ret; char cbuf[MAX_VAL], lbuf[MAX_VAL], obuf[MAX_VAL]; char kname[64], tscfg[64]; __wt_random_init(&rnd); memset(cbuf, 0, sizeof(cbuf)); memset(lbuf, 0, sizeof(lbuf)); memset(obuf, 0, sizeof(obuf)); memset(kname, 0, sizeof(kname)); td = (WT_THREAD_DATA *)arg; /* * Set up the separate file for checking. */ testutil_check(__wt_snprintf(cbuf, sizeof(cbuf), RECORDS_FILE, td->id)); (void)unlink(cbuf); testutil_checksys((fp = fopen(cbuf, "w")) == NULL); /* * Set to line buffering. But that is advisory only. We've seen * cases where the result files end up with partial lines. */ __wt_stream_set_line_buffer(fp); if ((ret = td->conn->open_session(td->conn, NULL, NULL, &session)) != 0) testutil_die(ret, "WT_CONNECTION:open_session"); /* * Open a cursor to each table. */ if ((ret = session->open_cursor(session, uri_collection, NULL, NULL, &cur_coll)) != 0) testutil_die(ret, "WT_SESSION.open_cursor: %s", uri_collection); if ((ret = session->open_cursor(session, uri_local, NULL, NULL, &cur_local)) != 0) testutil_die(ret, "WT_SESSION.open_cursor: %s", uri_local); if ((ret = session->open_cursor(session, uri_oplog, NULL, NULL, &cur_oplog)) != 0) testutil_die(ret, "WT_SESSION.open_cursor: %s", uri_oplog); if ((ret = session->open_cursor( session, stable_store, NULL, NULL, &cur_stable)) != 0) testutil_die(ret, "WT_SESSION.open_cursor: %s", stable_store); /* * Write our portion of the key space until we're killed. 
*/ printf("Thread %" PRIu32 " starts at %" PRIu64 "\n", td->id, td->start); for (i = td->start; ; ++i) { if (use_ts) stable_ts = global_ts++; else stable_ts = 0; testutil_check(__wt_snprintf( kname, sizeof(kname), "%" PRIu64, i)); testutil_check(session->begin_transaction(session, NULL)); cur_coll->set_key(cur_coll, kname); cur_local->set_key(cur_local, kname); cur_oplog->set_key(cur_oplog, kname); /* * Put an informative string into the value so that it * can be viewed well in a binary dump. */ testutil_check(__wt_snprintf(cbuf, sizeof(cbuf), "COLL: thread:%" PRIu64 " ts:%" PRIu64 " key: %" PRIu64, td->id, stable_ts, i)); testutil_check(__wt_snprintf(lbuf, sizeof(lbuf), "LOCAL: thread:%" PRIu64 " ts:%" PRIu64 " key: %" PRIu64, td->id, stable_ts, i)); testutil_check(__wt_snprintf(obuf, sizeof(obuf), "OPLOG: thread:%" PRIu64 " ts:%" PRIu64 " key: %" PRIu64, td->id, stable_ts, i)); data.size = __wt_random(&rnd) % MAX_VAL; data.data = cbuf; cur_coll->set_value(cur_coll, &data); if ((ret = cur_coll->insert(cur_coll)) != 0) testutil_die(ret, "WT_CURSOR.insert"); data.size = __wt_random(&rnd) % MAX_VAL; data.data = obuf; cur_oplog->set_value(cur_oplog, &data); if ((ret = cur_oplog->insert(cur_oplog)) != 0) testutil_die(ret, "WT_CURSOR.insert"); if (use_ts) { testutil_check(__wt_snprintf(tscfg, sizeof(tscfg), "commit_timestamp=%" PRIx64, stable_ts)); testutil_check( session->commit_transaction(session, tscfg)); } else testutil_check( session->commit_transaction(session, NULL)); /* * Insert into the local table outside the timestamp txn. */ data.size = __wt_random(&rnd) % MAX_VAL; data.data = lbuf; cur_local->set_value(cur_local, &data); if ((ret = cur_local->insert(cur_local)) != 0) testutil_die(ret, "WT_CURSOR.insert"); /* * Every N records we will record our stable timestamp into the * stable table. That will define our threshold where we * expect to find records after recovery. 
*/ if (i % STABLE_PERIOD == 0) { if (use_ts) { /* * Set both the oldest and stable timestamp * so that we don't need to maintain read * availability at older timestamps. */ testutil_check(__wt_snprintf( tscfg, sizeof(tscfg), "oldest_timestamp=%" PRIx64 ",stable_timestamp=%" PRIx64, stable_ts, stable_ts)); testutil_check( td->conn->set_timestamp(td->conn, tscfg)); } cur_stable->set_key(cur_stable, td->id); cur_stable->set_value(cur_stable, stable_ts); testutil_check(cur_stable->insert(cur_stable)); } /* * Save the timestamp and key separately for checking later. */ if (fprintf(fp, "%" PRIu64 " %" PRIu64 "\n", stable_ts, i) < 0) testutil_die(EIO, "fprintf"); } /* NOTREACHED */ }
/* * Child process creates the database and table, and then writes data into * the table until it is killed by the parent. */ static void fill_db(void) { FILE *fp; WT_CONNECTION *conn; WT_CURSOR *cursor; WT_ITEM data; WT_RAND_STATE rnd; WT_SESSION *session; uint64_t i; int ret; uint8_t buf[MAX_VAL]; __wt_random_init(&rnd); memset(buf, 0, sizeof(buf)); /* * Initialize the first 25% to random values. Leave a bunch of data * space at the end to emphasize zero data. */ for (i = 0; i < MAX_VAL/4; i++) buf[i] = (uint8_t)__wt_random(&rnd); /* * Run in the home directory so that the records file is in there too. */ chdir(home); if ((ret = wiredtiger_open(NULL, NULL, ENV_CONFIG, &conn)) != 0) testutil_die(ret, "wiredtiger_open"); if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) testutil_die(ret, "WT_CONNECTION:open_session"); if ((ret = session->create(session, uri, "key_format=Q,value_format=u")) != 0) testutil_die(ret, "WT_SESSION.create: %s", uri); if ((ret = session->open_cursor(session, uri, NULL, NULL, &cursor)) != 0) testutil_die(ret, "WT_SESSION.open_cursor: %s", uri); /* * Keep a separate file with the records we wrote for checking. */ (void)unlink(RECORDS_FILE); if ((fp = fopen(RECORDS_FILE, "w")) == NULL) testutil_die(errno, "fopen"); /* * Set to no buffering. */ setvbuf(fp, NULL, _IONBF, 0); /* * Write data into the table until we are killed by the parent. * The data in the buffer is already set to random content. */ data.data = buf; for (i = 0;; ++i) { data.size = __wt_random(&rnd) % MAX_VAL; cursor->set_key(cursor, i); cursor->set_value(cursor, &data); if ((ret = cursor->insert(cursor)) != 0) testutil_die(ret, "WT_CURSOR.insert"); /* * Save the key separately for checking later. */ if (fprintf(fp, "%" PRIu64 "\n", i) == -1) testutil_die(errno, "fprintf"); if (i % 5000) __wt_yield(); } }
int main(int argc, char *argv[]) { FILE *fp; WT_CONNECTION *conn; WT_CURSOR *cursor; WT_SESSION *session; WT_RAND_STATE rnd; uint64_t key; uint32_t absent, count, timeout; int ch, status, ret; pid_t pid; char *working_dir; if ((progname = strrchr(argv[0], DIR_DELIM)) == NULL) progname = argv[0]; else ++progname; working_dir = NULL; timeout = 10; while ((ch = __wt_getopt(progname, argc, argv, "h:t:")) != EOF) switch (ch) { case 'h': working_dir = __wt_optarg; break; case 't': timeout = (uint32_t)atoi(__wt_optarg); break; default: usage(); } argc -= __wt_optind; argv += __wt_optind; if (argc != 0) usage(); testutil_work_dir_from_path(home, 512, working_dir); testutil_make_work_dir(home); /* * Fork a child to insert as many items. We will then randomly * kill the child, run recovery and make sure all items we wrote * exist after recovery runs. */ if ((pid = fork()) < 0) testutil_die(errno, "fork"); if (pid == 0) { /* child */ fill_db(); return (EXIT_SUCCESS); } /* parent */ __wt_random_init(&rnd); /* Sleep for the configured amount of time before killing the child. */ printf("Parent: sleep %" PRIu32 " seconds, then kill child\n", timeout); sleep(timeout); /* * !!! It should be plenty long enough to make sure more than one * log file exists. If wanted, that check would be added here. */ printf("Kill child\n"); if (kill(pid, SIGKILL) != 0) testutil_die(errno, "kill"); waitpid(pid, &status, 0); /* * !!! If we wanted to take a copy of the directory before recovery, * this is the place to do it. 
*/ chdir(home); printf("Open database, run recovery and verify content\n"); if ((ret = wiredtiger_open(NULL, NULL, ENV_CONFIG_REC, &conn)) != 0) testutil_die(ret, "wiredtiger_open"); if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) testutil_die(ret, "WT_CONNECTION:open_session"); if ((ret = session->open_cursor(session, uri, NULL, NULL, &cursor)) != 0) testutil_die(ret, "WT_SESSION.open_cursor: %s", uri); if ((fp = fopen(RECORDS_FILE, "r")) == NULL) testutil_die(errno, "fopen"); /* * For every key in the saved file, verify that the key exists * in the table after recovery. Since we did write-no-sync, we * expect every key to have been recovered. */ for (absent = count = 0;; ++count) { ret = fscanf(fp, "%" SCNu64 "\n", &key); if (ret != EOF && ret != 1) testutil_die(errno, "fscanf"); if (ret == EOF) break; cursor->set_key(cursor, key); if ((ret = cursor->search(cursor)) != 0) { if (ret != WT_NOTFOUND) testutil_die(ret, "search"); printf("no record with key %" PRIu64 "\n", key); ++absent; } } fclose(fp); if ((ret = conn->close(conn, NULL)) != 0) testutil_die(ret, "WT_CONNECTION:close"); if (absent) { printf("%u record(s) absent from %u\n", absent, count); return (EXIT_FAILURE); } printf("%u records verified\n", count); return (EXIT_SUCCESS); }
/* * thread_run -- * Runner function for the worker threads. */ static WT_THREAD_RET thread_run(void *arg) { FILE *fp; WT_CURSOR *cur_coll, *cur_local, *cur_oplog; WT_ITEM data; WT_RAND_STATE rnd; WT_SESSION *prepared_session, *session; THREAD_DATA *td; uint64_t i, active_ts; char cbuf[MAX_VAL], lbuf[MAX_VAL], obuf[MAX_VAL]; char kname[64], tscfg[64], uri[128]; bool use_prep; __wt_random_init(&rnd); memset(cbuf, 0, sizeof(cbuf)); memset(lbuf, 0, sizeof(lbuf)); memset(obuf, 0, sizeof(obuf)); memset(kname, 0, sizeof(kname)); prepared_session = NULL; td = (THREAD_DATA *)arg; /* * Set up the separate file for checking. */ testutil_check(__wt_snprintf( cbuf, sizeof(cbuf), RECORDS_FILE, td->info)); (void)unlink(cbuf); testutil_checksys((fp = fopen(cbuf, "w")) == NULL); /* * Set to line buffering. But that is advisory only. We've seen * cases where the result files end up with partial lines. */ __wt_stream_set_line_buffer(fp); /* * Have 10% of the threads use prepared transactions if timestamps * are in use. Thread numbers start at 0 so we're always guaranteed * that at least one thread is using prepared transactions. */ use_prep = (use_ts && td->info % PREPARE_PCT == 0) ? true : false; /* * For the prepared case we have two sessions so that the oplog session * can have its own transaction in parallel with the collection session * We need this because prepared transactions cannot have any operations * that modify a table that is logged. But we also want to test mixed * logged and not-logged transactions. */ testutil_check(td->conn->open_session(td->conn, NULL, NULL, &session)); if (use_prep) testutil_check(td->conn->open_session( td->conn, NULL, NULL, &prepared_session)); /* * Open a cursor to each table. 
*/ testutil_check(__wt_snprintf( uri, sizeof(uri), "%s:%s", table_pfx, uri_collection)); if (use_prep) testutil_check(prepared_session->open_cursor(prepared_session, uri, NULL, NULL, &cur_coll)); else testutil_check(session->open_cursor(session, uri, NULL, NULL, &cur_coll)); testutil_check(__wt_snprintf( uri, sizeof(uri), "%s:%s", table_pfx, uri_local)); if (use_prep) testutil_check(prepared_session->open_cursor(prepared_session, uri, NULL, NULL, &cur_local)); else testutil_check(session->open_cursor(session, uri, NULL, NULL, &cur_local)); testutil_check(__wt_snprintf( uri, sizeof(uri), "%s:%s", table_pfx, uri_oplog)); testutil_check(session->open_cursor(session, uri, NULL, NULL, &cur_oplog)); /* * Write our portion of the key space until we're killed. */ printf("Thread %" PRIu32 " starts at %" PRIu64 "\n", td->info, td->start); active_ts = 0; for (i = td->start;; ++i) { testutil_check(__wt_snprintf( kname, sizeof(kname), "%" PRIu64, i)); testutil_check(session->begin_transaction(session, NULL)); if (use_prep) testutil_check(prepared_session->begin_transaction( prepared_session, NULL)); if (use_ts) { testutil_check(pthread_rwlock_rdlock(&ts_lock)); active_ts = __wt_atomic_addv64(&global_ts, 1); testutil_check(__wt_snprintf(tscfg, sizeof(tscfg), "commit_timestamp=%" PRIx64, active_ts)); /* * Set the transaction's timestamp now before performing * the operation. If we are using prepared transactions, * set the timestamp for the session used for oplog. The * collection session in that case would continue to use * this timestamp. */ testutil_check(session->timestamp_transaction( session, tscfg)); testutil_check(pthread_rwlock_unlock(&ts_lock)); } cur_coll->set_key(cur_coll, kname); cur_local->set_key(cur_local, kname); cur_oplog->set_key(cur_oplog, kname); /* * Put an informative string into the value so that it * can be viewed well in a binary dump. 
*/ testutil_check(__wt_snprintf(cbuf, sizeof(cbuf), "COLL: thread:%" PRIu64 " ts:%" PRIu64 " key: %" PRIu64, td->info, active_ts, i)); testutil_check(__wt_snprintf(lbuf, sizeof(lbuf), "LOCAL: thread:%" PRIu64 " ts:%" PRIu64 " key: %" PRIu64, td->info, active_ts, i)); testutil_check(__wt_snprintf(obuf, sizeof(obuf), "OPLOG: thread:%" PRIu64 " ts:%" PRIu64 " key: %" PRIu64, td->info, active_ts, i)); data.size = __wt_random(&rnd) % MAX_VAL; data.data = cbuf; cur_coll->set_value(cur_coll, &data); testutil_check(cur_coll->insert(cur_coll)); data.size = __wt_random(&rnd) % MAX_VAL; data.data = obuf; cur_oplog->set_value(cur_oplog, &data); testutil_check(cur_oplog->insert(cur_oplog)); if (use_prep) { /* * Run with prepare every once in a while. And also * yield after prepare sometimes too. This is only done * on the collection session. */ if (i % PREPARE_FREQ == 0) { testutil_check(__wt_snprintf(tscfg, sizeof(tscfg), "prepare_timestamp=%" PRIx64, active_ts)); testutil_check( prepared_session->prepare_transaction( prepared_session, tscfg)); if (i % PREPARE_YIELD == 0) __wt_yield(); testutil_check( __wt_snprintf(tscfg, sizeof(tscfg), "commit_timestamp=%" PRIx64 ",durable_timestamp=%" PRIx64, active_ts, active_ts)); } else testutil_check( __wt_snprintf(tscfg, sizeof(tscfg), "commit_timestamp=%" PRIx64, active_ts)); testutil_check( prepared_session->commit_transaction( prepared_session, tscfg)); } testutil_check( session->commit_transaction(session, NULL)); /* * Insert into the local table outside the timestamp txn. */ data.size = __wt_random(&rnd) % MAX_VAL; data.data = lbuf; cur_local->set_value(cur_local, &data); testutil_check(cur_local->insert(cur_local)); /* * Save the timestamp and key separately for checking later. */ if (fprintf(fp, "%" PRIu64 " %" PRIu64 "\n", active_ts, i) < 0) testutil_die(EIO, "fprintf"); } /* NOTREACHED */ }
/*
 * __wt_connection_init --
 *	Initialize the lists, locks and related fields of a just-created
 *	WT_CONNECTION_IMPL handle. Returns 0 on success; any failing step
 *	returns its error immediately through WT_RET.
 */
int
__wt_connection_init(WT_CONNECTION_IMPL *conn)
{
	WT_SESSION_IMPL *session;
	u_int slot;

	session = conn->default_session;

	/* Per-bucket hash lists for data handles and file handles. */
	for (slot = 0; slot < WT_HASH_ARRAY_SIZE; ++slot) {
		TAILQ_INIT(&conn->dhhash[slot]);
		TAILQ_INIT(&conn->fhhash[slot]);
	}

	/* Connection-wide lists. */
	TAILQ_INIT(&conn->dhqh);		/* Data handles */
	TAILQ_INIT(&conn->dlhqh);		/* Libraries */
	TAILQ_INIT(&conn->dsrcqh);		/* Data sources */
	TAILQ_INIT(&conn->fhqh);		/* Files */
	TAILQ_INIT(&conn->collqh);		/* Collators */
	TAILQ_INIT(&conn->compqh);		/* Compressors */
	TAILQ_INIT(&conn->encryptqh);		/* Encryptors */
	TAILQ_INIT(&conn->extractorqh);		/* Extractors */
	TAILQ_INIT(&conn->lsmqh);		/* WT_LSM_TREE handles */

	/* LSM work queues. */
	TAILQ_INIT(&conn->lsm_manager.switchqh);
	TAILQ_INIT(&conn->lsm_manager.appqh);
	TAILQ_INIT(&conn->lsm_manager.managerqh);

	/* Seed the default session's random number state. */
	__wt_random_init(&session->rnd);

	/* Configuration, then statistics. */
	WT_RET(__wt_conn_config_init(session));
	WT_RET(__wt_stat_connection_init(session, conn));

	/* Spinlocks. */
	WT_RET(__wt_spin_init(session, &conn->api_lock, "api"));
	WT_SPIN_INIT_TRACKED(session, &conn->checkpoint_lock, checkpoint);
	WT_RET(__wt_spin_init(session, &conn->encryptor_lock, "encryptor"));
	WT_RET(__wt_spin_init(session, &conn->fh_lock, "file list"));
	WT_SPIN_INIT_TRACKED(session, &conn->metadata_lock, metadata);
	WT_RET(__wt_spin_init(session, &conn->reconfig_lock, "reconfigure"));
	WT_SPIN_INIT_SESSION_TRACKED(session, &conn->schema_lock, schema);
	WT_RET(__wt_spin_init(session, &conn->turtle_lock, "turtle file"));

	/* Read-write locks. */
	WT_RWLOCK_INIT_SESSION_TRACKED(session, &conn->dhandle_lock, dhandle);
	WT_RET(__wt_rwlock_init(session, &conn->hot_backup_lock));
	WT_RWLOCK_INIT_TRACKED(session, &conn->table_lock, table);

	/* Spinlocks and condition variable for the LSM manager queues. */
	WT_RET(__wt_spin_init(session,
	    &conn->lsm_manager.app_lock, "LSM application queue lock"));
	WT_RET(__wt_spin_init(session,
	    &conn->lsm_manager.manager_lock, "LSM manager queue lock"));
	WT_RET(__wt_spin_init(session,
	    &conn->lsm_manager.switch_lock, "LSM switch queue lock"));
	WT_RET(__wt_cond_alloc(
	    session, "LSM worker cond", &conn->lsm_manager.work_cond));

	/* Generation manager. */
	__wt_gen_init(session);

	/*
	 * Block manager.
	 * XXX
	 * If there's ever a second block manager, we'll want to make this
	 * more opaque, but for now this is simpler.
	 */
	WT_RET(__wt_spin_init(session, &conn->block_lock, "block manager"));
	for (slot = 0; slot < WT_HASH_ARRAY_SIZE; ++slot)
		TAILQ_INIT(&conn->blockhash[slot]);	/* Block handle hash */
	TAILQ_INIT(&conn->blockqh);			/* Block manager list */

	return (0);
}
int main(int argc, char *argv[]) { time_t start; int ch, i, onerun, reps, ret; const char *config, *home; config = NULL; if ((g.progname = strrchr(argv[0], DIR_DELIM)) == NULL) g.progname = argv[0]; else ++g.progname; #if 0 /* Configure the GNU malloc for debugging. */ (void)setenv("MALLOC_CHECK_", "2", 1); #endif #if 0 /* Configure the FreeBSD malloc for debugging. */ (void)setenv("MALLOC_OPTIONS", "AJ", 1); #endif /* Track progress unless we're re-directing output to a file. */ g.track = isatty(1) ? 1 : 0; /* Set values from the command line. */ home = NULL; onerun = 0; while ((ch = __wt_getopt( g.progname, argc, argv, "1C:c:H:h:Llqrt:")) != EOF) switch (ch) { case '1': /* One run */ onerun = 1; break; case 'C': /* wiredtiger_open config */ g.config_open = __wt_optarg; break; case 'c': /* Configuration from a file */ config = __wt_optarg; break; case 'H': g.helium_mount = __wt_optarg; break; case 'h': home = __wt_optarg; break; case 'L': /* Re-direct output to a log */ /* * The -l option is a superset of -L, ignore -L if we * have already configured logging for operations. */ if (g.logging == 0) g.logging = LOG_FILE; break; case 'l': /* Turn on operation logging */ g.logging = LOG_OPS; break; case 'q': /* Quiet */ g.track = 0; break; case 'r': /* Replay a run */ g.replay = 1; break; default: usage(); } argc -= __wt_optind; argv += __wt_optind; /* * Initialize the global RNG. Start with the standard seeds, and then * use seconds since the Epoch modulo a prime to run the RNG for some * number of steps, so we don't start with the same values every time. */ __wt_random_init(&g.rnd); for (i = (int)time(NULL) % 10007; i > 0; --i) (void)__wt_random(&g.rnd); /* Set up paths. */ path_setup(home); /* If it's a replay, use the home directory's CONFIG file. 
*/ if (g.replay) { if (config != NULL) die(EINVAL, "-c incompatible with -r"); if (access(g.home_config, R_OK) != 0) die(ENOENT, "%s", g.home_config); config = g.home_config; } /* * If we weren't given a configuration file, set values from "CONFIG", * if it exists. * * Small hack to ignore any CONFIG file named ".", that just makes it * possible to ignore any local CONFIG file, used when running checks. */ if (config == NULL && access("CONFIG", R_OK) == 0) config = "CONFIG"; if (config != NULL && strcmp(config, ".") != 0) config_file(config); /* * The rest of the arguments are individual configurations that modify * the base configuration. */ for (; *argv != NULL; ++argv) config_single(*argv, 1); /* * Multithreaded runs can be replayed: it's useful and we'll get the * configuration correct. Obviously the order of operations changes, * warn the user. */ if (g.replay && !SINGLETHREADED) printf("Warning: replaying a threaded run\n"); /* * Single-threaded runs historically exited after a single replay, which * makes sense when you're debugging, leave that semantic in place. */ if (g.replay && SINGLETHREADED) g.c_runs = 1; /* * Let the command line -1 flag override runs configured from other * sources. */ if (onerun) g.c_runs = 1; /* * Initialize locks to single-thread named checkpoints and backups, last * last-record updates, and failures. 
*/ if ((ret = pthread_rwlock_init(&g.append_lock, NULL)) != 0) die(ret, "pthread_rwlock_init: append lock"); if ((ret = pthread_rwlock_init(&g.backup_lock, NULL)) != 0) die(ret, "pthread_rwlock_init: backup lock"); if ((ret = pthread_rwlock_init(&g.death_lock, NULL)) != 0) die(ret, "pthread_rwlock_init: death lock"); printf("%s: process %" PRIdMAX "\n", g.progname, (intmax_t)getpid()); while (++g.run_cnt <= g.c_runs || g.c_runs == 0 ) { startup(); /* Start a run */ config_setup(); /* Run configuration */ config_print(0); /* Dump run configuration */ key_len_setup(); /* Setup keys */ start = time(NULL); track("starting up", 0ULL, NULL); #ifdef HAVE_BERKELEY_DB if (SINGLETHREADED) bdb_open(); /* Initial file config */ #endif wts_open(g.home, 1, &g.wts_conn); wts_create(); wts_load(); /* Load initial records */ wts_verify("post-bulk verify"); /* Verify */ /* * If we're not doing any operations, scan the bulk-load, copy * the statistics and we're done. Otherwise, loop reading and * operations, with a verify after each set. */ if (g.c_timer == 0 && g.c_ops == 0) { wts_read_scan(); /* Read scan */ wts_stats(); /* Statistics */ } else for (reps = 1; reps <= FORMAT_OPERATION_REPS; ++reps) { wts_read_scan(); /* Read scan */ /* Operations */ wts_ops(reps == FORMAT_OPERATION_REPS); /* * Copy out the run's statistics after the last * set of operations. * * XXX * Verify closes the underlying handle and * discards the statistics, read them first. */ if (reps == FORMAT_OPERATION_REPS) wts_stats(); /* Verify */ wts_verify("post-ops verify"); } track("shutting down", 0ULL, NULL); #ifdef HAVE_BERKELEY_DB if (SINGLETHREADED) bdb_close(); #endif wts_close(); /* * If single-threaded, we can dump and compare the WiredTiger * and Berkeley DB data sets. */ if (SINGLETHREADED) wts_dump("standard", 1); /* * Salvage testing. */ wts_salvage(); /* Overwrite the progress line with a completion line. 
*/ if (g.track) printf("\r%78s\r", " "); printf("%4d: %s, %s (%.0f seconds)\n", g.run_cnt, g.c_data_source, g.c_file_type, difftime(time(NULL), start)); fflush(stdout); } /* Flush/close any logging information. */ fclose_and_clear(&g.logfp); fclose_and_clear(&g.randfp); config_print(0); if ((ret = pthread_rwlock_destroy(&g.append_lock)) != 0) die(ret, "pthread_rwlock_destroy: append lock"); if ((ret = pthread_rwlock_destroy(&g.backup_lock)) != 0) die(ret, "pthread_rwlock_destroy: backup lock"); config_clear(); return (EXIT_SUCCESS); }
/* * real_worker -- * A single worker thread that transactionally updates all tables with * consistent values. */ static int real_worker(void) { WT_CURSOR **cursors; WT_RAND_STATE rnd; WT_SESSION *session; u_int i, keyno; int j, ret, t_ret; ret = t_ret = 0; __wt_random_init(&rnd); if ((cursors = calloc( (size_t)(g.ntables), sizeof(WT_CURSOR *))) == NULL) return (log_print_err("malloc", ENOMEM, 1)); if ((ret = g.conn->open_session( g.conn, NULL, "isolation=snapshot", &session)) != 0) { (void)log_print_err("conn.open_session", ret, 1); goto err; } for (j = 0; j < g.ntables; j++) if ((ret = session->open_cursor(session, g.cookies[j].uri, NULL, NULL, &cursors[j])) != 0) { (void)log_print_err("session.open_cursor", ret, 1); goto err; } for (i = 0; i < g.nops && g.running; ++i, __wt_yield()) { if ((ret = session->begin_transaction(session, NULL)) != 0) { (void)log_print_err( "real_worker:begin_transaction", ret, 1); goto err; } keyno = __wt_random(&rnd) % g.nkeys + 1; for (j = 0; j < g.ntables; j++) { if ((ret = worker_op(cursors[j], keyno, i)) != 0) break; } if (ret == 0) { if ((ret = session->commit_transaction( session, NULL)) != 0) { (void)log_print_err( "real_worker:commit_transaction", ret, 1); goto err; } } else if (ret == WT_ROLLBACK) { if ((ret = session->rollback_transaction( session, NULL)) != 0) { (void)log_print_err( "real_worker:rollback_transaction", ret, 1); goto err; } } else { (void)log_print_err("worker op failed", ret, 1); goto err; } } err: if ((t_ret = session->close(session, NULL)) != 0 && ret == 0) { ret = t_ret; (void)log_print_err("session.close", ret, 1); } free(cursors); return (ret); }