void WiredTigerSessionCache::waitUntilDurable(bool forceCheckpoint) { const int shuttingDown = _shuttingDown.fetchAndAdd(1); ON_BLOCK_EXIT([this] { _shuttingDown.fetchAndSubtract(1); }); uassert(ErrorCodes::ShutdownInProgress, "Cannot wait for durability because a shutdown is in progress", !(shuttingDown & kShuttingDownMask)); // When forcing a checkpoint with journaling enabled, don't synchronize with other // waiters, as a log flush is much cheaper than a full checkpoint. if (forceCheckpoint && _engine->isDurable()) { UniqueWiredTigerSession session = getSession(); WT_SESSION* s = session->getSession(); { stdx::unique_lock<stdx::mutex> lk(_journalListenerMutex); JournalListener::Token token = _journalListener->getToken(); invariantWTOK(s->checkpoint(s, NULL)); _journalListener->onDurable(token); } LOG(4) << "created checkpoint (forced)"; return; } uint32_t start = _lastSyncTime.load(); // Do the remainder in a critical section that ensures only a single thread at a time // will attempt to synchronize. stdx::unique_lock<stdx::mutex> lk(_lastSyncMutex); uint32_t current = _lastSyncTime.loadRelaxed(); // synchronized with writes through mutex if (current != start) { // Someone else synced already since we read lastSyncTime, so we're done! return; } _lastSyncTime.store(current + 1); // Nobody has synched yet, so we have to sync ourselves. auto session = getSession(); WT_SESSION* s = session->getSession(); // This gets the token (OpTime) from the last write, before flushing (either the journal, or a // checkpoint), and then reports that token (OpTime) as a durable write. stdx::unique_lock<stdx::mutex> jlk(_journalListenerMutex); JournalListener::Token token = _journalListener->getToken(); // Use the journal when available, or a checkpoint otherwise. if (_engine->isDurable()) { invariantWTOK(s->log_flush(s, "sync=on")); LOG(4) << "flushed journal"; } else { invariantWTOK(s->checkpoint(s, NULL)); LOG(4) << "created checkpoint"; } _journalListener->onDurable(token); }
int main(void) { WT_CONNECTION *conn; WT_CURSOR *cursor; WT_SESSION *session; int ret; ret = wiredtiger_open(home, NULL, "create,statistics=(all)", &conn); ret = conn->open_session(conn, NULL, NULL, &session); ret = session->create( session, "table:access", "key_format=S,value_format=S"); ret = session->open_cursor( session, "table:access", NULL, NULL, &cursor); cursor->set_key(cursor, "key"); cursor->set_value(cursor, "value"); ret = cursor->insert(cursor); cursor->close(cursor); ret = session->checkpoint(session, NULL); ret = print_database_stats(session); ret = print_file_stats(session); ret = print_overflow_pages(session); ret = print_derived_stats(session); return (conn->close(conn, NULL) == 0 ? ret : EXIT_FAILURE); }
/* * Function for repeatedly running checkpoint operations. */ static WT_THREAD_RET do_checkpoints(void *_opts) { TEST_OPTS *opts; WT_SESSION *session; time_t now, start; int ret; opts = (TEST_OPTS *)_opts; (void)time(&start); (void)time(&now); while (difftime(now, start) < RUNTIME) { testutil_check( opts->conn->open_session(opts->conn, NULL, NULL, &session)); if ((ret = session->checkpoint(session, "force")) != 0) if (ret != EBUSY && ret != ENOENT) testutil_die(ret, "session.checkpoint"); testutil_check(session->close(session, NULL)); /* * A short sleep to let operations process and avoid back to * back checkpoints locking up resources. */ sleep(1); (void)time(&now); } return (WT_THREAD_RET_VALUE); }
int WiredTigerKVEngine::flushAllFiles(bool sync) { LOG(1) << "WiredTigerKVEngine::flushAllFiles"; syncSizeInfo(true); WiredTigerSession session(_conn); WT_SESSION* s = session.getSession(); invariantWTOK(s->checkpoint(s, NULL)); return 1; }
/*
 * WTServerHandler::checkpoint --
 *	Take a WiredTiger checkpoint on a temporary session. Failures are
 *	reported on stderr; nothing is returned to the caller.
 */
void
WTServerHandler::
checkpoint()
{
	WT_SESSION *sess = NULL;
	int rc;

	/*
	 * If the session cannot be opened, "sess" is not valid and must not
	 * be used: report the problem and give up.
	 */
	rc = conn_->open_session(conn_, NULL, NULL, &sess);
	if (rc != 0) {
		fprintf(stderr, "WT_CONNECTION::open_session error %s\n",
		    wiredtiger_strerror(rc));
		return;
	}

	rc = sess->checkpoint(sess, NULL);
	if (rc != 0) {
		fprintf(stderr, "WT_SESSION::checkpoint error %s\n",
		    wiredtiger_strerror(rc));
	}

	/* Close the session whether or not the checkpoint succeeded. */
	rc = sess->close(sess, NULL);
	if (rc != 0) {
		fprintf(stderr, "WT_SESSION::close error %s\n",
		    wiredtiger_strerror(rc));
	}
}
/* * thread_ckpt_run -- * Runner function for the checkpoint thread. */ static WT_THREAD_RET thread_ckpt_run(void *arg) { FILE *fp; WT_RAND_STATE rnd; WT_SESSION *session; WT_THREAD_DATA *td; uint64_t ts; uint32_t sleep_time; int i, ret; bool first_ckpt; __wt_random_init(&rnd); td = (WT_THREAD_DATA *)arg; /* * Keep a separate file with the records we wrote for checking. */ (void)unlink(ckpt_file); if ((ret = td->conn->open_session(td->conn, NULL, NULL, &session)) != 0) testutil_die(ret, "WT_CONNECTION:open_session"); first_ckpt = true; ts = 0; for (i = 0; ;++i) { sleep_time = __wt_random(&rnd) % MAX_CKPT_INTERVAL; sleep(sleep_time); if (use_ts) ts = global_ts; /* * Since this is the default, send in this string even if * running without timestamps. */ testutil_check(session->checkpoint( session, "use_timestamp=true")); printf("Checkpoint %d complete. Minimum ts %" PRIu64 "\n", i, ts); fflush(stdout); /* * Create the checkpoint file so that the parent process knows * at least one checkpoint has finished and can start its * timer. */ if (first_ckpt) { testutil_checksys((fp = fopen(ckpt_file, "w")) == NULL); first_ckpt = false; testutil_checksys(fclose(fp) != 0); } } /* NOTREACHED */ }
/* * thread_ckpt_run -- * Runner function for the checkpoint thread. */ static WT_THREAD_RET thread_ckpt_run(void *arg) { FILE *fp; WT_RAND_STATE rnd; WT_SESSION *session; THREAD_DATA *td; uint64_t stable; uint32_t sleep_time; int i; bool first_ckpt; char ts_string[WT_TS_HEX_STRING_SIZE]; __wt_random_init(&rnd); td = (THREAD_DATA *)arg; /* * Keep a separate file with the records we wrote for checking. */ (void)unlink(ckpt_file); testutil_check(td->conn->open_session(td->conn, NULL, NULL, &session)); first_ckpt = true; for (i = 0; ;++i) { sleep_time = __wt_random(&rnd) % MAX_CKPT_INVL; sleep(sleep_time); /* * Since this is the default, send in this string even if * running without timestamps. */ testutil_check(session->checkpoint( session, "use_timestamp=true")); testutil_check(td->conn->query_timestamp( td->conn, ts_string, "get=last_checkpoint")); testutil_assert(sscanf(ts_string, "%" SCNx64, &stable) == 1); printf("Checkpoint %d complete at stable %" PRIu64 ".\n", i, stable); fflush(stdout); /* * Create the checkpoint file so that the parent process knows * at least one checkpoint has finished and can start its * timer. */ if (first_ckpt) { testutil_checksys((fp = fopen(ckpt_file, "w")) == NULL); first_ckpt = false; testutil_checksys(fclose(fp) != 0); } } /* NOTREACHED */ }
/*
 * wt_shutdown --
 *	Checkpoint through a fresh session and then close the global
 *	WiredTiger connection. Any failing step is fatal.
 */
static void
wt_shutdown(void)
{
	WT_SESSION *flush_session;
	int rc;

	rc = conn->open_session(conn, NULL, NULL, &flush_session);
	if (rc != 0)
		testutil_die(rc, "conn.session");

	rc = flush_session->checkpoint(flush_session, NULL);
	if (rc != 0)
		testutil_die(rc, "session.checkpoint");

	/* The session is not closed separately; the connection close follows. */
	rc = conn->close(conn, NULL);
	if (rc != 0)
		testutil_die(rc, "conn.close");
}
/*
 * load --
 *	Create the object "name" and bulk-load keys 1..cfg->nkeys into it, then
 *	checkpoint. The key and value encoding depends on cfg->ftype. On return
 *	cfg->key_range is set to cfg->nkeys.
 */
void
load(SHARED_CONFIG *cfg, const char *name)
{
	WT_CONNECTION *conn;
	WT_CURSOR *cursor;
	WT_ITEM item;
	WT_SESSION *session;
	size_t len;
	uint64_t keyno;
	char keybuf[64], valuebuf[64];

	conn = cfg->conn;

	file_create(cfg, name);

	testutil_check(conn->open_session(conn, NULL, NULL, &session));
	testutil_check(
	    session->open_cursor(session, name, NULL, "bulk", &cursor));

	for (keyno = 1; keyno <= cfg->nkeys; ++keyno) {
		/* ROW uses a zero-padded string key, other types the number. */
		if (cfg->ftype != ROW)
			cursor->set_key(cursor, (uint32_t)keyno);
		else {
			testutil_check(__wt_snprintf(
			    keybuf, sizeof(keybuf), "%016" PRIu64, keyno));
			cursor->set_key(cursor, keybuf);
		}

		item.data = valuebuf;
		if (cfg->ftype != FIX) {
			/* Format the key number and pass it as an item. */
			testutil_check(__wt_snprintf_len_set(
			    valuebuf, sizeof(valuebuf),
			    &len, "%37" PRIu64, keyno));
			item.size = (uint32_t)len;
			cursor->set_value(cursor, &item);
		} else
			cursor->set_value(cursor, 0x01);

		testutil_check(cursor->insert(cursor));
	}

	/* The workload phase starts from the range that was just loaded. */
	cfg->key_range = cfg->nkeys;

	testutil_check(cursor->close(cursor));
	testutil_check(session->checkpoint(session, NULL));
	testutil_check(session->close(session, NULL));
}
void * checkpoint_worker(void *arg) { CONFIG *cfg; WT_CONNECTION *conn; WT_SESSION *session; int ret; struct timeval e, s; uint32_t i; uint64_t ms; session = NULL; cfg = (CONFIG *)arg; conn = cfg->conn; if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) { lprintf(cfg, ret, 0, "open_session failed in checkpoint thread."); goto err; } while (g_util_running) { /* * TODO: do we care how long the checkpoint takes? */ /* Break the sleep up, so we notice interrupts faster. */ for (i = 0; i < cfg->checkpoint_interval; i++) { sleep(cfg->report_interval); if (!g_util_running) break; } gettimeofday(&s, NULL); if ((ret = session->checkpoint(session, NULL)) != 0) /* Report errors and continue. */ lprintf(cfg, ret, 0, "Checkpoint failed."); gettimeofday(&e, NULL); ms = (e.tv_sec * 1000) + (e.tv_usec / 1000.0); ms -= (s.tv_sec * 1000) + (s.tv_usec / 1000.0); lprintf(cfg, 0, 1, "Finished checkpoint in %" PRIu64 " ms.", ms); } err: if (session != NULL) session->close(session, NULL); return (arg); }
/*
 * wt_shutdown --
 *	Verify FNAME, checkpoint, and close the global WiredTiger connection.
 *	Any failing step is fatal.
 */
static void
wt_shutdown(void)
{
	WT_SESSION *final_session;
	int status;

	status = conn->open_session(conn, NULL, NULL, &final_session);
	if (status != 0)
		die("conn.session", status);

	/* Verify runs before the final checkpoint. */
	status = final_session->verify(final_session, FNAME, NULL);
	if (status != 0)
		die("session.verify", status);

	status = final_session->checkpoint(final_session, NULL);
	if (status != 0)
		die("session.checkpoint", status);

	/* The session is not closed separately; the connection close follows. */
	status = conn->close(conn, NULL);
	if (status != 0)
		die("conn.close", status);
}
/*
 * wt_shutdown --
 *	Checkpoint through a fresh session and then close the WiredTiger
 *	connection held in cfg. Any failing step is fatal.
 */
static void
wt_shutdown(SHARED_CONFIG *cfg)
{
	WT_CONNECTION *wt_conn;
	WT_SESSION *ckpt_session;
	int err;

	wt_conn = cfg->conn;

	err = wt_conn->open_session(wt_conn, NULL, NULL, &ckpt_session);
	if (err != 0)
		testutil_die(err, "conn.session");

	err = ckpt_session->checkpoint(ckpt_session, NULL);
	if (err != 0)
		testutil_die(err, "session.checkpoint");

	/* The session is not closed separately; the connection close follows. */
	err = wt_conn->close(wt_conn, NULL);
	if (err != 0)
		testutil_die(err, "conn.close");
}
/* * real_checkpointer -- * Do the work of creating checkpoints and then verifying them. Also * responsible for finishing in a timely fashion. */ static int real_checkpointer(void) { WT_SESSION *session; char *checkpoint_config, _buf[128]; int ret; if (g.running == 0) return (log_print_err( "Checkpoint thread started stopped\n", EINVAL, 1)); while (g.ntables > g.ntables_created) sched_yield(); if ((ret = g.conn->open_session(g.conn, NULL, NULL, &session)) != 0) return (log_print_err("conn.open_session", ret, 1)); if (strncmp(g.checkpoint_name, "WiredTigerCheckpoint", strlen("WiredTigerCheckpoint")) == 0) checkpoint_config = NULL; else { checkpoint_config = _buf; snprintf(checkpoint_config, 128, "name=%s", g.checkpoint_name); } while (g.running) { /* Execute a checkpoint */ if ((ret = session->checkpoint( session, checkpoint_config)) != 0) return (log_print_err("session.checkpoint", ret, 1)); printf("Finished a checkpoint\n"); if (!g.running) goto done; /* Verify the content of the checkpoint. */ if ((ret = verify_checkpoint(session)) != 0) return (log_print_err("verify_checkpoint", ret, 1)); } done: if ((ret = session->close(session, NULL)) != 0) return (log_print_err("session.close", ret, 1)); return (0); }
int main(void) { WT_CONNECTION *conn; WT_CURSOR *cursor; WT_SESSION *session; int ret; /* * Create a clean test directory for this run of the test program if the * environment variable isn't already set (as is done by make check). */ if (getenv("WIREDTIGER_HOME") == NULL) { home = "WT_HOME"; ret = system("rm -rf WT_HOME && mkdir WT_HOME"); } else home = NULL; ret = wiredtiger_open(home, NULL, "create,statistics=(all)", &conn); ret = conn->open_session(conn, NULL, NULL, &session); ret = session->create( session, "table:access", "key_format=S,value_format=S"); ret = session->open_cursor( session, "table:access", NULL, NULL, &cursor); cursor->set_key(cursor, "key"); cursor->set_value(cursor, "value"); ret = cursor->insert(cursor); ret = cursor->close(cursor); ret = session->checkpoint(session, NULL); ret = print_database_stats(session); ret = print_file_stats(session); ret = print_overflow_pages(session); ret = print_derived_stats(session); return (conn->close(conn, NULL) == 0 ? ret : EXIT_FAILURE); }
/*
 * obj_checkpoint --
 *	Take one forced checkpoint in a temporary session. EBUSY and ENOENT
 *	from the checkpoint are ignored; every other failure is fatal.
 */
void
obj_checkpoint(void)
{
	WT_SESSION *ckpt_session;
	int rc;

	rc = conn->open_session(conn, NULL, NULL, &ckpt_session);
	if (rc != 0)
		testutil_die(rc, "conn.session");

	/*
	 * "force" makes sure a checkpoint is really taken. Forced checkpoints
	 * can race with other metadata operations and return EBUSY; real
	 * applications would be expected to retry in that case.
	 */
	rc = ckpt_session->checkpoint(ckpt_session, "force");
	if (rc != 0 && rc != EBUSY && rc != ENOENT)
		testutil_die(rc, "session.checkpoint");

	rc = ckpt_session->close(ckpt_session, NULL);
	if (rc != 0)
		testutil_die(rc, "session.close");
}
static void uri_init(void) { WT_CURSOR *cursor; WT_SESSION *session; u_int i, key; char buf[128]; for (i = 0; i < uris; ++i) if (uri_list[i] == NULL) { testutil_check( __wt_snprintf(buf, sizeof(buf), "table:%u", i)); uri_list[i] = dstrdup(buf); } testutil_check(conn->open_session(conn, NULL, NULL, &session)); /* Initialize the file contents. */ for (i = 0; i < uris; ++i) { testutil_check(__wt_snprintf(buf, sizeof(buf), "key_format=S,value_format=S," "allocation_size=4K,leaf_page_max=32KB,")); testutil_check(session->create(session, uri_list[i], buf)); testutil_check(session->open_cursor( session, uri_list[i], NULL, NULL, &cursor)); for (key = 1; key < MAXKEY; ++key) { testutil_check(__wt_snprintf( buf, sizeof(buf), "key:%020u", key)); cursor->set_key(cursor, buf); cursor->set_value(cursor, buf); testutil_check(cursor->insert(cursor)); } testutil_check(cursor->close(cursor)); } /* Create a checkpoint we can use for readonly handles. */ testutil_check(session->checkpoint(session, NULL)); testutil_check(session->close(session, NULL)); }
int main(int argc, char *argv[]) { WT_CONNECTION *conn; WT_CURSOR *cursor; WT_SESSION *session; home = example_setup(argc, argv); error_check( wiredtiger_open(home, NULL, "create,statistics=(all)", &conn)); error_check(conn->open_session(conn, NULL, NULL, &session)); error_check(session->create(session, "table:access", "key_format=S,value_format=S,columns=(k,v)")); error_check(session->open_cursor( session, "table:access", NULL, NULL, &cursor)); cursor->set_key(cursor, "key"); cursor->set_value(cursor, "value"); error_check(cursor->insert(cursor)); error_check(cursor->close(cursor)); error_check(session->checkpoint(session, NULL)); print_database_stats(session); print_file_stats(session); print_join_cursor_stats(session); print_overflow_pages(session); print_derived_stats(session); error_check(conn->close(conn, NULL)); return (EXIT_SUCCESS); }
static void * ops(void *arg) { TINFO *tinfo; WT_CONNECTION *conn; WT_CURSOR *cursor, *cursor_insert; WT_SESSION *session; WT_ITEM key, value; uint64_t keyno, ckpt_op, session_op; uint32_t op; uint8_t *keybuf, *valbuf; u_int np; int ckpt_available, dir, insert, intxn, notfound, readonly, ret; char *ckpt_config, ckpt_name[64]; tinfo = arg; /* Initialize the per-thread random number generator. */ __wt_random_init(&tinfo->rnd); conn = g.wts_conn; keybuf = valbuf = NULL; readonly = 0; /* -Wconditional-uninitialized */ /* Set up the default key and value buffers. */ key_gen_setup(&keybuf); val_gen_setup(&tinfo->rnd, &valbuf); /* Set the first operation where we'll create sessions and cursors. */ session_op = 0; session = NULL; cursor = cursor_insert = NULL; /* Set the first operation where we'll perform checkpoint operations. */ ckpt_op = g.c_checkpoints ? mmrand(&tinfo->rnd, 100, 10000) : 0; ckpt_available = 0; for (intxn = 0; !tinfo->quit; ++tinfo->ops) { /* * We can't checkpoint or swap sessions/cursors while in a * transaction, resolve any running transaction. */ if (intxn && (tinfo->ops == ckpt_op || tinfo->ops == session_op)) { if ((ret = session->commit_transaction( session, NULL)) != 0) die(ret, "session.commit_transaction"); ++tinfo->commit; intxn = 0; } /* Open up a new session and cursors. */ if (tinfo->ops == session_op || session == NULL || cursor == NULL) { if (session != NULL && (ret = session->close(session, NULL)) != 0) die(ret, "session.close"); if ((ret = conn->open_session(conn, NULL, ops_session_config(&tinfo->rnd), &session)) != 0) die(ret, "connection.open_session"); /* * 10% of the time, perform some read-only operations * from a checkpoint. * * Skip that if we single-threaded and doing checks * against a Berkeley DB database, because that won't * work because the Berkeley DB database records won't * match the checkpoint. Also skip if we are using * LSM, because it doesn't support reads from * checkpoints. 
*/ if (!SINGLETHREADED && !DATASOURCE("lsm") && ckpt_available && mmrand(&tinfo->rnd, 1, 10) == 1) { if ((ret = session->open_cursor(session, g.uri, NULL, ckpt_name, &cursor)) != 0) die(ret, "session.open_cursor"); /* Pick the next session/cursor close/open. */ session_op += 250; /* Checkpoints are read-only. */ readonly = 1; } else { /* * Open two cursors: one for overwriting and one * for append (if it's a column-store). * * The reason is when testing with existing * records, we don't track if a record was * deleted or not, which means we must use * cursor->insert with overwriting configured. * But, in column-store files where we're * testing with new, appended records, we don't * want to have to specify the record number, * which requires an append configuration. */ if ((ret = session->open_cursor(session, g.uri, NULL, "overwrite", &cursor)) != 0) die(ret, "session.open_cursor"); if ((g.type == FIX || g.type == VAR) && (ret = session->open_cursor(session, g.uri, NULL, "append", &cursor_insert)) != 0) die(ret, "session.open_cursor"); /* Pick the next session/cursor close/open. */ session_op += mmrand(&tinfo->rnd, 100, 5000); /* Updates supported. */ readonly = 0; } } /* Checkpoint the database. */ if (tinfo->ops == ckpt_op && g.c_checkpoints) { /* * LSM and data-sources don't support named checkpoints, * and we can't drop a named checkpoint while there's a * cursor open on it, otherwise 20% of the time name the * checkpoint. */ if (DATASOURCE("helium") || DATASOURCE("kvsbdb") || DATASOURCE("lsm") || readonly || mmrand(&tinfo->rnd, 1, 5) == 1) ckpt_config = NULL; else { (void)snprintf(ckpt_name, sizeof(ckpt_name), "name=thread-%d", tinfo->id); ckpt_config = ckpt_name; } /* Named checkpoints lock out backups */ if (ckpt_config != NULL && (ret = pthread_rwlock_wrlock(&g.backup_lock)) != 0) die(ret, "pthread_rwlock_wrlock: backup lock"); if ((ret = session->checkpoint(session, ckpt_config)) != 0) die(ret, "session.checkpoint%s%s", ckpt_config == NULL ? 
"" : ": ", ckpt_config == NULL ? "" : ckpt_config); if (ckpt_config != NULL && (ret = pthread_rwlock_unlock(&g.backup_lock)) != 0) die(ret, "pthread_rwlock_wrlock: backup lock"); /* Rephrase the checkpoint name for cursor open. */ if (ckpt_config == NULL) strcpy(ckpt_name, "checkpoint=WiredTigerCheckpoint"); else (void)snprintf(ckpt_name, sizeof(ckpt_name), "checkpoint=thread-%d", tinfo->id); ckpt_available = 1; /* Pick the next checkpoint operation. */ ckpt_op += mmrand(&tinfo->rnd, 5000, 20000); } /* * If we're not single-threaded and we're not in a transaction, * start a transaction 20% of the time. */ if (!SINGLETHREADED && !intxn && mmrand(&tinfo->rnd, 1, 10) >= 8) { if ((ret = session->begin_transaction(session, NULL)) != 0) die(ret, "session.begin_transaction"); intxn = 1; } insert = notfound = 0; keyno = mmrand(&tinfo->rnd, 1, (u_int)g.rows); key.data = keybuf; value.data = valbuf; /* * Perform some number of operations: the percentage of deletes, * inserts and writes are specified, reads are the rest. The * percentages don't have to add up to 100, a high percentage * of deletes will mean fewer inserts and writes. Modifications * are always followed by a read to confirm it worked. */ op = readonly ? UINT32_MAX : mmrand(&tinfo->rnd, 1, 100); if (op < g.c_delete_pct) { ++tinfo->remove; switch (g.type) { case ROW: /* * If deleting a non-existent record, the cursor * won't be positioned, and so can't do a next. */ if (row_remove(cursor, &key, keyno, ¬found)) goto deadlock; break; case FIX: case VAR: if (col_remove(cursor, &key, keyno, ¬found)) goto deadlock; break; } } else if (op < g.c_delete_pct + g.c_insert_pct) { ++tinfo->insert; switch (g.type) { case ROW: if (row_insert( tinfo, cursor, &key, &value, keyno)) goto deadlock; insert = 1; break; case FIX: case VAR: /* * We can only append so many new records, if * we've reached that limit, update a record * instead of doing an insert. 
*/ if (g.append_cnt >= g.append_max) goto skip_insert; /* Insert, then reset the insert cursor. */ if (col_insert(tinfo, cursor_insert, &key, &value, &keyno)) goto deadlock; if ((ret = cursor_insert->reset(cursor_insert)) != 0) die(ret, "cursor.reset"); insert = 1; break; } } else if ( op < g.c_delete_pct + g.c_insert_pct + g.c_write_pct) { ++tinfo->update; switch (g.type) { case ROW: if (row_update( tinfo, cursor, &key, &value, keyno)) goto deadlock; break; case FIX: case VAR: skip_insert: if (col_update(tinfo, cursor, &key, &value, keyno)) goto deadlock; break; } } else { ++tinfo->search; if (read_row(cursor, &key, keyno)) if (intxn) goto deadlock; continue; } /* * The cursor is positioned if we did any operation other than * insert, do a small number of next/prev cursor operations in * a random direction. */ if (!insert) { dir = (int)mmrand(&tinfo->rnd, 0, 1); for (np = 0; np < mmrand(&tinfo->rnd, 1, 8); ++np) { if (notfound) break; if (nextprev(cursor, dir, ¬found)) goto deadlock; } } /* Read to confirm the operation. */ ++tinfo->search; if (read_row(cursor, &key, keyno)) goto deadlock; /* Reset the cursor: there is no reason to keep pages pinned. */ if ((ret = cursor->reset(cursor)) != 0) die(ret, "cursor.reset"); /* * If we're in the transaction, commit 40% of the time and * rollback 10% of the time. */ if (intxn) switch (mmrand(&tinfo->rnd, 1, 10)) { case 1: case 2: case 3: case 4: /* 40% */ if ((ret = session->commit_transaction( session, NULL)) != 0) die(ret, "session.commit_transaction"); ++tinfo->commit; intxn = 0; break; case 5: /* 10% */ if (0) { deadlock: ++tinfo->deadlock; } if ((ret = session->rollback_transaction( session, NULL)) != 0) die(ret, "session.rollback_transaction"); ++tinfo->rollback; intxn = 0; break; default: break; } } if (session != NULL && (ret = session->close(session, NULL)) != 0) die(ret, "session.close"); free(keybuf); free(valbuf); tinfo->state = TINFO_COMPLETE; return (NULL); }
void WiredTigerSessionCache::waitUntilDurable(bool forceCheckpoint, bool stableCheckpoint) { // For inMemory storage engines, the data is "as durable as it's going to get". // That is, a restart is equivalent to a complete node failure. if (isEphemeral()) { return; } const int shuttingDown = _shuttingDown.fetchAndAdd(1); ON_BLOCK_EXIT([this] { _shuttingDown.fetchAndSubtract(1); }); uassert(ErrorCodes::ShutdownInProgress, "Cannot wait for durability because a shutdown is in progress", !(shuttingDown & kShuttingDownMask)); // Stable checkpoints are only meaningful in a replica set. Replication sets the "stable // timestamp". If the stable timestamp is unset, WiredTiger takes a full checkpoint, which is // incidentally what we want. A "true" stable checkpoint (a stable timestamp was set on the // WT_CONNECTION, i.e: replication is on) requires `forceCheckpoint` to be true and journaling // to be enabled. if (stableCheckpoint && getGlobalReplSettings().usingReplSets()) { invariant(forceCheckpoint && _engine->isDurable()); } // When forcing a checkpoint with journaling enabled, don't synchronize with other // waiters, as a log flush is much cheaper than a full checkpoint. if (forceCheckpoint && _engine->isDurable()) { UniqueWiredTigerSession session = getSession(); WT_SESSION* s = session->getSession(); { stdx::unique_lock<stdx::mutex> lk(_journalListenerMutex); JournalListener::Token token = _journalListener->getToken(); auto config = stableCheckpoint ? "use_timestamp=true" : "use_timestamp=false"; invariantWTOK(s->checkpoint(s, config)); _journalListener->onDurable(token); } LOG(4) << "created checkpoint (forced)"; return; } uint32_t start = _lastSyncTime.load(); // Do the remainder in a critical section that ensures only a single thread at a time // will attempt to synchronize. 
stdx::unique_lock<stdx::mutex> lk(_lastSyncMutex); uint32_t current = _lastSyncTime.loadRelaxed(); // synchronized with writes through mutex if (current != start) { // Someone else synced already since we read lastSyncTime, so we're done! return; } _lastSyncTime.store(current + 1); // Nobody has synched yet, so we have to sync ourselves. // This gets the token (OpTime) from the last write, before flushing (either the journal, or a // checkpoint), and then reports that token (OpTime) as a durable write. stdx::unique_lock<stdx::mutex> jlk(_journalListenerMutex); JournalListener::Token token = _journalListener->getToken(); // Initialize on first use. if (!_waitUntilDurableSession) { invariantWTOK( _conn->open_session(_conn, NULL, "isolation=snapshot", &_waitUntilDurableSession)); } // Use the journal when available, or a checkpoint otherwise. if (_engine && _engine->isDurable()) { invariantWTOK(_waitUntilDurableSession->log_flush(_waitUntilDurableSession, "sync=on")); LOG(4) << "flushed journal"; } else { invariantWTOK(_waitUntilDurableSession->checkpoint(_waitUntilDurableSession, NULL)); LOG(4) << "created checkpoint"; } _journalListener->onDurable(token); }
static void * ops(void *arg) { TINFO *tinfo; WT_CONNECTION *conn; WT_CURSOR *cursor, *cursor_insert; WT_SESSION *session; WT_ITEM key, value; uint64_t cnt, keyno, sync_op, thread_ops; uint32_t op; uint8_t *keybuf, *valbuf; u_int np; int dir, insert, notfound, ret, sync_drop; char sync_name[64]; conn = g.wts_conn; tinfo = arg; /* Set up the default key and value buffers. */ memset(&key, 0, sizeof(key)); key_gen_setup(&keybuf); memset(&value, 0, sizeof(value)); val_gen_setup(&valbuf); /* Open a session. */ if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) die(ret, "connection.open_session"); /* * Open two cursors: one configured for overwriting and one configured * for append if we're dealing with a column-store. * * The reason is when testing with existing records, we don't track if * a record was deleted or not, which means we must use cursor->insert * with overwriting configured. But, in column-store files where we're * testing with new, appended records, we don't want to have to specify * the record number, which requires an append configuration. */ cursor = cursor_insert = NULL; if ((ret = session->open_cursor(session, WT_TABLENAME, NULL, "overwrite", &cursor)) != 0) die(ret, "session.open_cursor"); if ((g.c_file_type == FIX || g.c_file_type == VAR) && (ret = session->open_cursor(session, WT_TABLENAME, NULL, "append", &cursor_insert)) != 0) die(ret, "session.open_cursor"); /* Each thread does its share of the total operations. */ thread_ops = g.c_ops / g.c_threads; /* Pick an operation where we'll do a sync and create the name. 
*/ sync_drop = 0; sync_op = MMRAND(1, thread_ops); snprintf(sync_name, sizeof(sync_name), "snapshot=thread-%d", tinfo->id); for (cnt = 0; cnt < thread_ops; ++cnt) { if (SINGLETHREADED && cnt % 100 == 0) track("read/write ops", 0ULL, tinfo); if (cnt == sync_op) { if (sync_drop && (int)MMRAND(1, 4) == 1) { if ((ret = session->drop( session, WT_TABLENAME, sync_name)) != 0) die(ret, "session.drop: %s: %s", WT_TABLENAME, sync_name); sync_drop = 0; } else { if ((ret = session->checkpoint( session, sync_name)) != 0) die(ret, "session.checkpoint: %s", sync_name); sync_drop = 1; } /* * Pick the next sync operation, try for roughly five * snapshot operations per thread run. */ sync_op += MMRAND(1, thread_ops) / 5; } insert = notfound = 0; keyno = MMRAND(1, g.rows); key.data = keybuf; value.data = valbuf; /* * Perform some number of operations: the percentage of deletes, * inserts and writes are specified, reads are the rest. The * percentages don't have to add up to 100, a high percentage * of deletes will mean fewer inserts and writes. A read * operation always follows a modification to confirm it worked. */ op = (uint32_t)(wts_rand() % 100); if (op < g.c_delete_pct) { ++tinfo->remove; switch (g.c_file_type) { case ROW: /* * If deleting a non-existent record, the cursor * won't be positioned, and so can't do a next. */ row_remove(cursor, &key, keyno, ¬found); break; case FIX: case VAR: col_remove(cursor, &key, keyno, ¬found); break; } } else if (op < g.c_delete_pct + g.c_insert_pct) { ++tinfo->insert; switch (g.c_file_type) { case ROW: row_update(cursor, &key, &value, keyno, 1); break; case FIX: case VAR: /* * Reset the standard cursor so it doesn't keep * pages pinned. 
*/ if ((ret = cursor->reset(cursor)) != 0) die(ret, "cursor.reset"); col_insert(cursor_insert, &key, &value, &keyno); insert = 1; break; } } else if ( op < g.c_delete_pct + g.c_insert_pct + g.c_write_pct) { ++tinfo->update; switch (g.c_file_type) { case ROW: row_update(cursor, &key, &value, keyno, 0); break; case FIX: case VAR: col_update(cursor, &key, &value, keyno); break; } } else { ++tinfo->search; read_row(cursor, &key, keyno); continue; } /* * If we did any operation, we've set the cursor, do a small * number of next/prev cursor operations in a random direction. */ dir = (int)MMRAND(0, 1); for (np = 0; np < MMRAND(1, 8); ++np) { if (notfound) break; nextprev( insert ? cursor_insert : cursor, dir, ¬found); } if (insert && (ret = cursor_insert->reset(cursor_insert)) != 0) die(ret, "cursor.reset"); /* Read the value we modified to confirm the operation. */ read_row(cursor, &key, keyno); } if ((ret = session->close(session, NULL)) != 0) die(ret, "session.close"); free(keybuf); free(valbuf); tinfo->state = TINFO_COMPLETE; return (NULL); }
/*
 * subtest_populate --
 *	Populate the tables.
 *
 *	Inserts opts->nrecords rows into "table:subtest" and "table:subtest2",
 *	one transaction per row, taking a checkpoint after the first row and a
 *	second one (with failures enabled) once i reaches nrecords/100.
 *
 *	opts:       supplies the connection, record count, verbosity and the
 *	            value passed to enable_failures.
 *	close_test: when a failure was recorded, false exits the process
 *	            immediately, true goes on to close the handles.
 *
 *	NOTE(review): CHECK is a macro defined outside this block; from its use
 *	here it presumably records an expected failure in the local "failed"
 *	when its second argument is true -- confirm against its definition.
 */
static void
subtest_populate(TEST_OPTS *opts, bool close_test)
{
	WT_CURSOR *maincur, *maincur2;
	WT_RAND_STATE rnd;
	WT_SESSION *session;
	uint64_t i, nrecords;
	uint32_t rndint;
	int key, v0, v1, v2;
	char *big, *bigref;
	bool failed;

	failed = false;
	__wt_random_init_seed(NULL, &rnd);
	CHECK(create_big_string(&bigref), false);
	nrecords = opts->nrecords;

	CHECK(opts->conn->open_session(
	    opts->conn, NULL, NULL, &session), false);

	CHECK(session->open_cursor(session, "table:subtest", NULL,
	    NULL, &maincur), false);

	CHECK(session->open_cursor(session, "table:subtest2", NULL,
	    NULL, &maincur2), false);

	/* The loop ends early once a failure has been recorded. */
	for (i = 0; i < nrecords && !failed; i++) {
		rndint = __wt_random(&rnd);
		generate_key(i, &key);
		generate_value(rndint, i, bigref, &v0, &v1, &v2, &big);

		/* Both tables are updated inside a single transaction. */
		CHECK(session->begin_transaction(session, NULL), false);
		maincur->set_key(maincur, key);
		maincur->set_value(maincur, v0, v1, v2, big);
		CHECK(maincur->insert(maincur), false);

		maincur2->set_key(maincur2, key);
		maincur2->set_value(maincur2, rndint);
		CHECK(maincur2->insert(maincur2), false);
		CHECK(session->commit_transaction(session, NULL), false);

		if (i == 0)
			/*
			 * Force an initial checkpoint, that helps to
			 * distinguish a clear failure from just not running
			 * long enough.
			 */
			CHECK(session->checkpoint(session, NULL), false);

		if ((i + 1) % VERBOSE_PRINT == 0 && opts->verbose)
			printf(" %" PRIu64 "/%" PRIu64 "\n",
			    (i + 1), nrecords);
		/* Attempt to isolate the failures to checkpointing. */
		if (i == (nrecords/100)) {
			/* Failures are enabled only around this checkpoint. */
			enable_failures(opts->nops, 1000000);
			/* CHECK should expect failures. */
			CHECK(session->checkpoint(session, NULL), true);
			disable_failures();
			if (failed && opts->verbose)
				printf("checkpoint failed (expected).\n");
		}
	}

	/*
	 * Closing handles after an extreme fail is likely to cause
	 * cascading failures (or crashes), so recommended practice is
	 * to immediately exit. We're interested in testing both with
	 * and without the recommended practice.
	 */
	if (failed) {
		if (!close_test) {
			fprintf(stderr, "exit early.\n");
			exit(0);
		} else
			fprintf(stderr, "closing after failure.\n");
	}

	free(bigref);
	CHECK(maincur->close(maincur), false);
	CHECK(maincur2->close(maincur2), false);
	CHECK(session->close(session, NULL), false);
}
static void * ops(void *arg) { TINFO *tinfo; WT_CONNECTION *conn; WT_CURSOR *cursor, *cursor_insert; WT_SESSION *session; WT_ITEM key, value; uint64_t cnt, keyno, ckpt_op, session_op, thread_ops; uint32_t op; uint8_t *keybuf, *valbuf; u_int np; int dir, insert, intxn, notfound, ret; char *ckpt_config, config[64]; tinfo = arg; conn = g.wts_conn; keybuf = valbuf = NULL; /* Set up the default key and value buffers. */ key_gen_setup(&keybuf); val_gen_setup(&valbuf); /* * Each thread does its share of the total operations, and make sure * that it's not 0 (testing runs: threads might be larger than ops). */ thread_ops = 100 + g.c_ops / g.c_threads; /* * Select the first operation where we'll create sessions and cursors, * perform checkpoint operations. */ ckpt_op = MMRAND(1, thread_ops); session_op = 0; session = NULL; cursor = cursor_insert = NULL; for (intxn = 0, cnt = 0; cnt < thread_ops; ++cnt) { if (SINGLETHREADED && cnt % 100 == 0) track("ops", 0ULL, tinfo); /* * We can't checkpoint or swap sessions/cursors while in a * transaction, resolve any running transaction. Otherwise, * reset the cursor: we may block waiting for a lock and there * is no reason to keep pages pinned. */ if (cnt == ckpt_op || cnt == session_op) { if (intxn) { if ((ret = session->commit_transaction( session, NULL)) != 0) die(ret, "session.commit_transaction"); ++tinfo->commit; intxn = 0; } else if (cursor != NULL && (ret = cursor->reset(cursor)) != 0) die(ret, "cursor.reset"); } /* Open up a new session and cursors. */ if (cnt == session_op || session == NULL || cursor == NULL) { if (session != NULL && (ret = session->close(session, NULL)) != 0) die(ret, "session.close"); if ((ret = conn->open_session( conn, NULL, NULL, &session)) != 0) die(ret, "connection.open_session"); /* * Open two cursors: one configured for overwriting and * one configured for append if we're dealing with a * column-store. 
* * The reason is when testing with existing records, we * don't track if a record was deleted or not, which * means we must use cursor->insert with overwriting * configured. But, in column-store files where we're * testing with new, appended records, we don't want to * have to specify the record number, which requires an * append configuration. */ if ((ret = session->open_cursor(session, g.uri, NULL, "overwrite", &cursor)) != 0) die(ret, "session.open_cursor"); if ((g.type == FIX || g.type == VAR) && (ret = session->open_cursor(session, g.uri, NULL, "append", &cursor_insert)) != 0) die(ret, "session.open_cursor"); /* Pick the next session/cursor close/open. */ session_op += SINGLETHREADED ? MMRAND(1, thread_ops) : 100 * MMRAND(1, 50); } /* Checkpoint the database. */ if (cnt == ckpt_op) { /* * LSM and data-sources don't support named checkpoints, * else 25% of the time we name the checkpoint. */ if (DATASOURCE("lsm") || DATASOURCE("kvsbdb") || DATASOURCE("memrata") || MMRAND(1, 4) == 1) ckpt_config = NULL; else { (void)snprintf(config, sizeof(config), "name=thread-%d", tinfo->id); ckpt_config = config; } /* Named checkpoints lock out hot backups */ if (ckpt_config != NULL && (ret = pthread_rwlock_wrlock(&g.backup_lock)) != 0) die(ret, "pthread_rwlock_wrlock: hot-backup lock"); if ((ret = session->checkpoint(session, ckpt_config)) != 0) die(ret, "session.checkpoint%s%s", ckpt_config == NULL ? "" : ": ", ckpt_config == NULL ? "" : ckpt_config); if (ckpt_config != NULL && (ret = pthread_rwlock_unlock(&g.backup_lock)) != 0) die(ret, "pthread_rwlock_wrlock: hot-backup lock"); /* * Pick the next checkpoint operation, try for roughly * five checkpoint operations per thread run. */ ckpt_op += MMRAND(1, thread_ops) / 5; } /* * If we're not single-threaded and we're not in a transaction, * start a transaction 80% of the time. 
*/ if (!SINGLETHREADED && !intxn && MMRAND(1, 10) >= 8) { if ((ret = session->begin_transaction(session, NULL)) != 0) die(ret, "session.begin_transaction"); intxn = 1; } insert = notfound = 0; keyno = MMRAND(1, g.rows); key.data = keybuf; value.data = valbuf; /* * Perform some number of operations: the percentage of deletes, * inserts and writes are specified, reads are the rest. The * percentages don't have to add up to 100, a high percentage * of deletes will mean fewer inserts and writes. Modifications * are always followed by a read to confirm it worked. */ op = (uint32_t)(rng() % 100); if (op < g.c_delete_pct) { ++tinfo->remove; switch (g.type) { case ROW: /* * If deleting a non-existent record, the cursor * won't be positioned, and so can't do a next. */ if (row_remove(cursor, &key, keyno, ¬found)) goto deadlock; break; case FIX: case VAR: if (col_remove(cursor, &key, keyno, ¬found)) goto deadlock; break; } } else if (op < g.c_delete_pct + g.c_insert_pct) { ++tinfo->insert; switch (g.type) { case ROW: if (row_insert(cursor, &key, &value, keyno)) goto deadlock; insert = 1; break; case FIX: case VAR: /* * We can only append so many new records, if * we've reached that limit, update a record * instead of doing an insert. */ if (g.append_cnt >= g.append_max) goto skip_insert; /* * Reset the standard cursor so it doesn't keep * pages pinned. */ if ((ret = cursor->reset(cursor)) != 0) die(ret, "cursor.reset"); /* Insert, then reset the insert cursor. 
*/ if (col_insert( cursor_insert, &key, &value, &keyno)) goto deadlock; if ((ret = cursor_insert->reset(cursor_insert)) != 0) die(ret, "cursor.reset"); insert = 1; break; } } else if ( op < g.c_delete_pct + g.c_insert_pct + g.c_write_pct) { ++tinfo->update; switch (g.type) { case ROW: if (row_update(cursor, &key, &value, keyno)) goto deadlock; break; case FIX: case VAR: skip_insert: if (col_update(cursor, &key, &value, keyno)) goto deadlock; break; } } else { ++tinfo->search; if (read_row(cursor, &key, keyno)) goto deadlock; continue; } /* * The cursor is positioned if we did any operation other than * insert, do a small number of next/prev cursor operations in * a random direction. */ if (!insert) { dir = (int)MMRAND(0, 1); for (np = 0; np < MMRAND(1, 8); ++np) { if (notfound) break; if (nextprev(cursor, dir, ¬found)) goto deadlock; } } /* Read the value we modified to confirm the operation. */ ++tinfo->search; if (read_row(cursor, &key, keyno)) goto deadlock; /* * If we're in the transaction, commit 40% of the time and * rollback 10% of the time. */ if (intxn) switch (MMRAND(1, 10)) { case 1: case 2: case 3: case 4: /* 40% */ if ((ret = session->commit_transaction( session, NULL)) != 0) die(ret, "session.commit_transaction"); ++tinfo->commit; intxn = 0; break; case 5: /* 10% */ if (0) { deadlock: ++tinfo->deadlock; } if ((ret = session->rollback_transaction( session, NULL)) != 0) die(ret, "session.commit_transaction"); ++tinfo->rollback; intxn = 0; break; default: break; } } if (session != NULL && (ret = session->close(session, NULL)) != 0) die(ret, "session.close"); free(keybuf); free(valbuf); tinfo->state = TINFO_COMPLETE; return (NULL); }
int main(int argc, char *argv[]) { WT_CONNECTION *wt_conn; WT_SESSION *session; int i; char cmd_buf[256]; (void)argc; /* Unused variable */ (void)testutil_set_progname(argv); (void)snprintf(cmd_buf, sizeof(cmd_buf), "rm -rf %s && mkdir %s", home, home); error_check(system(cmd_buf)); error_check(wiredtiger_open(home, NULL, CONN_CONFIG, &wt_conn)); setup_directories(); error_check(wt_conn->open_session(wt_conn, NULL, NULL, &session)); error_check(session->create( session, uri, "key_format=S,value_format=S")); printf("Adding initial data\n"); add_work(session, 0); printf("Taking initial backup\n"); take_full_backup(session, 0); error_check(session->checkpoint(session, NULL)); for (i = 1; i < MAX_ITERATIONS; i++) { printf("Iteration %d: adding data\n", i); add_work(session, i); error_check(session->checkpoint(session, NULL)); /* * The full backup here is only needed for testing and * comparison purposes. A normal incremental backup * procedure would not include this. */ printf("Iteration %d: taking full backup\n", i); take_full_backup(session, i); /* * Taking the incremental backup also calls truncate * to archive the log files, if the copies were successful. * See that function for details on that call. */ printf("Iteration %d: taking incremental backup\n", i); take_incr_backup(session, i); printf("Iteration %d: dumping and comparing data\n", i); error_check(compare_backups(i)); } /* * Close the connection. We're done and want to run the final * comparison between the incremental and original. */ error_check(wt_conn->close(wt_conn, NULL)); printf("Final comparison: dumping and comparing data\n"); error_check(compare_backups(0)); return (EXIT_SUCCESS); }