/*
 * __txn_logrec_init --
 *     Create the session transaction's log record buffer on first use and seed it with the
 *     commit record header (the record type followed by the transaction ID). If the
 *     transaction already has a buffer, nothing is done. Returns zero on success, otherwise
 *     the error returned by the failing call.
 */
static int
__txn_logrec_init(WT_SESSION_IMPL *session)
{
    WT_DECL_ITEM(logrec);
    WT_DECL_RET;
    WT_TXN *txn;
    size_t hdr_len;
    uint32_t rec_type;
    uint8_t *dest;
    const char *pack_fmt;

    pack_fmt = WT_UNCHECKED_STRING(Iq);
    rec_type = WT_LOGREC_COMMIT;
    txn = &session->txn;

    /* The buffer is only ever set up once per transaction. */
    if (txn->logrec != NULL)
        return (0);

    WT_ASSERT(session, txn->id != WT_TXN_NONE);

    /* Size the header first so the allocation request covers it. */
    WT_RET(__wt_struct_size(session, &hdr_len, pack_fmt, rec_type, txn->id));
    WT_RET(__wt_logrec_alloc(session, hdr_len, &logrec));

    /* Pack the header at the buffer's current end, then account for the bytes written. */
    dest = (uint8_t *)logrec->data + logrec->size;
    WT_ERR(__wt_struct_pack(session, dest, hdr_len, pack_fmt, rec_type, txn->id));
    logrec->size += (uint32_t)hdr_len;

    /* Success: the transaction now references the buffer. */
    txn->logrec = logrec;
    return (ret);

err:
    /* Packing failed: give the buffer back rather than leaving it half-initialized. */
    __wt_logrec_free(session, &logrec);
    return (ret);
}
/*
 * __txn_log_file_sync --
 *     Write a log record for a file sync. The record holds the record type, the ID of the
 *     session's current btree and whether the checkpoint-start flag was set in "flags". The
 *     LSN pointer is handed through to the log write. Returns zero on success, otherwise the
 *     error returned by the failing call.
 */
static int
__txn_log_file_sync(WT_SESSION_IMPL *session, uint32_t flags, WT_LSN *lsnp)
{
    WT_BTREE *btree;
    WT_DECL_ITEM(logrec);
    WT_DECL_RET;
    size_t hdr_len;
    uint32_t rec_type;
    uint8_t *dest;
    int is_start;
    const char *pack_fmt;

    pack_fmt = WT_UNCHECKED_STRING(III);
    rec_type = WT_LOGREC_FILE_SYNC;
    btree = S2BT(session);
    is_start = LF_ISSET(WT_TXN_LOG_CKPT_START);

    /* Compute the record size and allocate a buffer that can hold it. */
    WT_RET(__wt_struct_size(session, &hdr_len, pack_fmt, rec_type, btree->id, is_start));
    WT_RET(__wt_logrec_alloc(session, hdr_len, &logrec));

    /* Pack the record at the buffer's current end and account for the bytes written. */
    dest = (uint8_t *)logrec->data + logrec->size;
    WT_ERR(__wt_struct_pack(session, dest, hdr_len, pack_fmt, rec_type, btree->id, is_start));
    logrec->size += (uint32_t)hdr_len;

    WT_ERR(__wt_log_write(session, logrec, lsnp, 0));

    /* The buffer is released on both the success and the failure path. */
err:
    __wt_logrec_free(session, &logrec);
    return (ret);
}
/*
 * __wt_txn_release --
 *     Release the resources associated with the session's current transaction: clear its ID
 *     (and the global checkpoint state when this is the checkpoint session), clear the commit
 *     and read timestamps, free the logging buffer, discard stashed memory, release the
 *     snapshot and reset the isolation level and flags.
 */
void
__wt_txn_release(WT_SESSION_IMPL *session)
{
    WT_TXN *txn = &session->txn;
    WT_TXN_GLOBAL *global = &S2C(session)->txn_global;
    WT_TXN_STATE *state = WT_SESSION_TXN_STATE(session);

    /* All modifications must have been resolved before the transaction is released. */
    WT_ASSERT(session, txn->mod_count == 0);
    txn->notify = NULL;

    /* Clear the transaction's ID from the global table. */
    if (WT_SESSION_IS_CHECKPOINT(session)) {
        WT_ASSERT(session, state->id == WT_TXN_NONE);
        txn->id = global->checkpoint_state.id = global->checkpoint_state.pinned_id =
          WT_TXN_NONE;

        /*
         * Be extra careful to cleanup everything for checkpoints: once the global checkpoint
         * ID is cleared, we can no longer tell if this session is doing a checkpoint.
         */
        global->checkpoint_id = 0;
    } else if (F_ISSET(txn, WT_TXN_HAS_ID)) {
        WT_ASSERT(session, !WT_TXNID_LT(txn->id, global->last_running));
        WT_ASSERT(session, state->id != WT_TXN_NONE && txn->id != WT_TXN_NONE);

        /* Publish the cleared ID in the shared state before resetting the local copy. */
        WT_PUBLISH(state->id, WT_TXN_NONE);
        txn->id = WT_TXN_NONE;
    }

    __wt_txn_clear_commit_timestamp(session);
    __wt_txn_clear_read_timestamp(session);

    /* Free the scratch buffer allocated for logging. */
    __wt_logrec_free(session, &txn->logrec);

    /* Discard any memory from the session's stash that we can. */
    WT_ASSERT(session, __wt_session_gen(session, WT_GEN_SPLIT) == 0);
    __wt_stash_discard(session);

    /* Reset the transaction state to not running and release the snapshot. */
    __wt_txn_release_snapshot(session);
    txn->isolation = session->isolation;

    /* Ensure the transaction flags are cleared on exit. */
    txn->flags = 0;
}
/*
 * __wt_log_system_record --
 *     Write a system log record for the previous LSN. The record is built in a zeroed buffer
 *     of the log's allocation size, checksummed in place and written through a temporary slot
 *     that targets the supplied file handle. Returns zero on success, otherwise the error
 *     returned by the failing call.
 */
int
__wt_log_system_record(WT_SESSION_IMPL *session, WT_FH *log_fh, WT_LSN *lsn)
{
    WT_DECL_ITEM(logrec_buf);
    WT_DECL_RET;
    WT_LOG *log;
    WT_LOGSLOT tmp;
    WT_LOG_RECORD *logrec;
    WT_MYSLOT myslot;
    size_t hdr_len;
    uint32_t rec_type;
    uint8_t *dest;
    const char *pack_fmt;

    pack_fmt = WT_UNCHECKED_STRING(I);
    rec_type = WT_LOGREC_SYSTEM;
    log = S2C(session)->log;

    /* Start from a fully zeroed buffer of one allocation unit. */
    WT_RET(__wt_logrec_alloc(session, log->allocsize, &logrec_buf));
    memset((uint8_t *)logrec_buf->mem, 0, log->allocsize);

    /* Pack the record type, then the previous-LSN operation. */
    WT_ERR(__wt_struct_size(session, &hdr_len, pack_fmt, rec_type));
    dest = (uint8_t *)logrec_buf->data + logrec_buf->size;
    WT_ERR(__wt_struct_pack(session, dest, hdr_len, pack_fmt, rec_type));
    logrec_buf->size += hdr_len;
    WT_ERR(__wt_logop_prev_lsn_pack(session, logrec_buf, lsn));
    WT_ASSERT(session, logrec_buf->size <= log->allocsize);

    logrec = (WT_LOG_RECORD *)logrec_buf->mem;

    /*
     * We know system records are this size. And we have to adjust the size now because we're
     * not going through the normal log write path and the packing functions needed the
     * correct offset earlier.
     */
    logrec_buf->size = logrec->len = log->allocsize;

    /* We do not compress nor encrypt this record. */
    logrec->checksum = 0;
    logrec->flags = 0;

    /* The checksum is computed over the byte-swapped record with a zero checksum field. */
    __wt_log_record_byteswap(logrec);
    logrec->checksum = __wt_checksum(logrec, log->allocsize);
#ifdef WORDS_BIGENDIAN
    logrec->checksum = __wt_bswap32(logrec->checksum);
#endif

    /* Build a private slot for this one write. */
    WT_CLEAR(tmp);
    memset(&myslot, 0, sizeof(myslot));
    myslot.slot = &tmp;
    __wt_log_slot_activate(session, &tmp);

    /* Override the file handle to the one we're using. */
    tmp.slot_fh = log_fh;
    WT_ERR(__wt_log_fill(session, &myslot, true, logrec_buf, NULL));

    /* The buffer is released on both the success and the failure path. */
err:
    __wt_logrec_free(session, &logrec_buf);
    return (ret);
}
/*
 * __wt_txn_log_commit --
 *     Write the operations of a transaction to the log at commit time. A commit header (record
 *     type and transaction ID) is packed first, followed by one entry per loggable operation
 *     in the transaction's modification list; the record is then written with the connection's
 *     transaction sync setting. The configuration argument is unused. Returns zero on success,
 *     otherwise the error returned by the failing call.
 */
int
__wt_txn_log_commit(WT_SESSION_IMPL *session, const char *cfg[])
{
    WT_DECL_ITEM(logrec);
    WT_DECL_RET;
    WT_TXN *txn;
    WT_TXN_OP *op;
    size_t hdr_len;
    uint32_t rec_type;
    uint8_t *dest;
    u_int slot;
    const char *pack_fmt;

    WT_UNUSED(cfg);

    pack_fmt = WT_UNCHECKED_STRING(Iq);
    rec_type = WT_LOGREC_COMMIT;
    txn = &session->txn;

    /* Size, allocate and pack the commit header. */
    WT_RET(__wt_struct_size(session, &hdr_len, pack_fmt, rec_type, txn->id));
    WT_RET(__wt_logrec_alloc(session, hdr_len, &logrec));

    dest = (uint8_t *)logrec->data + logrec->size;
    WT_ERR(__wt_struct_pack(session, dest, hdr_len, pack_fmt, rec_type, txn->id));
    logrec->size += (uint32_t)hdr_len;

    /* Write updates to the log. */
    for (slot = 0; slot < txn->mod_count; ++slot) {
        op = &txn->mod[slot];

        switch (op->type) {
        case TXN_OP_BASIC:
            WT_ERR(__txn_op_log(session, logrec, op));
            break;
        case TXN_OP_INMEM:
        case TXN_OP_REF:
            /* Nothing to log, we're done. */
            break;
        case TXN_OP_TRUNCATE_COL:
            WT_ERR(__wt_logop_col_truncate_pack(
              session, logrec, op->fileid, op->u.truncate_col.start, op->u.truncate_col.stop));
            break;
        case TXN_OP_TRUNCATE_ROW:
            WT_ERR(__wt_logop_row_truncate_pack(session, logrec, op->fileid,
              &op->u.truncate_row.start, &op->u.truncate_row.stop,
              (uint32_t)op->u.truncate_row.mode));
            break;
        }
    }

    WT_ERR(__wt_log_write(session, logrec, NULL, S2C(session)->txn_logsync));

    /* The buffer is released on both the success and the failure path. */
err:
    __wt_logrec_free(session, &logrec);
    return (ret);
}
/*
 * __wt_txn_release --
 *     Release the resources associated with the session's current transaction: clear its ID
 *     (and the global checkpoint IDs when this is the checkpoint session), free the logging
 *     buffer, discard split-stash memory, release the snapshot and reset the isolation level
 *     and flags.
 */
void
__wt_txn_release(WT_SESSION_IMPL *session)
{
    WT_TXN *txn = &session->txn;
    WT_TXN_GLOBAL *global;
    WT_TXN_STATE *state;

    /* All modifications must have been resolved before the transaction is released. */
    WT_ASSERT(session, txn->mod_count == 0);
    txn->notify = NULL;

    global = &S2C(session)->txn_global;
    state = WT_SESSION_TXN_STATE(session);

    /* Clear the transaction's ID from the global table. */
    if (WT_SESSION_IS_CHECKPOINT(session)) {
        WT_ASSERT(session, state->id == WT_TXN_NONE);
        txn->id = WT_TXN_NONE;

        /* Clear the global checkpoint transaction IDs. */
        global->checkpoint_id = 0;
        global->checkpoint_pinned = WT_TXN_NONE;
    } else if (F_ISSET(txn, WT_TXN_HAS_ID)) {
        WT_ASSERT(session, !WT_TXNID_LT(txn->id, global->last_running));
        WT_ASSERT(session, state->id != WT_TXN_NONE && txn->id != WT_TXN_NONE);

        /* Publish the cleared ID in the shared state before resetting the local copy. */
        WT_PUBLISH(state->id, WT_TXN_NONE);
        txn->id = WT_TXN_NONE;
    }

    /* Free the scratch buffer allocated for logging. */
    __wt_logrec_free(session, &txn->logrec);

    /* Discard any memory from the session's split stash that we can. */
    WT_ASSERT(session, session->split_gen == 0);
    if (session->split_stash_cnt > 0) {
        __wt_split_stash_discard(session);
    }

    /* Reset the transaction state to not running and release the snapshot. */
    __wt_txn_release_snapshot(session);
    txn->isolation = session->isolation;

    /* Ensure the transaction flags are cleared on exit. */
    txn->flags = 0;
}
/*
 * __wt_txn_release --
 *     Release the resources associated with the session's current transaction: clear its ID
 *     (and the global checkpoint IDs when this is the checkpoint session), free the logging
 *     buffer, discard split-stash memory, release the snapshot and reset the isolation level
 *     and flags. If the released transaction's ID matched the global last-running ID, the
 *     oldest ID is updated afterwards.
 */
void
__wt_txn_release(WT_SESSION_IMPL *session)
{
    WT_TXN *txn = &session->txn;
    WT_TXN_GLOBAL *global;
    WT_TXN_STATE *state;
    int released_oldest;

    /* All modifications must have been resolved before the transaction is released. */
    WT_ASSERT(session, txn->mod_count == 0);
    txn->notify = NULL;

    global = &S2C(session)->txn_global;
    state = WT_SESSION_TXN_STATE(session);
    released_oldest = 0;

    /* Clear the transaction's ID from the global table. */
    if (WT_SESSION_IS_CHECKPOINT(session)) {
        WT_ASSERT(session, state->id == WT_TXN_NONE);
        txn->id = WT_TXN_NONE;

        /* Clear the global checkpoint transaction IDs. */
        global->checkpoint_id = 0;
        global->checkpoint_pinned = WT_TXN_NONE;
    } else if (F_ISSET(txn, WT_TXN_HAS_ID)) {
        WT_ASSERT(session, !WT_TXNID_LT(txn->id, global->last_running));
        WT_ASSERT(session, state->id != WT_TXN_NONE && txn->id != WT_TXN_NONE);

        /* Publish the cleared ID in the shared state before resetting the local copy. */
        WT_PUBLISH(state->id, WT_TXN_NONE);

        /* Quick check for the oldest transaction; must read the ID before it is cleared. */
        released_oldest = (txn->id == global->last_running);
        txn->id = WT_TXN_NONE;
    }

    /* Free the scratch buffer allocated for logging. */
    __wt_logrec_free(session, &txn->logrec);

    /* Discard any memory from the session's split stash that we can. */
    WT_ASSERT(session, session->split_gen == 0);
    if (session->split_stash_cnt > 0) {
        __wt_split_stash_discard(session);
    }

    /* Reset the transaction state to not running and release the snapshot. */
    __wt_txn_release_snapshot(session);
    txn->isolation = session->isolation;

    /* Ensure the transaction flags are cleared on exit. */
    txn->flags = 0;

    /*
     * When the oldest transaction in the system completes, bump the oldest ID. This is racy
     * and so not guaranteed, but in practice it keeps the oldest ID from falling too far
     * behind.
     */
    if (released_oldest) {
        __wt_txn_update_oldest(session, 1);
    }
}
/*
 * __wt_txn_checkpoint_log --
 *     Write a log record for a checkpoint operation.
 *
 *     session: the session performing the checkpoint.
 *     full: non-zero for a full checkpoint; zero for a single file sync.
 *     flags: the checkpoint phase (prepare, start, stop or fail) for a full checkpoint; for a
 *         file sync the value is passed through to the file-sync logging function.
 *     lsnp: passed to the log write (or to the file-sync logging function). When a file sync
 *         is requested while a full checkpoint is in progress and lsnp is not NULL, it is set
 *         to the checkpoint LSN instead and nothing is logged.
 *
 *     Returns zero on success, otherwise the error returned by the failing call.
 */
int
__wt_txn_checkpoint_log(WT_SESSION_IMPL *session, int full, uint32_t flags, WT_LSN *lsnp)
{
    WT_DECL_RET;
    WT_DECL_ITEM(logrec);
    WT_ITEM *ckpt_snapshot, empty;
    WT_LSN *ckpt_lsn;
    WT_TXN *txn;
    const char *fmt = WT_UNCHECKED_STRING(IIQIU);
    uint8_t *end, *p;
    size_t recsize;
    uint32_t i, rectype = WT_LOGREC_CHECKPOINT;

    txn = &session->txn;
    ckpt_lsn = &txn->ckpt_lsn;

    /*
     * If this is a file sync, log it unless there is a full checkpoint in progress.
     */
    if (!full) {
        if (txn->full_ckpt) {
            if (lsnp != NULL)
                *lsnp = *ckpt_lsn;
            return (0);
        }
        return (__txn_log_file_sync(session, flags, lsnp));
    }

    switch (flags) {
    case WT_TXN_LOG_CKPT_PREPARE:
        txn->full_ckpt = 1;
        *ckpt_lsn = S2C(session)->log->alloc_lsn;
        break;

    case WT_TXN_LOG_CKPT_START:
        /* Take a copy of the transaction snapshot. */
        txn->ckpt_nsnapshot = txn->snapshot_count;
        recsize = txn->ckpt_nsnapshot * WT_INTPACK64_MAXSIZE;
        WT_ERR(__wt_scr_alloc(session, recsize, &txn->ckpt_snapshot));
        p = txn->ckpt_snapshot->mem;
        end = p + recsize;
        for (i = 0; i < txn->snapshot_count; i++)
            WT_ERR(__wt_vpack_uint(&p, WT_PTRDIFF(end, p), txn->snapshot[i]));
        break;

    case WT_TXN_LOG_CKPT_STOP:
        /*
         * During a clean connection close, we get here without the prepare or start steps. In
         * that case, log the current LSN as the checkpoint LSN and log an empty snapshot: no
         * snapshot buffer was allocated, so there is no item to pack.
         */
        if (!txn->full_ckpt) {
            txn->ckpt_nsnapshot = 0;
            WT_CLEAR(empty);
            ckpt_snapshot = &empty;
            *ckpt_lsn = S2C(session)->log->alloc_lsn;
        } else
            ckpt_snapshot = txn->ckpt_snapshot;

        /*
         * Write the checkpoint log record. The snapshot is passed as the item pointer itself:
         * the transaction field is already a pointer, so taking its address would hand the
         * packing code a pointer-to-pointer.
         */
        WT_ERR(__wt_struct_size(session, &recsize, fmt, rectype, ckpt_lsn->file,
          ckpt_lsn->offset, txn->ckpt_nsnapshot, ckpt_snapshot));
        WT_ERR(__wt_logrec_alloc(session, recsize, &logrec));

        WT_ERR(__wt_struct_pack(session, (uint8_t *)logrec->data + logrec->size, recsize, fmt,
          rectype, ckpt_lsn->file, ckpt_lsn->offset, txn->ckpt_nsnapshot, ckpt_snapshot));
        logrec->size += (uint32_t)recsize;
        WT_ERR(__wt_log_write(session, logrec, lsnp, 0));

        /*
         * If this full checkpoint completed successfully and there is no hot backup in
         * progress, tell the logging subsystem the checkpoint LSN so that it can archive.
         */
        if (!S2C(session)->hot_backup)
            WT_ERR(__wt_log_ckpt(session, ckpt_lsn));

        /* FALLTHROUGH */
    case WT_TXN_LOG_CKPT_FAIL:
        /* Cleanup any allocated resources */
        INIT_LSN(ckpt_lsn);
        txn->ckpt_nsnapshot = 0;
        __wt_scr_free(&txn->ckpt_snapshot);
        txn->full_ckpt = 0;
        break;
    }

    /* The log record buffer is released on both the success and the failure path. */
err:
    __wt_logrec_free(session, &logrec);
    return (ret);
}
/*
 * __wt_txn_checkpoint_log --
 *     Write a log record for a checkpoint operation.
 *
 *     session: the session performing the checkpoint.
 *     full: true for a full checkpoint; false for a single file sync.
 *     flags: the checkpoint phase (prepare, start, stop or cleanup) for a full checkpoint; for
 *         a file sync the value is passed through to the file-sync logging function. Any other
 *         value is rejected by the switch's illegal-value handling.
 *     lsnp: passed to the checkpoint record's log write (or to the file-sync logging
 *         function). When a file sync is requested while a full checkpoint is in progress and
 *         lsnp is not NULL, it is set to the checkpoint LSN instead and nothing is logged.
 *
 *     Returns zero on success, otherwise the error returned by the failing call.
 */
int
__wt_txn_checkpoint_log(WT_SESSION_IMPL *session, bool full, uint32_t flags, WT_LSN *lsnp)
{
    WT_CONNECTION_IMPL *conn;
    WT_DECL_ITEM(logrec);
    WT_DECL_RET;
    WT_ITEM *ckpt_snapshot, empty;
    WT_LSN *ckpt_lsn;
    WT_TXN *txn;
    WT_TXN_GLOBAL *txn_global;
    uint8_t *end, *p;
    size_t recsize;
    uint32_t i, rectype;
    const char *fmt;

    conn = S2C(session);
    txn_global = &conn->txn_global;
    txn = &session->txn;
    ckpt_lsn = &txn->ckpt_lsn;

    /*
     * If this is a file sync, log it unless there is a full checkpoint in progress.
     */
    if (!full) {
        if (txn->full_ckpt) {
            if (lsnp != NULL)
                *lsnp = *ckpt_lsn;
            return (0);
        }
        return (__txn_log_file_sync(session, flags, lsnp));
    }

    switch (flags) {
    case WT_TXN_LOG_CKPT_PREPARE:
        txn->full_ckpt = true;

        if (conn->compat_major >= WT_LOG_V2) {
            /*
             * Write the system log record containing a checkpoint start operation.
             */
            rectype = WT_LOGREC_SYSTEM;
            fmt = WT_UNCHECKED_STRING(I);
            WT_ERR(__wt_struct_size(session, &recsize, fmt, rectype));
            WT_ERR(__wt_logrec_alloc(session, recsize, &logrec));

            WT_ERR(__wt_struct_pack(
              session, (uint8_t *)logrec->data + logrec->size, recsize, fmt, rectype));
            logrec->size += (uint32_t)recsize;
            WT_ERR(__wt_logop_checkpoint_start_pack(session, logrec));
            WT_ERR(__wt_log_write(session, logrec, ckpt_lsn, 0));
        } else {
            WT_ERR(__wt_log_printf(session, "CHECKPOINT: Starting record"));
            WT_ERR(__wt_log_flush_lsn(session, ckpt_lsn, true));
        }

        /*
         * We take and immediately release the visibility lock. Acquiring the write lock
         * guarantees that any transaction that has written to the log has also made its
         * transaction visible at this time.
         */
        __wt_writelock(session, &txn_global->visibility_rwlock);
        __wt_writeunlock(session, &txn_global->visibility_rwlock);

        /*
         * We need to make sure that the log records in the checkpoint LSN are on disk. In
         * particular to make sure that the current log file exists.
         */
        WT_ERR(__wt_log_force_sync(session, ckpt_lsn));
        break;

    case WT_TXN_LOG_CKPT_START:
        /* Take a copy of the transaction snapshot. */
        txn->ckpt_nsnapshot = txn->snapshot_count;
        recsize = (size_t)txn->ckpt_nsnapshot * WT_INTPACK64_MAXSIZE;
        WT_ERR(__wt_scr_alloc(session, recsize, &txn->ckpt_snapshot));
        p = txn->ckpt_snapshot->mem;
        end = p + recsize;
        for (i = 0; i < txn->snapshot_count; i++)
            WT_ERR(__wt_vpack_uint(&p, WT_PTRDIFF(end, p), txn->snapshot[i]));
        break;

    case WT_TXN_LOG_CKPT_STOP:
        /*
         * During a clean connection close, we get here without the prepare or start steps. In
         * that case, log the current LSN as the checkpoint LSN and pack a zeroed local item in
         * place of the snapshot.
         */
        if (!txn->full_ckpt) {
            txn->ckpt_nsnapshot = 0;
            WT_CLEAR(empty);
            ckpt_snapshot = &empty;
            WT_ERR(__wt_log_flush_lsn(session, ckpt_lsn, true));
        } else
            ckpt_snapshot = txn->ckpt_snapshot;

        /* Write the checkpoint log record. */
        rectype = WT_LOGREC_CHECKPOINT;
        fmt = WT_UNCHECKED_STRING(IIIIu);
        WT_ERR(__wt_struct_size(session, &recsize, fmt, rectype, ckpt_lsn->l.file,
          ckpt_lsn->l.offset, txn->ckpt_nsnapshot, ckpt_snapshot));
        WT_ERR(__wt_logrec_alloc(session, recsize, &logrec));

        WT_ERR(__wt_struct_pack(session, (uint8_t *)logrec->data + logrec->size, recsize, fmt,
          rectype, ckpt_lsn->l.file, ckpt_lsn->l.offset, txn->ckpt_nsnapshot, ckpt_snapshot));
        logrec->size += (uint32_t)recsize;
        WT_ERR(__wt_log_write(
          session, logrec, lsnp, F_ISSET(conn, WT_CONN_CKPT_SYNC) ? WT_LOG_FSYNC : 0));

        /*
         * If this full checkpoint completed successfully and there is no hot backup in
         * progress and this is not an unclean recovery, tell the logging subsystem the
         * checkpoint LSN so that it can archive. Do not update the logging checkpoint LSN if
         * this is during a clean connection close, only during a full checkpoint. A clean
         * close may not update any metadata LSN and we do not want to archive in that case.
         */
        if (!conn->hot_backup &&
          (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_RECOVER_DIRTY) ||
            FLD_ISSET(conn->log_flags, WT_CONN_LOG_FORCE_DOWNGRADE)) &&
          txn->full_ckpt)
            __wt_log_ckpt(session, ckpt_lsn);

        /* FALLTHROUGH */
    case WT_TXN_LOG_CKPT_CLEANUP:
        /* Cleanup any allocated resources */
        WT_INIT_LSN(ckpt_lsn);
        txn->ckpt_nsnapshot = 0;
        __wt_scr_free(session, &txn->ckpt_snapshot);
        txn->full_ckpt = false;
        break;
        WT_ILLEGAL_VALUE_ERR(session);
    }

    /* The log record buffer is released on both the success and the failure path. */
err:
    __wt_logrec_free(session, &logrec);
    return (ret);
}
/*
 * __wt_txn_checkpoint_log --
 *     Write a log record for a checkpoint operation.
 *
 *     session: the session performing the checkpoint.
 *     full: true for a full checkpoint; false for a single file sync.
 *     flags: the checkpoint phase (prepare, start, stop or cleanup) for a full checkpoint; for
 *         a file sync the value is passed through to the file-sync logging function. Any other
 *         value is rejected by the switch's illegal-value handling.
 *     lsnp: passed to the checkpoint record's log write (or to the file-sync logging
 *         function). When a file sync is requested while a full checkpoint is in progress and
 *         lsnp is not NULL, it is set to the checkpoint LSN instead and nothing is logged.
 *
 *     Returns zero on success, otherwise the error returned by the failing call.
 */
int
__wt_txn_checkpoint_log(WT_SESSION_IMPL *session, bool full, uint32_t flags, WT_LSN *lsnp)
{
    WT_DECL_ITEM(logrec);
    WT_DECL_RET;
    WT_ITEM *ckpt_snapshot, empty;
    WT_LSN *ckpt_lsn;
    WT_TXN *txn;
    uint8_t *end, *p;
    size_t recsize;
    uint32_t i, rectype = WT_LOGREC_CHECKPOINT;
    const char *fmt = WT_UNCHECKED_STRING(IIQIU);

    txn = &session->txn;
    ckpt_lsn = &txn->ckpt_lsn;

    /*
     * If this is a file sync, log it unless there is a full checkpoint in progress.
     */
    if (!full) {
        if (txn->full_ckpt) {
            if (lsnp != NULL)
                *lsnp = *ckpt_lsn;
            return (0);
        }
        return (__txn_log_file_sync(session, flags, lsnp));
    }

    switch (flags) {
    case WT_TXN_LOG_CKPT_PREPARE:
        txn->full_ckpt = true;
        WT_ERR(__wt_log_flush_lsn(session, ckpt_lsn, true));

        /*
         * We need to make sure that the log records in the checkpoint LSN are on disk. In
         * particular to make sure that the current log file exists.
         */
        WT_ERR(__wt_log_force_sync(session, ckpt_lsn));
        break;

    case WT_TXN_LOG_CKPT_START:
        /* Take a copy of the transaction snapshot. */
        txn->ckpt_nsnapshot = txn->snapshot_count;
        recsize = txn->ckpt_nsnapshot * WT_INTPACK64_MAXSIZE;
        WT_ERR(__wt_scr_alloc(session, recsize, &txn->ckpt_snapshot));
        p = txn->ckpt_snapshot->mem;
        end = p + recsize;
        for (i = 0; i < txn->snapshot_count; i++)
            WT_ERR(__wt_vpack_uint(&p, WT_PTRDIFF(end, p), txn->snapshot[i]));
        break;

    case WT_TXN_LOG_CKPT_STOP:
        /*
         * During a clean connection close, we get here without the prepare or start steps. In
         * that case, log the current LSN as the checkpoint LSN and pack a zeroed local item in
         * place of the snapshot.
         */
        if (!txn->full_ckpt) {
            txn->ckpt_nsnapshot = 0;
            WT_CLEAR(empty);
            ckpt_snapshot = &empty;
            WT_ERR(__wt_log_flush_lsn(session, ckpt_lsn, true));
        } else
            ckpt_snapshot = txn->ckpt_snapshot;

        /* Write the checkpoint log record. */
        WT_ERR(__wt_struct_size(session, &recsize, fmt, rectype, ckpt_lsn->file,
          ckpt_lsn->offset, txn->ckpt_nsnapshot, ckpt_snapshot));
        WT_ERR(__wt_logrec_alloc(session, recsize, &logrec));

        WT_ERR(__wt_struct_pack(session, (uint8_t *)logrec->data + logrec->size, recsize, fmt,
          rectype, ckpt_lsn->file, ckpt_lsn->offset, txn->ckpt_nsnapshot, ckpt_snapshot));
        logrec->size += (uint32_t)recsize;
        WT_ERR(__wt_log_write(
          session, logrec, lsnp, F_ISSET(S2C(session), WT_CONN_CKPT_SYNC) ? WT_LOG_FSYNC : 0));

        /*
         * If this full checkpoint completed successfully and there is no hot backup in
         * progress, tell the logging subsystem the checkpoint LSN so that it can archive. Do
         * not update the logging checkpoint LSN if this is during a clean connection close,
         * only during a full checkpoint. A clean close may not update any metadata LSN and we
         * do not want to archive in that case.
         */
        if (!S2C(session)->hot_backup && txn->full_ckpt)
            WT_ERR(__wt_log_ckpt(session, ckpt_lsn));

        /* FALLTHROUGH */
    case WT_TXN_LOG_CKPT_CLEANUP:
        /* Cleanup any allocated resources */
        WT_INIT_LSN(ckpt_lsn);
        txn->ckpt_nsnapshot = 0;
        __wt_scr_free(session, &txn->ckpt_snapshot);
        txn->full_ckpt = false;
        break;
        WT_ILLEGAL_VALUE_ERR(session);
    }

    /* The log record buffer is released on both the success and the failure path. */
err:
    __wt_logrec_free(session, &logrec);
    return (ret);
}