Beispiel #1
0
/*
 * __curlog_reset --
 *	WT_CURSOR.reset method for the log cursor type.
 */
static int
__curlog_reset(WT_CURSOR *cursor)
{
	WT_CURSOR_LOG *cl;

	cl = (WT_CURSOR_LOG *)cursor;
	cl->stepp = cl->stepp_end = NULL;
	cl->step_count = 0;
	INIT_LSN(cl->cur_lsn);
	INIT_LSN(cl->next_lsn);
	return (0);
}
Beispiel #2
0
/*
 * __recovery_setup_file --
 *	Set up the recovery slot for a file.
 */
static int
__recovery_setup_file(WT_RECOVERY *r, const char *uri, const char *config)
{
	WT_CONFIG_ITEM cval;
	WT_LSN lsn;
	uint32_t fileid;

	WT_RET(__wt_config_getones(r->session, config, "id", &cval));
	fileid = (uint32_t)cval.val;

	if (r->nfiles <= fileid) {
		WT_RET(__wt_realloc_def(
		    r->session, &r->file_alloc, fileid + 1, &r->files));
		r->nfiles = fileid + 1;
	}

	WT_RET(__wt_strdup(r->session, uri, &r->files[fileid].uri));
	WT_RET(
	    __wt_config_getones(r->session, config, "checkpoint_lsn", &cval));
	/* If there is checkpoint logged for the file, apply everything. */
	if (cval.type != WT_CONFIG_ITEM_STRUCT)
		INIT_LSN(&lsn);
	else if (sscanf(cval.str, "(%" PRIu32 ",%" PRIdMAX ")",
	    &lsn.file, (intmax_t*)&lsn.offset) != 2)
		WT_RET_MSG(r->session, EINVAL,
		    "Failed to parse checkpoint LSN '%.*s'",
		    (int)cval.len, cval.str);
	r->files[fileid].ckpt_lsn = lsn;

	WT_RET(__wt_verbose(r->session, WT_VERB_RECOVERY,
	    "Recovering %s with id %u @ (%" PRIu32 ", %" PRIu64 ")",
	    uri, fileid, lsn.file, lsn.offset));

	return (0);

}
Beispiel #3
0
/*
 * __wt_curlog_open --
 *	Initialize a log cursor.
 */
int
__wt_curlog_open(WT_SESSION_IMPL *session,
    const char *uri, const char *cfg[], WT_CURSOR **cursorp)
{
	WT_CONNECTION_IMPL *conn;
	WT_CURSOR_STATIC_INIT(iface,
	    NULL,			/* get-key */
	    NULL,			/* get-value */
	    NULL,			/* set-key */
	    NULL,			/* set-value */
	    __curlog_compare,		/* compare */
	    __curlog_next,		/* next */
	    __wt_cursor_notsup,		/* prev */
	    __curlog_reset,		/* reset */
	    __curlog_search,		/* search */
	    __wt_cursor_notsup,		/* search-near */
	    __wt_cursor_notsup,		/* insert */
	    __wt_cursor_notsup,		/* update */
	    __wt_cursor_notsup,		/* remove */
	    __curlog_close);		/* close */
	WT_CURSOR *cursor;
	WT_CURSOR_LOG *cl;
	WT_DECL_RET;

	STATIC_ASSERT(offsetof(WT_CURSOR_LOG, iface) == 0);
	conn = S2C(session);
	if (!conn->logging)
		WT_RET_MSG(session, EINVAL,
		    "Cannot open a log cursor without logging enabled");

	cl = NULL;
	WT_RET(__wt_calloc_def(session, 1, &cl));
	cursor = &cl->iface;
	*cursor = iface;
	cursor->session = &session->iface;
	WT_ERR(__wt_calloc_def(session, 1, &cl->cur_lsn));
	WT_ERR(__wt_calloc_def(session, 1, &cl->next_lsn));
	WT_ERR(__wt_scr_alloc(session, 0, &cl->logrec));
	WT_ERR(__wt_scr_alloc(session, 0, &cl->opkey));
	WT_ERR(__wt_scr_alloc(session, 0, &cl->opvalue));
	cursor->key_format = LOGC_KEY_FORMAT;
	cursor->value_format = LOGC_VALUE_FORMAT;

	INIT_LSN(cl->cur_lsn);
	INIT_LSN(cl->next_lsn);

	WT_ERR(__wt_cursor_init(cursor, uri, NULL, cfg, cursorp));

	/* Log cursors default to read only. */
	WT_ERR(__wt_cursor_config_readonly(cursor, cfg, 1));

	if (0) {
err:		if (F_ISSET(cursor, WT_CURSTD_OPEN))
			WT_TRET(cursor->close(cursor));
		else {
			__wt_free(session, cl->cur_lsn);
			__wt_free(session, cl->next_lsn);
			__wt_scr_free(&cl->logrec);
			__wt_scr_free(&cl->opkey);
			__wt_scr_free(&cl->opvalue);
			__wt_free(session, cl);
		}
		*cursorp = NULL;
	}

	return (ret);
}
Beispiel #4
0
/*
 * __wt_txn_checkpoint_log --
 *	Write a log record for a checkpoint operation.
 */
int
__wt_txn_checkpoint_log(
    WT_SESSION_IMPL *session, int full, uint32_t flags, WT_LSN *lsnp)
{
	WT_DECL_RET;
	WT_DECL_ITEM(logrec);
	WT_LSN *ckpt_lsn;
	WT_TXN *txn;
	const char *fmt = WT_UNCHECKED_STRING(IIQIU);
	uint8_t *end, *p;
	size_t recsize;
	uint32_t i, rectype = WT_LOGREC_CHECKPOINT;

	txn = &session->txn;
	ckpt_lsn = &txn->ckpt_lsn;

	/*
	 * If this is a file sync, log it unless there is a full checkpoint in
	 * progress.
	 */
	if (!full) {
		if (txn->full_ckpt) {
			if (lsnp != NULL)
				*lsnp = *ckpt_lsn;
			return (0);
		} else
			return (__txn_log_file_sync(session, flags, lsnp));
	}

	switch (flags) {
	case WT_TXN_LOG_CKPT_PREPARE:
		txn->full_ckpt = 1;
		*ckpt_lsn = S2C(session)->log->alloc_lsn;
		break;

	case WT_TXN_LOG_CKPT_START:
		/* Take a copy of the transaction snapshot. */
		txn->ckpt_nsnapshot = txn->snapshot_count;
		recsize = txn->ckpt_nsnapshot * WT_INTPACK64_MAXSIZE;
		WT_ERR(__wt_scr_alloc(session, recsize, &txn->ckpt_snapshot));
		p = txn->ckpt_snapshot->mem;
		end = p + recsize;
		for (i = 0; i < txn->snapshot_count; i++)
			WT_ERR(__wt_vpack_uint(
			    &p, WT_PTRDIFF(end, p), txn->snapshot[i]));
		break;

	case WT_TXN_LOG_CKPT_STOP:
		/*
		 * During a clean connection close, we get here without the
		 * prepare or start steps.  In that case, log the current LSN
		 * as the checkpoint LSN.
		 */
		if (!txn->full_ckpt) {
			txn->ckpt_nsnapshot = 0;
			*ckpt_lsn = S2C(session)->log->alloc_lsn;
		}

		/* Write the checkpoint log record. */
		WT_ERR(__wt_struct_size(session, &recsize, fmt,
		    rectype, ckpt_lsn->file, ckpt_lsn->offset,
		    txn->ckpt_nsnapshot, &txn->ckpt_snapshot));
		WT_ERR(__wt_logrec_alloc(session, recsize, &logrec));

		WT_ERR(__wt_struct_pack(session,
		    (uint8_t *)logrec->data + logrec->size, recsize, fmt,
		    rectype, ckpt_lsn->file, ckpt_lsn->offset,
		    txn->ckpt_nsnapshot, &txn->ckpt_snapshot));
		logrec->size += (uint32_t)recsize;
		WT_ERR(__wt_log_write(session, logrec, lsnp, 0));

		/*
		 * If this full checkpoint completed successfully and there is
		 * no hot backup in progress, tell the logging subsystem the
		 * checkpoint LSN so that it can archive.
		 */
		if (!S2C(session)->hot_backup)
			WT_ERR(__wt_log_ckpt(session, ckpt_lsn));

		/* FALLTHROUGH */
	case WT_TXN_LOG_CKPT_FAIL:
		/* Cleanup any allocated resources */
		INIT_LSN(ckpt_lsn);
		txn->ckpt_nsnapshot = 0;
		__wt_scr_free(&txn->ckpt_snapshot);
		txn->full_ckpt = 0;
		break;
	}

err:	__wt_logrec_free(session, &logrec);
	return (ret);
}
Beispiel #5
0
/*
 * __wt_curlog_open --
 *	Initialize a log cursor.
 */
int
__wt_curlog_open(WT_SESSION_IMPL *session,
    const char *uri, const char *cfg[], WT_CURSOR **cursorp)
{
	WT_CONNECTION_IMPL *conn;
	WT_CURSOR_STATIC_INIT(iface,
	    __wt_cursor_get_key,	/* get-key */
	    __wt_cursor_get_value,	/* get-value */
	    __wt_cursor_set_key,	/* set-key */
	    __wt_cursor_set_value,	/* set-value */
	    __curlog_compare,		/* compare */
	    __curlog_next,		/* next */
	    __wt_cursor_notsup,		/* prev */
	    __curlog_reset,		/* reset */
	    __curlog_search,		/* search */
	    __wt_cursor_notsup,		/* search-near */
	    __wt_cursor_notsup,		/* insert */
	    __wt_cursor_notsup,		/* update */
	    __wt_cursor_notsup,		/* remove */
	    __curlog_close);		/* close */
	WT_CURSOR *cursor;
	WT_CURSOR_LOG *cl;
	WT_DECL_RET;
	WT_LOG *log;

	WT_STATIC_ASSERT(offsetof(WT_CURSOR_LOG, iface) == 0);
	conn = S2C(session);
	if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED))
		WT_RET_MSG(session, EINVAL,
		    "Cannot open a log cursor without logging enabled");

	log = conn->log;
	cl = NULL;
	WT_RET(__wt_calloc_def(session, 1, &cl));
	cursor = &cl->iface;
	*cursor = iface;
	cursor->session = &session->iface;
	WT_ERR(__wt_calloc_def(session, 1, &cl->cur_lsn));
	WT_ERR(__wt_calloc_def(session, 1, &cl->next_lsn));
	WT_ERR(__wt_scr_alloc(session, 0, &cl->logrec));
	WT_ERR(__wt_scr_alloc(session, 0, &cl->opkey));
	WT_ERR(__wt_scr_alloc(session, 0, &cl->opvalue));
	cursor->key_format = LOGC_KEY_FORMAT;
	cursor->value_format = LOGC_VALUE_FORMAT;

	INIT_LSN(cl->cur_lsn);
	INIT_LSN(cl->next_lsn);

	WT_ERR(__wt_cursor_init(cursor, uri, NULL, cfg, cursorp));

	/* Log cursors block archiving. */
	WT_ERR(__wt_readlock(session, log->log_archive_lock));

	if (0) {
err:		if (F_ISSET(cursor, WT_CURSTD_OPEN))
			WT_TRET(cursor->close(cursor));
		else {
			__wt_free(session, cl->cur_lsn);
			__wt_free(session, cl->next_lsn);
			__wt_scr_free(&cl->logrec);
			__wt_scr_free(&cl->opkey);
			__wt_scr_free(&cl->opvalue);
			/*
			 * NOTE:  We cannot get on the error path with the
			 * readlock held.  No need to unlock it unless that
			 * changes above.
			 */
			__wt_free(session, cl);
		}
		*cursorp = NULL;
	}

	return (ret);
}
Beispiel #6
0
/*
 * __wt_log_write --
 *	Write a record into the log.
 */
int
__wt_log_write(WT_SESSION_IMPL *session, WT_ITEM *record, WT_LSN *lsnp,
    uint32_t flags)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_LOG *log;
	WT_LOG_RECORD *logrec;
	WT_LSN lsn;
	WT_MYSLOT myslot;
	uint32_t rdup_len;
	int locked;

	conn = S2C(session);
	log = conn->log;
	locked = 0;
	INIT_LSN(&lsn);
	myslot.slot = NULL;
	/*
	 * Assume the WT_ITEM the user passed is a WT_LOG_RECORD, which has
	 * a header at the beginning for us to fill in.
	 *
	 * If using direct_io, the caller should pass us an aligned record.
	 * But we need to make sure it is big enough and zero-filled so
	 * that we can write the full amount.  Do this whether or not
	 * direct_io is in use because it makes the reading code cleaner.
	 */
	WT_STAT_FAST_CONN_INCRV(session, log_bytes_user, record->size);
	rdup_len = __wt_rduppo2((uint32_t)record->size, log->allocsize);
	WT_ERR(__wt_buf_grow(session, record, rdup_len));
	WT_ASSERT(session, record->data == record->mem);
	/*
	 * If the caller's record only partially fills the necessary
	 * space, we need to zero-fill the remainder.
	 */
	if (record->size != rdup_len) {
		memset((uint8_t *)record->mem + record->size, 0,
		    rdup_len - record->size);
		record->size = rdup_len;
	}
	logrec = (WT_LOG_RECORD *)record->mem;
	logrec->len = (uint32_t)record->size;
	logrec->checksum = 0;
	logrec->checksum = __wt_cksum(logrec, record->size);

	WT_STAT_FAST_CONN_INCR(session, log_writes);

	if (!F_ISSET(log, WT_LOG_FORCE_CONSOLIDATE)) {
		ret = __log_direct_write(session, record, lsnp, flags);
		if (ret == 0)
			return (0);
		if (ret != EAGAIN)
			WT_ERR(ret);
		/*
		 * An EAGAIN return means we failed to get the try lock -
		 * fall through to the consolidation code in that case.
		 */
	}

	/*
	 * As soon as we see contention for the log slot, disable direct
	 * log writes. We get better performance by forcing writes through
	 * the consolidation code. This is because individual writes flood
	 * the I/O system faster than they contend on the log slot lock.
	 */
	F_SET(log, WT_LOG_FORCE_CONSOLIDATE);
	if ((ret = __wt_log_slot_join(
	    session, rdup_len, flags, &myslot)) == ENOMEM) {
		/*
		 * If we couldn't find a consolidated slot for this record
		 * write the record directly.
		 */
		while ((ret = __log_direct_write(
		    session, record, lsnp, flags)) == EAGAIN)
			;
		WT_ERR(ret);
		/*
		 * Increase the buffer size of any slots we can get access
		 * to, so future consolidations are likely to succeed.
		 */
		WT_ERR(__wt_log_slot_grow_buffers(session, 4 * rdup_len));
		return (0);
	}
	WT_ERR(ret);
	if (myslot.offset == 0) {
		__wt_spin_lock(session, &log->log_slot_lock);
		locked = 1;
		WT_ERR(__wt_log_slot_close(session, myslot.slot));
		WT_ERR(__log_acquire(
		    session, myslot.slot->slot_group_size, myslot.slot));
		__wt_spin_unlock(session, &log->log_slot_lock);
		locked = 0;
		WT_ERR(__wt_log_slot_notify(session, myslot.slot));
	} else
		WT_ERR(__wt_log_slot_wait(session, myslot.slot));
	WT_ERR(__log_fill(session, &myslot, 0, record, &lsn));
	if (__wt_log_slot_release(myslot.slot, rdup_len) == WT_LOG_SLOT_DONE) {
		WT_ERR(__log_release(session, myslot.slot));
		WT_ERR(__wt_log_slot_free(myslot.slot));
	} else if (LF_ISSET(WT_LOG_FSYNC)) {
		/* Wait for our writes to reach disk */
		while (LOG_CMP(&log->sync_lsn, &lsn) <= 0 &&
		    myslot.slot->slot_error == 0)
			(void)__wt_cond_wait(
			    session, log->log_sync_cond, 10000);
	}
err:
	if (locked)
		__wt_spin_unlock(session, &log->log_slot_lock);
	if (ret == 0 && lsnp != NULL)
		*lsnp = lsn;
	/*
	 * If we're synchronous and some thread had an error, we don't know
	 * if our write made it out to the file or not.  The error could be
	 * before or after us.  So, if anyone got an error, we report it.
	 * If we're not synchronous, only report if our own operation got
	 * an error.
	 */
	if (LF_ISSET(WT_LOG_DSYNC | WT_LOG_FSYNC) && ret == 0 &&
	    myslot.slot != NULL)
		ret = myslot.slot->slot_error;
	return (ret);
}
Beispiel #7
0
/*
 * __wt_txn_recover --
 *	Run recovery.
 */
int
__wt_txn_recover(WT_CONNECTION_IMPL *conn)
{
	WT_CURSOR *metac;
	WT_DECL_RET;
	WT_RECOVERY r;
	WT_SESSION_IMPL *session;
	struct WT_RECOVERY_FILE *metafile;
	char *config;
	int was_backup;

	WT_CLEAR(r);
	INIT_LSN(&r.ckpt_lsn);
	was_backup = F_ISSET(conn, WT_CONN_WAS_BACKUP) ? 1 : 0;

	/* We need a real session for recovery. */
	WT_RET(__wt_open_session(conn, NULL, NULL, &session));
	F_SET(session, WT_SESSION_NO_LOGGING);
	r.session = session;

	WT_ERR(__wt_metadata_search(session, WT_METAFILE_URI, &config));
	WT_ERR(__recovery_setup_file(&r, WT_METAFILE_URI, config));
	WT_ERR(__wt_metadata_cursor(session, NULL, &metac));
	metafile = &r.files[WT_METAFILE_ID];
	metafile->c = metac;

	/*
	 * First, do a pass through the log to recover the metadata, and
	 * establish the last checkpoint LSN.  Skip this when opening a hot
	 * backup: we already have the correct metadata in that case.
	 */
	if (!was_backup) {
		r.metadata_only = 1;
		if (IS_INIT_LSN(&metafile->ckpt_lsn))
			WT_ERR(__wt_log_scan(session,
			    NULL, WT_LOGSCAN_FIRST, __txn_log_recover, &r));
		else
			WT_ERR(__wt_log_scan(session,
			    &metafile->ckpt_lsn, 0, __txn_log_recover, &r));

		WT_ASSERT(session,
		    LOG_CMP(&r.ckpt_lsn, &conn->log->first_lsn) >= 0);
	}

	/* Scan the metadata to find the live files and their IDs. */
	WT_ERR(__recovery_file_scan(&r));

	/*
	 * We no longer need the metadata cursor: close it to avoid pinning any
	 * resources that could block eviction during recovery.
	 */
	r.files[0].c = NULL;
	WT_ERR(metac->close(metac));

	/*
	 * Now, recover all the files apart from the metadata.
	 * Pass WT_LOGSCAN_RECOVER so that old logs get truncated.
	 */
	r.metadata_only = 0;
	WT_ERR(__wt_verbose(session, WT_VERB_RECOVERY,
	    "Main recovery loop: starting at %u/%" PRIuMAX,
	    r.ckpt_lsn.file, (uintmax_t)r.ckpt_lsn.offset));
	if (IS_INIT_LSN(&r.ckpt_lsn))
		WT_ERR(__wt_log_scan(session, NULL,
		    WT_LOGSCAN_FIRST | WT_LOGSCAN_RECOVER,
		    __txn_log_recover, &r));
	else
		WT_ERR(__wt_log_scan(session, &r.ckpt_lsn,
		    WT_LOGSCAN_RECOVER,
		    __txn_log_recover, &r));

	conn->next_file_id = r.max_fileid;

	/*
	 * If recovery ran successfully forcibly log a checkpoint so the next
	 * open is fast and keep the metadata up to date with the checkpoint
	 * LSN and archiving.
	 */
	WT_ERR(session->iface.checkpoint(&session->iface, "force=1"));

err:	WT_TRET(__recovery_free(&r));
	__wt_free(session, config);
	WT_TRET(session->iface.close(&session->iface, NULL));

	return (ret);
}