Example #1
0
/*
 * __log_filesize --
 *	Returns an estimate of the real end of log file.
 */
static int
__log_filesize(WT_SESSION_IMPL *session, WT_FH *fh, off_t *eof)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_LOG *log;
	uint64_t rec;
	uint32_t allocsize;
	off_t log_size, off;

	conn = S2C(session);
	log = conn->log;
	if (eof == NULL)
		return (0);
	*eof = 0;
	WT_ERR(__wt_filesize(session, fh, &log_size));
	if (log == NULL)
		allocsize = LOG_ALIGN;
	else
		allocsize = log->allocsize;
	/*
	 * We know all log records are aligned at log->allocsize.  The first
	 * item in a log record is always the length.  Look for any non-zero
	 * at the allocsize boundary.  This may not be a true log record since
	 * it could be the middle of a large record.  But we know no log record
	 * starts after it.  Return an estimate of the log file size.
	 */
	for (off = log_size - (off_t)allocsize;
	    off > 0;
	    off -= (off_t)allocsize) {
		WT_ERR(__wt_read(session, fh, off, sizeof(uint64_t), &rec));
		if (rec != 0)
			break;
	}
	/*
	 * Set EOF to the last zero-filled record we saw.
	 */
	*eof = off + (off_t)allocsize;
err:
	return (ret);
}
Example #2
0
/*
 * __wt_fopen --
 *	Open a stream handle.
 */
int
__wt_fopen(WT_SESSION_IMPL *session,
    const char *name, uint32_t open_flags, uint32_t flags, WT_FSTREAM **fstrp)
{
	WT_DECL_RET;
	WT_FH *fh;
	WT_FSTREAM *fstr;

	*fstrp = NULL;

	fstr = NULL;

	WT_RET(__wt_open(
	    session, name, WT_OPEN_FILE_TYPE_REGULAR, open_flags, &fh));

	WT_ERR(__wt_calloc_one(session, &fstr));
	fstr->fh = fh;
	fstr->name = fh->name;
	fstr->flags = flags;

	fstr->close = __fstream_close;
	WT_ERR(__wt_filesize(session, fh, &fstr->size));
	if (LF_ISSET(WT_STREAM_APPEND))
		fstr->off = fstr->size;
	if (LF_ISSET(WT_STREAM_APPEND | WT_STREAM_WRITE)) {
		fstr->fstr_flush = __fstream_flush;
		fstr->fstr_getline = __fstream_getline_notsup;
		fstr->fstr_printf = __fstream_printf;
	} else {
		WT_ASSERT(session, LF_ISSET(WT_STREAM_READ));
		fstr->fstr_flush = __fstream_flush_notsup;
		fstr->fstr_getline = __fstream_getline;
		fstr->fstr_printf = __fstream_printf_notsup;
	}
	*fstrp = fstr;
	return (0);

err:	WT_TRET(__wt_close(session, &fh));
	__wt_free(session, fstr);
	return (ret);
}
Example #3
0
/*
 * __wt_optrack_record_funcid --
 *	Allocate and record optrack function ID.
 */
void
__wt_optrack_record_funcid(
    WT_SESSION_IMPL *session, const char *func, uint16_t *func_idp)
{
	static uint16_t optrack_uid = 0; /* Unique for the process lifetime. */
	WT_CONNECTION_IMPL *conn;
	WT_DECL_ITEM(tmp);
	WT_DECL_RET;
	wt_off_t fsize;
	bool locked;

	conn = S2C(session);
	locked = false;

	WT_ERR(__wt_scr_alloc(session, strlen(func) + 32, &tmp));

	__wt_spin_lock(session, &conn->optrack_map_spinlock);
	locked = true;
	if (*func_idp == 0) {
		*func_idp = ++optrack_uid;

		WT_ERR(__wt_buf_fmt(
		    session, tmp, "%" PRIu16 " %s\n", *func_idp, func));
		WT_ERR(__wt_filesize(session, conn->optrack_map_fh, &fsize));
		WT_ERR(__wt_write(session,
		    conn->optrack_map_fh, fsize, tmp->size, tmp->data));
	}

	if (0) {
err:		WT_PANIC_MSG(session, ret,
		    "operation tracking initialization failure");
	}

	if (locked)
		__wt_spin_unlock(session, &conn->optrack_map_spinlock);
	__wt_scr_free(session, &tmp);
}
Example #4
0
/*
 * __wt_open --
 *	Open a file handle.
 */
int
__wt_open(WT_SESSION_IMPL *session,
    const char *name, int ok_create, int exclusive, int dio_type, WT_FH **fhp)
{
	DWORD dwCreationDisposition;
	HANDLE filehandle, filehandle_secondary;
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_FH *fh, *tfh;
	uint64_t bucket, hash;
	int direct_io, f, matched, share_mode;
	char *path;

	conn = S2C(session);
	fh = NULL;
	path = NULL;
	filehandle = INVALID_HANDLE_VALUE;
	filehandle_secondary = INVALID_HANDLE_VALUE;
	direct_io = 0;
	hash = __wt_hash_city64(name, strlen(name));
	bucket = hash % WT_HASH_ARRAY_SIZE;

	WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, "%s: open", name));

	/* Increment the reference count if we already have the file open. */
	matched = 0;
	__wt_spin_lock(session, &conn->fh_lock);
	SLIST_FOREACH(tfh, &conn->fhhash[bucket], l)
		if (strcmp(name, tfh->name) == 0) {
			++tfh->ref;
			*fhp = tfh;
			matched = 1;
			break;
		}
	__wt_spin_unlock(session, &conn->fh_lock);
	if (matched)
		return (0);

	/* For directories, create empty file handles with invalid handles */
	if (dio_type == WT_FILE_TYPE_DIRECTORY) {
		goto setupfh;
	}

	WT_RET(__wt_filename(session, name, &path));

	share_mode = FILE_SHARE_READ | FILE_SHARE_WRITE;
	/*
	 * Security:
	 * The application may spawn a new process, and we don't want another
	 * process to have access to our file handles.
	 *
	 * TODO: Set tighter file permissions but set bInheritHandle to false
	 * to prevent inheritance
	 */

	f = FILE_ATTRIBUTE_NORMAL;

	dwCreationDisposition = 0;
	if (ok_create) {
		dwCreationDisposition = CREATE_NEW;
		if (exclusive)
			dwCreationDisposition = CREATE_ALWAYS;
	} else
		dwCreationDisposition = OPEN_EXISTING;

	if (dio_type && FLD_ISSET(conn->direct_io, dio_type)) {
		f |= FILE_FLAG_NO_BUFFERING | FILE_FLAG_WRITE_THROUGH;
		direct_io = 1;
	}

	if (dio_type == WT_FILE_TYPE_LOG &&
	    FLD_ISSET(conn->txn_logsync, WT_LOG_DSYNC)) {
		f |= FILE_FLAG_WRITE_THROUGH;
	}

	/* Disable read-ahead on trees: it slows down random read workloads. */
	if (dio_type == WT_FILE_TYPE_DATA ||
	    dio_type == WT_FILE_TYPE_CHECKPOINT)
		f |= FILE_FLAG_RANDOM_ACCESS;

	filehandle = CreateFileA(path,
				(GENERIC_READ | GENERIC_WRITE),
				share_mode,
				NULL,
				dwCreationDisposition,
				f,
				NULL);
	if (filehandle == INVALID_HANDLE_VALUE) {
		if (GetLastError() == ERROR_FILE_EXISTS && ok_create)
			filehandle = CreateFileA(path,
						(GENERIC_READ | GENERIC_WRITE),
						share_mode,
						NULL,
						OPEN_EXISTING,
						f,
						NULL);

		if (filehandle == INVALID_HANDLE_VALUE)
			WT_ERR_MSG(session, __wt_errno(),
			    direct_io ?
			    "%s: open failed with direct I/O configured, some "
			    "filesystem types do not support direct I/O" :
			    "%s", path);
	}

	/*
	 * Open a second handle to file to support allocation/truncation
	 * concurrently with reads on the file. Writes would also move the file
	 * pointer.
	 */
	filehandle_secondary = CreateFileA(path,
	    (GENERIC_READ | GENERIC_WRITE),
	    share_mode,
	    NULL,
	    OPEN_EXISTING,
	    f,
	    NULL);
	if (filehandle == INVALID_HANDLE_VALUE)
		WT_ERR_MSG(session, __wt_errno(),
		    "open failed for secondary handle: %s", path);

setupfh:
	WT_ERR(__wt_calloc_one(session, &fh));
	WT_ERR(__wt_strdup(session, name, &fh->name));
	fh->name_hash = hash;
	fh->filehandle = filehandle;
	fh->filehandle_secondary = filehandle_secondary;
	fh->ref = 1;
	fh->direct_io = direct_io;

	/* Set the file's size. */
	if (dio_type != WT_FILE_TYPE_DIRECTORY)
		WT_ERR(__wt_filesize(session, fh, &fh->size));

	/* Configure file extension. */
	if (dio_type == WT_FILE_TYPE_DATA ||
	    dio_type == WT_FILE_TYPE_CHECKPOINT)
		fh->extend_len = conn->data_extend_len;

	/* Configure fallocate/posix_fallocate calls. */
	__wt_fallocate_config(session, fh);

	/*
	 * Repeat the check for a match, but then link onto the database's list
	 * of files.
	 */
	matched = 0;
	__wt_spin_lock(session, &conn->fh_lock);
	SLIST_FOREACH(tfh, &conn->fhhash[bucket], l)
		if (strcmp(name, tfh->name) == 0) {
			++tfh->ref;
			*fhp = tfh;
			matched = 1;
			break;
		}
	if (!matched) {
		WT_CONN_FILE_INSERT(conn, fh, bucket);
		WT_STAT_FAST_CONN_INCR(session, file_open);

		*fhp = fh;
	}
	__wt_spin_unlock(session, &conn->fh_lock);
	if (matched) {
err:		if (fh != NULL) {
			__wt_free(session, fh->name);
			__wt_free(session, fh);
		}
		if (filehandle != INVALID_HANDLE_VALUE)
			(void)CloseHandle(filehandle);
		if (filehandle_secondary != INVALID_HANDLE_VALUE)
			(void)CloseHandle(filehandle_secondary);
	}

	__wt_free(session, path);
	return (ret);
}
Example #5
0
/*
 * __conn_single --
 *	Confirm that no other thread of control is using this database.
 */
static int
__conn_single(WT_SESSION_IMPL *session, const char **cfg)
{
	WT_CONFIG_ITEM cval;
	WT_CONNECTION_IMPL *conn, *t;
	WT_DECL_RET;
	off_t size;
	uint32_t len;
	int created;
	char buf[256];

	conn = S2C(session);

	/*
	 * Optionally create the wiredtiger flag file if it doesn't already
	 * exist.  We don't actually care if we create it or not, the "am I the
	 * only locker" tests are all that matter.
	 */
	WT_RET(__wt_config_gets(session, cfg, "create", &cval));
	WT_RET(__wt_open(session,
	    WT_SINGLETHREAD, cval.val == 0 ? 0 : 1, 0, 0, &conn->lock_fh));

	/*
	 * Lock a byte of the file: if we don't get the lock, some other process
	 * is holding it, we're done.  Note the file may be zero-length length,
	 * and that's OK, the underlying call supports acquisition of locks past
	 * the end-of-file.
	 */
	if (__wt_bytelock(conn->lock_fh, (off_t)0, 1) != 0)
		WT_ERR_MSG(session, EBUSY, "%s",
		    "WiredTiger database is already being managed by another "
		    "process");

	/* Check to see if another thread of control has this database open. */
	__wt_spin_lock(session, &__wt_process.spinlock);
	TAILQ_FOREACH(t, &__wt_process.connqh, q)
		if (t->home != NULL &&
		    t != conn && strcmp(t->home, conn->home) == 0) {
			ret = EBUSY;
			break;
		}
	__wt_spin_unlock(session, &__wt_process.spinlock);
	if (ret != 0)
		WT_ERR_MSG(session, EBUSY, "%s",
		    "WiredTiger database is already being managed by another "
		    "thread in this process");

	/*
	 * If the size of the file is 0, we created it (or we're racing with
	 * the thread that created it, it doesn't matter), write some bytes
	 * into the file.  Strictly speaking, this isn't even necessary, but
	 * zero-length files always make me nervous.
	 */
	WT_ERR(__wt_filesize(session, conn->lock_fh, &size));
	if (size == 0) {
		len = (uint32_t)snprintf(buf, sizeof(buf), "%s\n%s\n",
		    WT_SINGLETHREAD, wiredtiger_version(NULL, NULL, NULL));
		WT_ERR(__wt_write(
		    session, conn->lock_fh, (off_t)0, (uint32_t)len, buf));
		created = 1;
	} else
		created = 0;

	/*
	 * If we found a zero-length WiredTiger lock file, and eventually ended
	 * as the database owner, return that we created the database.  (There
	 * is a theoretical chance that another process created the WiredTiger
	 * lock file but we won the race to add the WT_CONNECTION_IMPL structure
	 * to the process' list.  It doesn't much matter, only one thread will
	 * be told it created the database.)
	 */
	conn->is_new = created;

	return (0);

err:	if (conn->lock_fh != NULL) {
		WT_TRET(__wt_close(session, conn->lock_fh));
		conn->lock_fh = NULL;
	}
	return (ret);
}
Example #6
0
/*
 * __conn_config_file --
 *	Read in any WiredTiger_config file in the home directory.
 */
static int
__conn_config_file(WT_SESSION_IMPL *session, const char **cfg, WT_ITEM **cbufp)
{
	WT_DECL_ITEM(cbuf);
	WT_DECL_RET;
	WT_FH *fh;
	off_t size;
	uint32_t len;
	int exist, quoted;
	uint8_t *p, *t;

	*cbufp = NULL;				/* Returned buffer */

	fh = NULL;

	/* Check for an optional configuration file. */
#define	WT_CONFIGFILE	"WiredTiger.config"
	WT_RET(__wt_exist(session, WT_CONFIGFILE, &exist));
	if (!exist)
		return (0);

	/* Open the configuration file. */
	WT_RET(__wt_open(session, WT_CONFIGFILE, 0, 0, 0, &fh));
	WT_ERR(__wt_filesize(session, fh, &size));
	if (size == 0)
		goto err;

	/*
	 * Sanity test: a 100KB configuration file would be insane.  (There's
	 * no practical reason to limit the file size, but I can either limit
	 * the file size to something rational, or I can add code to test if
	 * the off_t size is larger than a uint32_t, which is more complicated
	 * and a waste of time.)
	 */
	if (size > 100 * 1024)
		WT_ERR_MSG(session, EFBIG, WT_CONFIGFILE);
	len = (uint32_t)size;

	/*
	 * Copy the configuration file into memory, with a little slop, I'm not
	 * interested in debugging off-by-ones.
	 *
	 * The beginning of a file is the same as if we run into an unquoted
	 * newline character, simplify the parsing loop by pretending that's
	 * what we're doing.
	 */
	WT_ERR(__wt_scr_alloc(session, len + 10,  &cbuf));
	WT_ERR(
	    __wt_read(session, fh, (off_t)0, len, ((uint8_t *)cbuf->mem) + 1));
	((uint8_t *)cbuf->mem)[0] = '\n';
	cbuf->size = len + 1;

	/*
	 * Collapse the file's lines into a single string: newline characters
	 * are replaced with commas unless the newline is quoted or backslash
	 * escaped.  Comment lines (an unescaped newline where the next non-
	 * white-space character is a hash), are discarded.
	 */
	for (quoted = 0, p = t = cbuf->mem; len > 0;) {
		/*
		 * Backslash pairs pass through untouched, unless immediately
		 * preceding a newline, in which case both the backslash and
		 * the newline are discarded.  Backslash characters escape
		 * quoted characters, too, that is, a backslash followed by a
		 * quote doesn't start or end a quoted string.
		 */
		if (*p == '\\' && len > 1) {
			if (p[1] != '\n') {
				*t++ = p[0];
				*t++ = p[1];
			}
			p += 2;
			len -= 2;
			continue;
		}

		/*
		 * If we're in a quoted string, or starting a quoted string,
		 * take all characters, including white-space and newlines.
		 */
		if (quoted || *p == '"') {
			if (*p == '"')
				quoted = !quoted;
			*t++ = *p++;
			--len;
			continue;
		}

		/* Everything else gets taken, except for newline characters. */
		if (*p != '\n') {
			*t++ = *p++;
			--len;
			continue;
		}

		/*
		 * Replace any newline characters with commas (and strings of
		 * commas are safe).
		 *
		 * After any newline, skip to a non-white-space character; if
		 * the next character is a hash mark, skip to the next newline.
		 */
		for (;;) {
			for (*t++ = ','; --len > 0 && isspace(*++p);)
				;
			if (len == 0)
				break;
			if (*p != '#')
				break;
			while (--len > 0 && *++p != '\n')
				;
			if (len == 0)
				break;
		}
	}
	*t = '\0';

#if 0
	fprintf(stderr, "file config: {%s}\n", (const char *)cbuf->data);
#endif

	/* Check the configuration string. */
	WT_ERR(__wt_config_check(
	    session, __wt_confchk_wiredtiger_open, cbuf->data, 0));

	/*
	 * The configuration file falls between the default configuration and
	 * the wiredtiger_open() configuration, overriding the defaults but not
	 * overriding the wiredtiger_open() configuration.
	 */
	while (cfg[1] != NULL)
		++cfg;
	cfg[1] = cfg[0];
	cfg[0] = cbuf->data;

	*cbufp = cbuf;

	if (0) {
err:		if (cbuf != NULL)
			__wt_buf_free(session, cbuf);
	}
	if (fh != NULL)
		WT_TRET(__wt_close(session, fh));
	return (ret);
}
Example #7
0
/*
 * __ckpt_update --
 *	Update a checkpoint.
 */
static int
__ckpt_update(
    WT_SESSION_IMPL *session, WT_BLOCK *block, WT_CKPT *ckpt,
    WT_BLOCK_CKPT *ci, uint64_t ckpt_size, int is_live)
{
	WT_EXTLIST *alloc;
	WT_DECL_ITEM(tmp);
	WT_DECL_RET;
	uint8_t *endp;

#ifdef HAVE_DIAGNOSTIC
	/* Check the extent list combinations for overlaps. */
	WT_RET(__wt_block_extlist_check(session, &ci->alloc, &ci->avail));
	WT_RET(__wt_block_extlist_check(session, &ci->discard, &ci->avail));
	WT_RET(__wt_block_extlist_check(session, &ci->alloc, &ci->discard));
#endif
	/*
	 * Write the checkpoint's alloc and discard extent lists.  After each
	 * write, remove any allocated blocks from the system's allocation
	 * list, checkpoint extent blocks don't appear on any extent lists.
	 */
	alloc = &block->live.alloc;
	WT_RET(__wt_block_extlist_write(session, block, &ci->alloc, NULL));
	if (ci->alloc.offset != WT_BLOCK_INVALID_OFFSET)
		WT_RET(__wt_block_off_remove_overlap(
		    session, alloc, ci->alloc.offset, ci->alloc.size));
	WT_RET(__wt_block_extlist_write(session, block, &ci->discard, NULL));
	if (ci->discard.offset != WT_BLOCK_INVALID_OFFSET)
		WT_RET(__wt_block_off_remove_overlap(
		    session, alloc, ci->discard.offset, ci->discard.size));

	/*
	 * We only write an avail list for the live system, other checkpoint's
	 * avail lists are static and never change.
	 *
	 * Write the avail list last so it reflects changes due to allocating
	 * blocks for the alloc and discard lists.  Second, when we write the
	 * live system's avail list, it's two lists: the current avail list
	 * plus the list of blocks to be made available when the new checkpoint
	 * completes.  We can't merge that second list into the real list yet,
	 * it's not truly available until the new checkpoint locations have been
	 * saved to the metadata.
	 */
	if (is_live) {
		WT_RET(__wt_block_extlist_write(
		    session, block, &ci->avail, &ci->ckpt_avail));
		if (ci->avail.offset != WT_BLOCK_INVALID_OFFSET)
			WT_RET(__wt_block_off_remove_overlap(
			    session, alloc, ci->avail.offset, ci->avail.size));
	}

	/*
	 * Set the file size for the live system.
	 *
	 * XXX
	 * We do NOT set the file size when re-writing checkpoints because we
	 * want to test the checkpoint's blocks against a reasonable maximum
	 * file size during verification.  This is bad: imagine a checkpoint
	 * appearing early in the file, re-written, and then the checkpoint
	 * requires blocks at the end of the file, blocks after the listed file
	 * size.  If the application opens that checkpoint for writing
	 * (discarding subsequent checkpoints), we would truncate the file to
	 * the early chunk, discarding the re-written checkpoint information.
	 * The alternative, updating the file size has its own problems, in
	 * that case we'd work correctly, but we'd lose all of the blocks
	 * between the original checkpoint and the re-written checkpoint.
	 * Currently, there's no API to roll-forward intermediate checkpoints,
	 * if there ever is, this will need to be fixed.
	 */
	if (is_live)
		WT_RET(__wt_filesize(session, block->fh, &ci->file_size));

	/* Set the checkpoint size for the live system. */
	if (is_live)
		ci->ckpt_size = ckpt_size;

	/*
	 * Copy the checkpoint information into the checkpoint array's address
	 * cookie.
	 */
	WT_RET(__wt_buf_init(session, &ckpt->raw, WT_BTREE_MAX_ADDR_COOKIE));
	endp = ckpt->raw.mem;
	WT_RET(__wt_block_ckpt_to_buffer(session, block, &endp, ci));
	ckpt->raw.size = WT_PTRDIFF32(endp, ckpt->raw.mem);

	if (WT_VERBOSE_ISSET(session, ckpt)) {
		WT_RET(__wt_scr_alloc(session, 0, &tmp));
		WT_ERR(__ckpt_string(session, block, ckpt->raw.data, tmp));
		WT_VERBOSE_ERR(session, ckpt,
		    "%s: create-checkpoint: %s: %s",
		    block->name, ckpt->name, (char *)tmp->data);
	}

err:	__wt_scr_free(&tmp);
	return (ret);
}
Example #8
0
/*
 * __snapshot_update --
 *	Update a snapshot.
 */
static int
__snapshot_update(
    WT_SESSION_IMPL *session, WT_BLOCK *block, WT_SNAPSHOT *snap,
    WT_BLOCK_SNAPSHOT *si, uint64_t snapshot_size, int is_live)
{
	WT_DECL_ITEM(tmp);
	WT_DECL_RET;
	uint8_t *endp;

#ifdef HAVE_DIAGNOSTIC
	/* Check the extent list combinations for overlaps. */
	WT_RET(__wt_block_extlist_check(session, &si->alloc, &si->avail));
	WT_RET(__wt_block_extlist_check(session, &si->discard, &si->avail));
	WT_RET(__wt_block_extlist_check(session, &si->alloc, &si->discard));
#endif
	/*
	 * Write the snapshot's extent lists; we only write an avail list for
	 * the live system, other snapshot's avail lists are static and never
	 * change.  When we do write the avail list for the live system it's
	 * two lists: the current avail list plus the list of blocks that are
	 * being made available as of the new snapshot.  We can't merge that
	 * second list into the real list yet, it's not truly available until
	 * the new snapshot location has been saved to the metadata.
	 */
	WT_RET(__wt_block_extlist_write(session, block, &si->alloc, NULL));
	if (is_live)
		WT_RET(__wt_block_extlist_write(
		    session, block, &si->avail, &si->snapshot_avail));
	WT_RET(__wt_block_extlist_write(session, block, &si->discard, NULL));

	/*
	 * Set the file size for the live system.
	 *
	 * XXX
	 * We do NOT set the file size when re-writing snapshots because we want
	 * to test the snapshot's blocks against a reasonable maximum file size
	 * during verification.  This is not good: imagine a snapshot appearing
	 * early in the file, re-written, and then the snapshot requires blocks
	 * at the end of the file, blocks after the listed file size.  If the
	 * application opens that snapshot for writing (discarding subsequent
	 * snapshots), we would truncate the file to the early chunk, discarding
	 * the re-written snapshot information.  The alternative, updating the
	 * file size has its own problems, in that case we'd work correctly, but
	 * we'd lose all of the blocks between the original snapshot and the
	 * re-written snapshot.  Currently, there's no API to roll-forward
	 * intermediate snapshots, if there ever is, this will need to be fixed.
	 */
	if (is_live)
		WT_RET(__wt_filesize(session, block->fh, &si->file_size));

	/* Set the snapshot size for the live system. */
	if (is_live)
		si->snapshot_size = snapshot_size;

	/*
	 * Copy the snapshot information into the snapshot array's address
	 * cookie.
	 */
	WT_RET(__wt_buf_init(session, &snap->raw, WT_BTREE_MAX_ADDR_COOKIE));
	endp = snap->raw.mem;
	WT_RET(__wt_block_snapshot_to_buffer(session, block, &endp, si));
	snap->raw.size = WT_PTRDIFF32(endp, snap->raw.mem);

	if (WT_VERBOSE_ISSET(session, snapshot)) {
		WT_RET(__wt_scr_alloc(session, 0, &tmp));
		WT_ERR(__snapshot_string(session, block, snap->raw.data, tmp));
		WT_VERBOSE_ERR(session, snapshot,
		    "%s: create-snapshot: %s: %s",
		    block->name, snap->name, (char *)tmp->data);
	}

err:	__wt_scr_free(&tmp);
	return (ret);
}
Example #9
0
	WT_ERR(__wt_remove_if_exists(session, tmp->data, false));

	/* Open the from and temporary file handles. */
	WT_ERR(__wt_open(session, from, WT_FS_OPEN_FILE_TYPE_REGULAR, 0, &ffh));
	WT_ERR(__wt_open(session, tmp->data, WT_FS_OPEN_FILE_TYPE_REGULAR,
	    WT_FS_OPEN_CREATE | WT_FS_OPEN_EXCLUSIVE, &tfh));

	/*
	 * Allocate a copy buffer. Don't use a scratch buffer, this thing is
	 * big, and we don't want it hanging around.
	 */
#define	WT_BACKUP_COPY_SIZE	(128 * 1024)
	WT_ERR(__wt_malloc(session, WT_BACKUP_COPY_SIZE, &buf));

	/* Get the file's size, then copy the bytes. */
	WT_ERR(__wt_filesize(session, ffh, &size));
	for (offset = 0; size > 0; size -= n, offset += n) {
		n = WT_MIN(size, WT_BACKUP_COPY_SIZE);
		WT_ERR(__wt_read(session, ffh, offset, (size_t)n, buf));
		WT_ERR(__wt_write(session, tfh, offset, (size_t)n, buf));
	}

	/* Close the from handle, then swap the temporary file into place. */
	WT_ERR(__wt_close(session, &ffh));
	WT_ERR(__wt_fsync(session, tfh, true));
	WT_ERR(__wt_close(session, &tfh));

	ret = __wt_fs_rename(session, tmp->data, to, true);

err:	WT_TRET(__wt_close(session, &ffh));
	WT_TRET(__wt_close(session, &tfh));
Example #10
0
/*
 * __wt_open --
 *	Open a file handle.
 */
int
__wt_open(WT_SESSION_IMPL *session,
    const char *name, int ok_create, int exclusive, int dio_type, WT_FH **fhp)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_FH *fh, *tfh;
	mode_t mode;
	int direct_io, f, fd, matched;
	const char *path;

	conn = S2C(session);
	fh = NULL;
	fd = -1;
	path = NULL;

	WT_VERBOSE_RET(session, fileops, "%s: open", name);

	/* Increment the reference count if we already have the file open. */
	matched = 0;
	__wt_spin_lock(session, &conn->fh_lock);
	TAILQ_FOREACH(tfh, &conn->fhqh, q)
		if (strcmp(name, tfh->name) == 0) {
			++tfh->refcnt;
			*fhp = tfh;
			matched = 1;
			break;
		}
	__wt_spin_unlock(session, &conn->fh_lock);
	if (matched)
		return (0);

	WT_RET(__wt_filename(session, name, &path));

	f = O_RDWR;
#ifdef O_BINARY
	/* Windows clones: we always want to treat the file as a binary. */
	f |= O_BINARY;
#endif
#ifdef O_CLOEXEC
	/*
	 * Security:
	 * The application may spawn a new process, and we don't want another
	 * process to have access to our file handles.
	 */
	f |= O_CLOEXEC;
#endif
#ifdef O_NOATIME
	/* Avoid updating metadata for read-only workloads. */
	if (dio_type == WT_FILE_TYPE_DATA)
		f |= O_NOATIME;
#endif

	if (ok_create) {
		f |= O_CREAT;
		if (exclusive)
			f |= O_EXCL;
		mode = 0666;
	} else
		mode = 0;

	direct_io = 0;
#ifdef O_DIRECT
	if (dio_type && FLD_ISSET(conn->direct_io, dio_type)) {
		f |= O_DIRECT;
		direct_io = 1;
	}
#endif
	if (dio_type == WT_FILE_TYPE_LOG &&
	    FLD_ISSET(conn->txn_logsync, WT_LOG_DSYNC))
#ifdef O_DSYNC
		f |= O_DSYNC;
#elif defined(O_SYNC)
		f |= O_SYNC;
#else
		WT_ERR_MSG(session, ENOTSUP,
		    "Unsupported log sync mode requested");
#endif
	WT_SYSCALL_RETRY(((fd = open(path, f, mode)) == -1 ? 1 : 0), ret);
	if (ret != 0)
		WT_ERR_MSG(session, ret,
		    direct_io ?
		    "%s: open failed with direct I/O configured, some "
		    "filesystem types do not support direct I/O" : "%s", path);

#if defined(HAVE_FCNTL) && defined(FD_CLOEXEC) && !defined(O_CLOEXEC)
	/*
	 * Security:
	 * The application may spawn a new process, and we don't want another
	 * process to have access to our file handles.  There's an obvious
	 * race here, so we prefer the flag to open if available.
	 */
	if ((f = fcntl(fd, F_GETFD)) == -1 ||
	    fcntl(fd, F_SETFD, f | FD_CLOEXEC) == -1)
		WT_ERR_MSG(session, __wt_errno(), "%s: fcntl", name);
#endif

#if defined(HAVE_POSIX_FADVISE)
	/* Disable read-ahead on trees: it slows down random read workloads. */
	if (dio_type == WT_FILE_TYPE_DATA)
		WT_ERR(posix_fadvise(fd, 0, 0, POSIX_FADV_RANDOM));
#endif

	if (F_ISSET(conn, WT_CONN_CKPT_SYNC))
		WT_ERR(__open_directory_sync(session));

	WT_ERR(__wt_calloc(session, 1, sizeof(WT_FH), &fh));
	WT_ERR(__wt_strdup(session, name, &fh->name));
	fh->fd = fd;
	fh->refcnt = 1;
	fh->direct_io = direct_io;

	/* Set the file's size. */
	WT_ERR(__wt_filesize(session, fh, &fh->size));

	/* Configure file extension. */
	if (dio_type == WT_FILE_TYPE_DATA)
		fh->extend_len = conn->data_extend_len;

	/*
	 * Repeat the check for a match, but then link onto the database's list
	 * of files.
	 */
	matched = 0;
	__wt_spin_lock(session, &conn->fh_lock);
	TAILQ_FOREACH(tfh, &conn->fhqh, q)
		if (strcmp(name, tfh->name) == 0) {
			++tfh->refcnt;
			*fhp = tfh;
			matched = 1;
			break;
		}
	if (!matched) {
		TAILQ_INSERT_TAIL(&conn->fhqh, fh, q);
		WT_STAT_FAST_CONN_INCR(session, file_open);

		*fhp = fh;
	}
	__wt_spin_unlock(session, &conn->fh_lock);
	if (matched) {
err:		if (fh != NULL) {
			__wt_free(session, fh->name);
			__wt_free(session, fh);
		}
		if (fd != -1)
			(void)close(fd);
	}

	__wt_free(session, path);
	return (ret);
}