Example #1
0
/*
 * __wt_dirlist --
 *	Get a list of files from a directory, optionally filtered by
 *	a given prefix.
 *
 *	A flags value of 0 is treated as WT_DIRLIST_INCLUDE. With a NULL
 *	prefix every name other than "." and ".." is returned; otherwise a
 *	name is kept when it matches the prefix (include) or when it does not
 *	(exclude).
 *
 *	On success *dirlist references an allocated array of allocated names
 *	(NULL if nothing was selected) and *countp is the number of names. On
 *	failure *dirlist is NULL, *countp is 0 and any partially built list
 *	has been discarded.
 */
int
__wt_dirlist(WT_SESSION_IMPL *session, const char *dir, const char *prefix,
    uint32_t flags, char ***dirlist, u_int *countp)
{
	struct dirent *dp;
	DIR *dirp;
	WT_DECL_RET;
	size_t dirallocsz;
	u_int count, dirsz;
	int match;
	char **entries, *path;

	*dirlist = NULL;
	*countp = 0;

	WT_RET(__wt_filename(session, dir, &path));

	dirp = NULL;
	dirallocsz = 0;
	/*
	 * Initialize the counters before the first possible jump to the error
	 * label, the cleanup code reads them.
	 */
	count = dirsz = 0;
	entries = NULL;
	if (flags == 0)
		LF_SET(WT_DIRLIST_INCLUDE);

	WT_ERR(__wt_verbose(session, WT_VERB_FILEOPS,
	    "wt_dirlist of %s %s prefix %s",
	    path, LF_ISSET(WT_DIRLIST_INCLUDE) ? "include" : "exclude",
	    prefix == NULL ? "all" : prefix));

	WT_SYSCALL_RETRY(((dirp = opendir(path)) == NULL ? 1 : 0), ret);
	if (ret != 0)
		WT_ERR_MSG(session, ret, "%s: opendir", path);
	while ((dp = readdir(dirp)) != NULL) {
		/*
		 * Skip . and ..
		 */
		if (strcmp(dp->d_name, ".") == 0 ||
		    strcmp(dp->d_name, "..") == 0)
			continue;
		match = 0;
		if (prefix != NULL &&
		    ((LF_ISSET(WT_DIRLIST_INCLUDE) &&
		    WT_PREFIX_MATCH(dp->d_name, prefix)) ||
		    (LF_ISSET(WT_DIRLIST_EXCLUDE) &&
		    !WT_PREFIX_MATCH(dp->d_name, prefix))))
			match = 1;
		if (prefix == NULL || match) {
			/*
			 * We have a file name we want to return. Only bump the
			 * array size and the entry count after the matching
			 * allocation succeeded, so that on failure "count" is
			 * exactly the number of names owned by the array.
			 */
			if (count >= dirsz) {
				WT_ERR(__wt_realloc_def(session, &dirallocsz,
				    dirsz + WT_DIR_ENTRY, &entries));
				dirsz += WT_DIR_ENTRY;
			}
			WT_ERR(__wt_strdup(
			    session, dp->d_name, &entries[count]));
			++count;
		}
	}
	if (count > 0)
		*dirlist = entries;
	*countp = count;
err:
	if (dirp != NULL)
		(void)closedir(dirp);
	__wt_free(session, path);

	if (ret == 0)
		return (0);

	/*
	 * Discard a partially built list. The caller's pointer is only set on
	 * success, so test the local array rather than *dirlist, and free
	 * slots [0, count), the names that were successfully copied.
	 */
	if (entries != NULL) {
		for (; count > 0; count--)
			__wt_free(session, entries[count - 1]);
		__wt_free(session, entries);
	}
	/* A NULL prefix means "all"; never hand NULL to a %s conversion. */
	WT_RET_MSG(session, ret, "dirlist %s prefix %s",
	    dir, prefix == NULL ? "all" : prefix);
}
Example #2
0
/*
 * __wt_schema_open_table --
 *	Open a named table.
 *
 *	Looks up "table:<name>" in the metadata, builds a WT_TABLE from the
 *	stored configuration and opens its column groups. The caller must hold
 *	the table lock (asserted below). Unless ok_incomplete is set, a table
 *	whose column groups are not all created is rejected with EINVAL.
 *
 *	On success the new table is returned in *tablep; on failure any
 *	partially built table is destroyed and *tablep is not written.
 */
int
__wt_schema_open_table(WT_SESSION_IMPL *session,
    const char *name, size_t namelen, int ok_incomplete, WT_TABLE **tablep)
{
	WT_CONFIG cparser;
	WT_CONFIG_ITEM ckey, cval;
	WT_CURSOR *cursor;
	WT_DECL_ITEM(buf);
	WT_DECL_RET;
	WT_TABLE *table;
	const char *tconfig;
	char *tablename;

	cursor = NULL;
	table = NULL;
	tablename = NULL;

	WT_ASSERT(session, F_ISSET(session, WT_SESSION_TABLE_LOCKED));

	/* Build the metadata key, "table:" followed by the caller's name. */
	WT_ERR(__wt_scr_alloc(session, 0, &buf));
	WT_ERR(__wt_buf_fmt(session, buf, "table:%.*s", (int)namelen, name));
	WT_ERR(__wt_strndup(session, buf->data, buf->size, &tablename));

	WT_ERR(__wt_metadata_cursor(session, NULL, &cursor));
	cursor->set_key(cursor, tablename);
	WT_ERR(cursor->search(cursor));
	WT_ERR(cursor->get_value(cursor, &tconfig));

	/*
	 * The table takes ownership of the name: clear the local pointer so
	 * the cleanup code doesn't free it a second time. From here on, use
	 * table->name, not tablename.
	 */
	WT_ERR(__wt_calloc_def(session, 1, &table));
	table->name = tablename;
	tablename = NULL;

	/* The value is unused, this only checks the lookup succeeds. */
	WT_ERR(__wt_config_getones(session, tconfig, "columns", &cval));

	WT_ERR(__wt_config_getones(session, tconfig, "key_format", &cval));
	WT_ERR(__wt_strndup(session, cval.str, cval.len, &table->key_format));
	WT_ERR(__wt_config_getones(session, tconfig, "value_format", &cval));
	WT_ERR(__wt_strndup(session, cval.str, cval.len, &table->value_format));
	WT_ERR(__wt_strdup(session, tconfig, &table->config));

	/* Point to some items in the copy to save re-parsing. */
	WT_ERR(__wt_config_getones(session, table->config,
	    "columns", &table->colconf));

	/*
	 * Count the number of columns: tables are "simple" if the columns
	 * are not named.
	 */
	WT_ERR(__wt_config_subinit(session, &cparser, &table->colconf));
	table->is_simple = 1;
	while ((ret = __wt_config_next(&cparser, &ckey, &cval)) == 0)
		table->is_simple = 0;
	/* WT_NOTFOUND is the expected end of the list. */
	if (ret != WT_NOTFOUND)
		goto err;

	/* Check that the columns match the key and value formats. */
	if (!table->is_simple)
		WT_ERR(__wt_schema_colcheck(session,
		    table->key_format, table->value_format, &table->colconf,
		    &table->nkey_columns, NULL));

	WT_ERR(__wt_config_getones(session, table->config,
	    "colgroups", &table->cgconf));

	/* Count the number of column groups. */
	WT_ERR(__wt_config_subinit(session, &cparser, &table->cgconf));
	table->ncolgroups = 0;
	while ((ret = __wt_config_next(&cparser, &ckey, &cval)) == 0)
		++table->ncolgroups;
	if (ret != WT_NOTFOUND)
		goto err;

	/*
	 * Report the table by its owned name: the local tablename pointer was
	 * cleared when ownership moved to the table structure.
	 */
	if (table->ncolgroups > 0 && table->is_simple)
		WT_ERR_MSG(session, EINVAL,
		    "%s requires a table with named columns", table->name);

	WT_ERR(__wt_calloc_def(session, WT_COLGROUPS(table), &table->cgroups));
	WT_ERR(__wt_schema_open_colgroups(session, table));

	if (!ok_incomplete && !table->cg_complete)
		WT_ERR_MSG(session, EINVAL, "'%s' cannot be used "
		    "until all column groups are created",
		    table->name);

	/* Copy the schema generation into the new table. */
	table->schema_gen = S2C(session)->schema_gen;

	*tablep = table;

	/* The error label is only reachable by jumping into this block. */
	if (0) {
err:		if (table != NULL)
			WT_TRET(__wt_schema_destroy_table(session, table));
	}
	if (cursor != NULL)
		WT_TRET(cursor->close(cursor));

	__wt_free(session, tablename);
	__wt_scr_free(&buf);
	return (ret);
}
Example #3
0
/*
 * __win_open_file --
 *	Open a file handle.
 *
 *	Allocates a Windows file-handle structure, opens the named file
 *	according to file_type and flags (directories get a structure with
 *	invalid handles and no open), fills in the method table and returns
 *	the handle in *file_handlep. The file_system argument is unused.
 *
 *	On failure the partially built handle is passed to __win_file_close
 *	and *file_handlep is left NULL.
 */
static int
__win_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session,
    const char *name, WT_FS_OPEN_FILE_TYPE file_type, uint32_t flags,
    WT_FILE_HANDLE **file_handlep)
{
	DWORD dwCreationDisposition, windows_error;
	WT_CONNECTION_IMPL *conn;
	WT_DECL_ITEM(name_wide);
	WT_DECL_RET;
	WT_FILE_HANDLE *file_handle;
	WT_FILE_HANDLE_WIN *win_fh;
	WT_SESSION_IMPL *session;
	int desired_access, f;

	WT_UNUSED(file_system);
	session = (WT_SESSION_IMPL *)wt_session;
	conn = S2C(session);
	*file_handlep = NULL;

	WT_RET(__wt_calloc_one(session, &win_fh));
	win_fh->direct_io = false;

	/*
	 * Set up error handling: mark both handles invalid so the cleanup
	 * path can be handed the structure at any point.
	 */
	win_fh->filehandle =
	    win_fh->filehandle_secondary = INVALID_HANDLE_VALUE;

	WT_ERR(__wt_to_utf16_string(session, name, &name_wide));

	/*
	 * Opening a file handle on a directory is only to support filesystems
	 * that require a directory sync for durability, and Windows doesn't
	 * require that functionality: create an empty WT_FH structure with
	 * invalid handles.
	 */
	if (file_type == WT_FS_OPEN_FILE_TYPE_DIRECTORY)
		goto directory_open;

	desired_access = GENERIC_READ;
	if (!LF_ISSET(WT_FS_OPEN_READONLY))
		desired_access |= GENERIC_WRITE;

	/*
	 * Security:
	 * The application may spawn a new process, and we don't want another
	 * process to have access to our file handles.
	 *
	 * TODO: Set tighter file permissions but set bInheritHandle to false
	 * to prevent inheritance
	 */
	f = FILE_ATTRIBUTE_NORMAL;

	dwCreationDisposition = 0;
	if (LF_ISSET(WT_FS_OPEN_CREATE)) {
		dwCreationDisposition = CREATE_NEW;
		if (LF_ISSET(WT_FS_OPEN_EXCLUSIVE))
			dwCreationDisposition = CREATE_ALWAYS;
	} else
		dwCreationDisposition = OPEN_EXISTING;

	/* Direct I/O. */
	if (LF_ISSET(WT_FS_OPEN_DIRECTIO)) {
		f |= FILE_FLAG_NO_BUFFERING;
		win_fh->direct_io = true;
	}

	/* FILE_FLAG_WRITE_THROUGH does not require aligned buffers */
	if (FLD_ISSET(conn->write_through, file_type))
		f |= FILE_FLAG_WRITE_THROUGH;

	if (file_type == WT_FS_OPEN_FILE_TYPE_LOG &&
	    FLD_ISSET(conn->txn_logsync, WT_LOG_DSYNC))
		f |= FILE_FLAG_WRITE_THROUGH;

	/* If the user indicated a random workload, disable read-ahead. */
	if (file_type == WT_FS_OPEN_FILE_TYPE_DATA &&
	    LF_ISSET(WT_FS_OPEN_ACCESS_RAND))
		f |= FILE_FLAG_RANDOM_ACCESS;

	/* If the user indicated a sequential workload, set that. */
	if (file_type == WT_FS_OPEN_FILE_TYPE_DATA &&
	    LF_ISSET(WT_FS_OPEN_ACCESS_SEQ))
		f |= FILE_FLAG_SEQUENTIAL_SCAN;

	win_fh->filehandle = CreateFileW(name_wide->data, desired_access,
	    FILE_SHARE_READ | FILE_SHARE_WRITE,
	    NULL, dwCreationDisposition, f, NULL);
	if (win_fh->filehandle == INVALID_HANDLE_VALUE) {
		/*
		 * A create request on a file that already exists is retried
		 * as a plain open of the existing file.
		 */
		if (LF_ISSET(WT_FS_OPEN_CREATE) &&
		    GetLastError() == ERROR_FILE_EXISTS)
			win_fh->filehandle = CreateFileW(name_wide->data,
			    desired_access, FILE_SHARE_READ | FILE_SHARE_WRITE,
			    NULL, OPEN_EXISTING, f, NULL);
		if (win_fh->filehandle == INVALID_HANDLE_VALUE) {
			windows_error = __wt_getlasterror();
			ret = __wt_map_windows_error(windows_error);
			__wt_err(session, ret,
			    win_fh->direct_io ?
			    "%s: handle-open: CreateFileW: failed with direct "
			    "I/O configured, some filesystem types do not "
			    "support direct I/O: %s" :
			    "%s: handle-open: CreateFileW: %s",
			    name, __wt_formatmessage(session, windows_error));
			WT_ERR(ret);
		}
	}

	/*
	 * Open a second handle to file to support file extension/truncation
	 * concurrently with reads on the file. Writes would also move the
	 * file pointer.
	 */
	if (!LF_ISSET(WT_FS_OPEN_READONLY)) {
		win_fh->filehandle_secondary = CreateFileW(name_wide->data,
		    desired_access, FILE_SHARE_READ | FILE_SHARE_WRITE,
		    NULL, OPEN_EXISTING, f, NULL);
		if (win_fh->filehandle_secondary == INVALID_HANDLE_VALUE) {
			windows_error = __wt_getlasterror();
			ret = __wt_map_windows_error(windows_error);
			/* Name the call that failed, as for the first handle. */
			__wt_err(session, ret,
			    "%s: handle-open: CreateFileW: secondary: %s",
			    name, __wt_formatmessage(session, windows_error));
			WT_ERR(ret);
		}
	}

directory_open:
	/* Initialize public information. */
	file_handle = (WT_FILE_HANDLE *)win_fh;
	WT_ERR(__wt_strdup(session, name, &file_handle->name));

	file_handle->close = __win_file_close;
	file_handle->fh_lock = __win_file_lock;
#ifdef WORDS_BIGENDIAN
	/*
	 * The underlying objects are little-endian, mapping objects isn't
	 * currently supported on big-endian systems.
	 */
#else
	file_handle->fh_map = __wt_win_map;
	file_handle->fh_unmap = __wt_win_unmap;
#endif
	file_handle->fh_read = __win_file_read;
	file_handle->fh_size = __win_file_size;
	file_handle->fh_sync = __win_file_sync;

	/* Extend and truncate share the same implementation. */
	file_handle->fh_extend = __win_file_set_end;
	file_handle->fh_truncate = __win_file_set_end;

	file_handle->fh_write = __win_file_write;

	*file_handlep = file_handle;

	__wt_scr_free(session, &name_wide);
	return (0);

	/* Failure: release the scratch buffer and the partial handle. */
err:	__wt_scr_free(session, &name_wide);
	WT_TRET(__win_file_close((WT_FILE_HANDLE *)win_fh, wt_session));
	return (ret);
}
Example #4
0
/*
 * __wt_schema_open_table --
 *	Open a named table: read its metadata entry, build the in-memory
 *	table structure and open its column groups. The new table is returned
 *	in *tablep on success; on failure a partially built table is
 *	destroyed.
 */
int
__wt_schema_open_table(WT_SESSION_IMPL *session,
    const char *name, size_t namelen, WT_TABLE **tablep)
{
	WT_CONFIG parser;
	WT_CONFIG_ITEM k, v;
	WT_CURSOR *mdcursor;
	WT_DECL_RET;
	WT_ITEM uribuf;
	WT_TABLE *tbl;
	const char *mdconfig;
	char *uri;

	tbl = NULL;
	mdcursor = NULL;

	/* Format the metadata key and take over the formatted memory. */
	WT_CLEAR(uribuf);
	WT_RET(__wt_buf_fmt(
	    session, &uribuf, "table:%.*s", (int)namelen, name));
	uri = __wt_buf_steal(session, &uribuf, NULL);

	/* Look the table up in the metadata. */
	WT_ERR(__wt_metadata_cursor(session, NULL, &mdcursor));
	mdcursor->set_key(mdcursor, uri);
	WT_ERR(mdcursor->search(mdcursor));
	WT_ERR(mdcursor->get_value(mdcursor, &mdconfig));

	/* The table structure owns the name from here on. */
	WT_ERR(__wt_calloc_def(session, 1, &tbl));
	tbl->name = uri;
	uri = NULL;

	WT_ERR(__wt_config_getones(session, mdconfig, "columns", &v));

	/* Copy the formats and the configuration string itself. */
	WT_ERR(__wt_config_getones(session, mdconfig, "key_format", &v));
	WT_ERR(__wt_strndup(session, v.str, v.len, &tbl->key_format));
	WT_ERR(__wt_config_getones(session, mdconfig, "value_format", &v));
	WT_ERR(__wt_strndup(session, v.str, v.len, &tbl->value_format));
	WT_ERR(__wt_strdup(session, mdconfig, &tbl->config));

	/* Reference the column list inside our copy to avoid re-parsing. */
	WT_ERR(__wt_config_getones(
	    session, tbl->config, "columns", &tbl->colconf));

	/* A table with no named columns is "simple". */
	WT_ERR(__wt_config_subinit(session, &parser, &tbl->colconf));
	for (tbl->is_simple = 1;
	    (ret = __wt_config_next(&parser, &k, &v)) == 0;)
		tbl->is_simple = 0;
	if (ret != WT_NOTFOUND)
		goto err;

	/* Named columns must agree with the key and value formats. */
	if (!tbl->is_simple) {
		WT_ERR(__wt_schema_colcheck(session,
		    tbl->key_format, tbl->value_format, &tbl->colconf,
		    &tbl->nkey_columns, NULL));
	}

	WT_ERR(__wt_config_getones(
	    session, tbl->config, "colgroups", &tbl->cgconf));

	/* Walk the column group list to count its entries. */
	WT_ERR(__wt_config_subinit(session, &parser, &tbl->cgconf));
	for (tbl->ncolgroups = 0;
	    (ret = __wt_config_next(&parser, &k, &v)) == 0;)
		++tbl->ncolgroups;
	if (ret != WT_NOTFOUND)
		goto err;

	WT_ERR(__wt_calloc_def(session, WT_COLGROUPS(tbl), &tbl->cgroups));
	WT_ERR(__wt_schema_open_colgroups(session, tbl));

	/*
	 * Success: hand the table to the caller and forget our reference so
	 * the shared cleanup below leaves it alone.
	 */
	*tablep = tbl;
	tbl = NULL;

err:	if (tbl != NULL)
		__wt_schema_destroy_table(session, tbl);
	if (mdcursor != NULL)
		WT_TRET(mdcursor->close(mdcursor));
	__wt_free(session, uri);
	return (ret);
}
Example #5
0
/*
 * __wt_schema_open_index --
 *	Open one or more indices for a table.
 *
 *	With a NULL idxname every index of the table is opened and the table
 *	is marked complete; otherwise only the index whose name matches
 *	idxname (len bytes) is opened. While scanning the metadata, the
 *	table's in-memory index list is kept in step with it: entries no
 *	longer present are removed and slots are inserted for new ones.
 *
 *	If indexp is not NULL it is set to the last matching index found.
 */
int
__wt_schema_open_index(WT_SESSION_IMPL *session,
    WT_TABLE *table, const char *idxname, size_t len, WT_INDEX **indexp)
{
	WT_CURSOR *cursor;
	WT_DECL_ITEM(tmp);
	WT_DECL_RET;
	WT_INDEX *idx, *stale;
	u_int i;
	int cmp, match;
	const char *idxconf, *name, *tablename, *uri;

	/* Check if we've already done the work. */
	if (idxname == NULL && table->idx_complete)
		return (0);

	cursor = NULL;
	idx = NULL;

	/* Build a search key. */
	tablename = table->name;
	(void)WT_PREFIX_SKIP(tablename, "table:");
	WT_ERR(__wt_scr_alloc(session, 512, &tmp));
	WT_ERR(__wt_buf_fmt(session, tmp, "index:%s:", tablename));

	/* Find matching indices. */
	WT_ERR(__wt_metadata_cursor(session, NULL, &cursor));
	cursor->set_key(cursor, tmp->data);
	if ((ret = cursor->search_near(cursor, &cmp)) == 0 && cmp < 0)
		ret = cursor->next(cursor);
	for (i = 0; ret == 0; i++, ret = cursor->next(cursor)) {
		WT_ERR(cursor->get_key(cursor, &uri));
		name = uri;
		/* Stop at the first key that isn't one of our indices. */
		if (!WT_PREFIX_SKIP(name, tmp->data))
			break;

		/* Is this the index we are looking for? */
		match = idxname == NULL || WT_STRING_MATCH(name, idxname, len);

		/*
		 * Ensure there is space, including if we have to make room for
		 * a new entry in the middle of the list.
		 */
		WT_ERR(__wt_realloc_def(session, &table->idx_alloc,
		    WT_MAX(i, table->nindices) + 1, &table->indices));

		/* Keep the in-memory list in sync with the metadata. */
		cmp = 0;
		while (table->indices[i] != NULL &&
		    (cmp = strcmp(uri, table->indices[i]->name)) > 0) {
			/*
			 * Index no longer exists, remove it. Unlink the entry
			 * from the list first, then tear it down with the
			 * destroy function (as the error path below does for
			 * a partially built index): freeing only the structure
			 * would leak the strings it owns.
			 */
			stale = table->indices[i];
			memmove(&table->indices[i], &table->indices[i + 1],
			    (table->nindices - i) * sizeof(WT_INDEX *));
			table->indices[--table->nindices] = NULL;
			WT_ERR(__wt_schema_destroy_index(session, stale));
		}
		if (cmp < 0) {
			/* Make room for a new index. */
			memmove(&table->indices[i + 1], &table->indices[i],
			    (table->nindices - i) * sizeof(WT_INDEX *));
			table->indices[i] = NULL;
			++table->nindices;
		}

		if (!match)
			continue;

		if (table->indices[i] == NULL) {
			WT_ERR(cursor->get_value(cursor, &idxconf));
			WT_ERR(__wt_calloc_def(session, 1, &idx));
			WT_ERR(__wt_strdup(session, uri, &idx->name));
			WT_ERR(__wt_strdup(session, idxconf, &idx->config));
			WT_ERR(__open_index(session, table, idx));

			/* The list owns the index now. */
			table->indices[i] = idx;
			idx = NULL;
		}

		/* If we were looking for a single index, we're done. */
		if (indexp != NULL)
			*indexp = table->indices[i];
		if (idxname != NULL)
			break;
	}
	WT_ERR_NOTFOUND_OK(ret);

	/* If we did a full pass, we won't need to do it again. */
	if (idxname == NULL) {
		table->nindices = i;
		table->idx_complete = 1;
	}

err:	__wt_scr_free(&tmp);
	/* Only non-NULL if we failed part-way through building an index. */
	if (idx != NULL)
		WT_TRET(__wt_schema_destroy_index(session, idx));
	if (cursor != NULL)
		WT_TRET(cursor->close(cursor));
	return (ret);
}
Example #6
0
/*
 * __wt_open --
 *	Open a file handle: return the already-open handle for the name if
 *	the handle search finds one, otherwise create a new handle, open it
 *	through the connection's open function and register it.
 */
int
__wt_open(WT_SESSION_IMPL *session,
          const char *name, uint32_t file_type, uint32_t flags, WT_FH **fhp)
{
    WT_CONNECTION_IMPL *connection;
    WT_DECL_RET;
    WT_FH *handle;
    bool is_lock_file, opened;
    char *fullpath;

    /* Callers must always say what kind of file this is. */
    WT_ASSERT(session, file_type != 0);

    connection = S2C(session);
    fullpath = NULL;
    handle = NULL;
    opened = false;

    WT_RET(__open_verbose(session, name, file_type, flags));

    /* Fast path: the file is already open, hand back that handle. */
    if (__wt_handle_search(session, name, true, NULL, &handle)) {
        /*
         * XXX
         * Re-opening an in-memory file has to rewind its offset, which
         * in turn depends on in-memory configurations never opening a
         * file in more than one thread at a time. This needs to be
         * fixed.
         */
        if (F_ISSET(handle, WT_FH_IN_MEMORY) && handle->ref == 1)
            handle->off = 0;
        *fhp = handle;
        return (0);
    }

    /* Build a new, named handle. */
    WT_ERR(__wt_calloc_one(session, &handle));
    WT_ERR(__wt_strdup(session, name, &handle->name));

    /*
     * Read-only connections open every file read-only, with the single
     * exception of the lock file, which is also the only file that may
     * be created in that mode.
     */
    if (F_ISSET(connection, WT_CONN_READONLY)) {
        is_lock_file = (strcmp(name, WT_SINGLETHREAD) == 0);
        if (!is_lock_file)
            LF_SET(WT_OPEN_READONLY);
        WT_ASSERT(session, is_lock_file || !LF_ISSET(WT_OPEN_CREATE));
    }

    /* Unless the name is fixed, expand it into a path. */
    if (!LF_ISSET(WT_OPEN_FIXED))
        WT_ERR(__wt_filename(session, name, &fullpath));

    /* Hand off to the underlying open function. */
    WT_ERR(connection->handle_open(session,
               handle, fullpath != NULL ? fullpath : name, file_type, flags));
    opened = true;

    /*
     * Search again, passing our new handle: when there is no match we
     * keep the new handle and are finished. A match means our handle is
     * not needed and is discarded by the cleanup code below.
     */
    if (!__wt_handle_search(session, name, true, handle, fhp))
        goto done;

err:
    if (opened)
        WT_TRET(handle->fh_close(session, handle));
    if (handle != NULL) {
        __wt_free(session, handle->name);
        __wt_free(session, handle);
    }

done:
    __wt_free(session, fullpath);
    return (ret);
}
Example #7
0
File: os_open.c Project: To4e/mongo
/*
 * __wt_open --
 *	Open a file handle.
 *
 *	If a handle with the same name is already on the connection's hash
 *	list, its reference count is incremented and it is returned.
 *	Otherwise the file is opened (directories get a handle structure with
 *	invalid OS handles), a second OS handle is opened on the same file,
 *	and the new handle is linked onto the connection's list unless another
 *	thread added a matching handle in the meantime.
 *
 *	ok_create allows the file to be created, exclusive selects the
 *	creation disposition used with it, and dio_type is the file type used
 *	to choose direct I/O and other open flags.
 */
int
__wt_open(WT_SESSION_IMPL *session,
    const char *name, int ok_create, int exclusive, int dio_type, WT_FH **fhp)
{
	DWORD dwCreationDisposition;
	HANDLE filehandle, filehandle_secondary;
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_FH *fh, *tfh;
	uint64_t bucket, hash;
	int direct_io, f, matched, share_mode;
	char *path;

	conn = S2C(session);
	fh = NULL;
	path = NULL;
	filehandle = INVALID_HANDLE_VALUE;
	filehandle_secondary = INVALID_HANDLE_VALUE;
	direct_io = 0;
	hash = __wt_hash_city64(name, strlen(name));
	bucket = hash % WT_HASH_ARRAY_SIZE;

	WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, "%s: open", name));

	/* Increment the reference count if we already have the file open. */
	matched = 0;
	__wt_spin_lock(session, &conn->fh_lock);
	TAILQ_FOREACH(tfh, &conn->fhhash[bucket], hashq)
		if (strcmp(name, tfh->name) == 0) {
			++tfh->ref;
			*fhp = tfh;
			matched = 1;
			break;
		}
	__wt_spin_unlock(session, &conn->fh_lock);
	if (matched)
		return (0);

	/* For directories, create empty file handles with invalid handles */
	if (dio_type == WT_FILE_TYPE_DIRECTORY) {
		goto setupfh;
	}

	WT_RET(__wt_filename(session, name, &path));

	share_mode = FILE_SHARE_READ | FILE_SHARE_WRITE;
	/*
	 * Security:
	 * The application may spawn a new process, and we don't want another
	 * process to have access to our file handles.
	 *
	 * TODO: Set tighter file permissions but set bInheritHandle to false
	 * to prevent inheritance
	 */

	f = FILE_ATTRIBUTE_NORMAL;

	dwCreationDisposition = 0;
	if (ok_create) {
		dwCreationDisposition = CREATE_NEW;
		if (exclusive)
			dwCreationDisposition = CREATE_ALWAYS;
	} else
		dwCreationDisposition = OPEN_EXISTING;

	if (dio_type && FLD_ISSET(conn->direct_io, dio_type)) {
		f |= FILE_FLAG_NO_BUFFERING | FILE_FLAG_WRITE_THROUGH;
		direct_io = 1;
	}

	if (dio_type == WT_FILE_TYPE_LOG &&
	    FLD_ISSET(conn->txn_logsync, WT_LOG_DSYNC)) {
		f |= FILE_FLAG_WRITE_THROUGH;
	}

	/* Disable read-ahead on trees: it slows down random read workloads. */
	if (dio_type == WT_FILE_TYPE_DATA ||
	    dio_type == WT_FILE_TYPE_CHECKPOINT)
		f |= FILE_FLAG_RANDOM_ACCESS;

	filehandle = CreateFileA(path,
				(GENERIC_READ | GENERIC_WRITE),
				share_mode,
				NULL,
				dwCreationDisposition,
				f,
				NULL);
	if (filehandle == INVALID_HANDLE_VALUE) {
		/*
		 * A create request on a file that already exists is retried
		 * as a plain open of the existing file.
		 */
		if (GetLastError() == ERROR_FILE_EXISTS && ok_create)
			filehandle = CreateFileA(path,
						(GENERIC_READ | GENERIC_WRITE),
						share_mode,
						NULL,
						OPEN_EXISTING,
						f,
						NULL);

		if (filehandle == INVALID_HANDLE_VALUE)
			WT_ERR_MSG(session, __wt_errno(),
			    direct_io ?
			    "%s: open failed with direct I/O configured, some "
			    "filesystem types do not support direct I/O" :
			    "%s", path);
	}

	/*
	 * Open a second handle to file to support allocation/truncation
	 * concurrently with reads on the file. Writes would also move the file
	 * pointer.
	 */
	filehandle_secondary = CreateFileA(path,
	    (GENERIC_READ | GENERIC_WRITE),
	    share_mode,
	    NULL,
	    OPEN_EXISTING,
	    f,
	    NULL);
	/*
	 * Check the handle we just opened: the primary handle is known to be
	 * valid at this point, so testing it could never detect a failure.
	 */
	if (filehandle_secondary == INVALID_HANDLE_VALUE)
		WT_ERR_MSG(session, __wt_errno(),
		    "open failed for secondary handle: %s", path);

setupfh:
	WT_ERR(__wt_calloc_one(session, &fh));
	WT_ERR(__wt_strdup(session, name, &fh->name));
	fh->name_hash = hash;
	fh->filehandle = filehandle;
	fh->filehandle_secondary = filehandle_secondary;
	fh->ref = 1;
	fh->direct_io = direct_io;

	/* Set the file's size. */
	if (dio_type != WT_FILE_TYPE_DIRECTORY)
		WT_ERR(__wt_filesize(session, fh, &fh->size));

	/* Configure file extension. */
	if (dio_type == WT_FILE_TYPE_DATA ||
	    dio_type == WT_FILE_TYPE_CHECKPOINT)
		fh->extend_len = conn->data_extend_len;

	/* Configure fallocate/posix_fallocate calls. */
	__wt_fallocate_config(session, fh);

	/*
	 * Repeat the check for a match, but then link onto the database's list
	 * of files.
	 */
	matched = 0;
	__wt_spin_lock(session, &conn->fh_lock);
	TAILQ_FOREACH(tfh, &conn->fhhash[bucket], hashq)
		if (strcmp(name, tfh->name) == 0) {
			++tfh->ref;
			*fhp = tfh;
			matched = 1;
			break;
		}
	if (!matched) {
		WT_CONN_FILE_INSERT(conn, fh, bucket);
		(void)WT_ATOMIC_ADD4(conn->open_file_count, 1);

		*fhp = fh;
	}
	__wt_spin_unlock(session, &conn->fh_lock);
	/*
	 * Discard our handle if another thread's handle was used instead; the
	 * error label shares the same cleanup.
	 */
	if (matched) {
err:		if (fh != NULL) {
			__wt_free(session, fh->name);
			__wt_free(session, fh);
		}
		if (filehandle != INVALID_HANDLE_VALUE)
			(void)CloseHandle(filehandle);
		if (filehandle_secondary != INVALID_HANDLE_VALUE)
			(void)CloseHandle(filehandle_secondary);
	}

	__wt_free(session, path);
	return (ret);
}
Example #8
0
/*
 * __wt_curjoin_open --
 *	Create and initialize a join cursor for a "join:table:<name>" URI,
 *	with an optional "(...)" projection following the table name.
 *
 *	Join cursors are read-only: every modifying method is the not-supported
 *	handler.
 */
int
__wt_curjoin_open(WT_SESSION_IMPL *session,
    const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp)
{
	WT_CURSOR_STATIC_INIT(iface,
	    __curjoin_get_key,		/* get-key */
	    __curjoin_get_value,	/* get-value */
	    __wt_cursor_notsup,		/* set-key */
	    __wt_cursor_notsup,		/* set-value */
	    __wt_cursor_notsup,		/* compare */
	    __wt_cursor_notsup,		/* equals */
	    __curjoin_next,		/* next */
	    __wt_cursor_notsup,		/* prev */
	    __curjoin_reset,		/* reset */
	    __wt_cursor_notsup,		/* search */
	    __wt_cursor_notsup,		/* search-near */
	    __wt_cursor_notsup,		/* insert */
	    __wt_cursor_notsup,		/* update */
	    __wt_cursor_notsup,		/* remove */
	    __wt_cursor_notsup,		/* reconfigure */
	    __curjoin_close);		/* close */
	WT_CURSOR *cursor;
	WT_CURSOR_JOIN *join;
	WT_DECL_ITEM(fmtbuf);
	WT_DECL_RET;
	WT_TABLE *tbl;
	size_t tnamelen;
	const char *projection, *tname;

	/* The public cursor must be the first member of the join cursor. */
	WT_STATIC_ASSERT(offsetof(WT_CURSOR_JOIN, iface) == 0);

	/* Strip the "join:" and "table:" prefixes, rejecting anything else. */
	if (!WT_PREFIX_SKIP(uri, "join:"))
		return (EINVAL);
	tname = uri;
	if (!WT_PREFIX_SKIP(tname, "table:"))
		return (EINVAL);

	/* The table name ends at the projection, if there is one. */
	projection = strchr(tname, '(');
	tnamelen = projection == NULL ?
	    strlen(tname) : WT_PTRDIFF(projection, tname);
	WT_RET(__wt_schema_get_table(session, tname, tnamelen, 0, &tbl));

	/* Allocate the cursor and seed it from the table. */
	WT_RET(__wt_calloc_one(session, &join));
	join->table = tbl;
	cursor = &join->iface;
	*cursor = iface;
	cursor->session = &session->iface;
	cursor->internal_uri = tbl->name;
	cursor->key_format = tbl->key_format;
	cursor->value_format = tbl->value_format;

	/* A projection replaces the value format and is remembered. */
	WT_ERR(__wt_scr_alloc(session, 0, &fmtbuf));
	if (projection != NULL) {
		WT_ERR(__wt_struct_reformat(session, tbl,
		    projection, strlen(projection), NULL, 1, fmtbuf));
		WT_ERR(__wt_strndup(session,
		    fmtbuf->data, fmtbuf->size, &cursor->value_format));
		WT_ERR(__wt_strdup(session, projection, &join->projection));
	}

	/* Join cursors cannot be owned by another cursor. */
	if (owner != NULL)
		WT_ERR(EINVAL);

	WT_ERR(__wt_cursor_init(cursor, uri, owner, cfg, cursorp));
	goto done;

err:	WT_TRET(__curjoin_close(cursor));
	*cursorp = NULL;

done:	__wt_scr_free(session, &fmtbuf);
	return (ret);
}
Example #9
0
/*
 * __wt_configure_method --
 *	WT_CONNECTION.configure_method.
 *
 *	Add a new configuration option to an existing method: a replacement
 *	configuration entry is built whose base string is the old base with
 *	"," and config appended, and whose checks array is the old array
 *	(minus any check with the same name) plus a new check of the given
 *	type. The uri argument is ignored.
 *
 *	Returns EINVAL for a missing config or type or an unsupported type,
 *	and WT_NOTFOUND if no configuration entry matches the method name.
 */
int
__wt_configure_method(WT_SESSION_IMPL *session,
                      const char *method, const char *uri,
                      const char *config, const char *type, const char *check)
{
    const WT_CONFIG_CHECK *cp;
    WT_CONFIG_CHECK *checks, *newcheck;
    const WT_CONFIG_ENTRY **epp;
    WT_CONFIG_ENTRY *entry;
    WT_CONNECTION_IMPL *conn;
    WT_DECL_RET;
    size_t cnt;
    char *newcheck_name, *p;

    /*
     * !!!
     * We ignore the specified uri, that is, all new configuration options
     * will be valid for all data sources.   That shouldn't be too bad
     * as the worst that can happen is an application might specify some
     * configuration option and not get an error -- the option should be
     * ignored by the underlying implementation since it's unexpected, so
     * there shouldn't be any real problems.  Eventually I expect we will
     * get the whole data-source thing sorted, at which time there may be
     * configuration arrays for each data source, and that's when the uri
     * will matter.
     */
    WT_UNUSED(uri);

    conn = S2C(session);
    checks = newcheck = NULL;
    entry = NULL;
    newcheck_name = NULL;

    /* Argument checking; we only support a limited number of types. */
    if (config == NULL)
        WT_RET_MSG(session, EINVAL, "no configuration specified");
    if (type == NULL)
        WT_RET_MSG(session, EINVAL, "no configuration type specified");
    if (strcmp(type, "boolean") != 0 && strcmp(type, "int") != 0 &&
            strcmp(type, "list") != 0 && strcmp(type, "string") != 0)
        WT_RET_MSG(session, EINVAL,
                   "type must be one of \"boolean\", \"int\", \"list\" or "
                   "\"string\"");

    /* Find a match for the method name. */
    for (epp = conn->config_entries; (*epp)->method != NULL; ++epp)
        if (strcmp((*epp)->method, method) == 0)
            break;
    if ((*epp)->method == NULL)
        WT_RET_MSG(session,
                   WT_NOTFOUND, "no method matching %s found", method);

    /*
     * Technically possible for threads to race, lock the connection while
     * adding the new configuration information.  We're holding the lock
     * for an extended period of time, but configuration changes should be
     * rare and only happen during startup.
     */
    __wt_spin_lock(session, &conn->api_lock);

    /*
     * Allocate new configuration entry and fill it in.
     *
     * The new base value is the previous base value, a separator and the
     * new configuration string.
     */
    WT_ERR(__wt_calloc_def(session, 1, &entry));
    entry->method = (*epp)->method;
    WT_ERR(__wt_calloc_def(session,
                           strlen((*epp)->base) + strlen(",") + strlen(config) + 1, &p));
    (void)strcpy(p, (*epp)->base);
    (void)strcat(p, ",");
    (void)strcat(p, config);
    entry->base = p;

    /*
     * Build a new checks entry name field.  There may be a default value
     * in the config argument we're passed, we don't want that as part of
     * the checks entry name field.
     */
    WT_ERR(__wt_strdup(session, config, &newcheck_name));
    if ((p = strchr(newcheck_name, '=')) != NULL)
        *p = '\0';

    /*
     * Build a new checks array.  The new configuration name may replace
     * an existing check with new information, in that case skip the old
     * version.  Two extra slots are allocated: one for the new check and
     * one left zeroed as the terminating entry.
     */
    for (cnt = 0, cp = (*epp)->checks; cp->name != NULL; ++cp)
        ++cnt;
    WT_ERR(__wt_calloc_def(session, cnt + 2, &checks));
    for (cnt = 0, cp = (*epp)->checks; cp->name != NULL; ++cp)
        if (strcmp(newcheck_name, cp->name) != 0)
            checks[cnt++] = *cp;
    newcheck = &checks[cnt];

    newcheck->name = newcheck_name;
    WT_ERR(__wt_strdup(session, type, &newcheck->type));
    if (check != NULL)
        WT_ERR(__wt_strdup(session, check, &newcheck->checks));

    entry->checks = checks;

    /* Confirm the configuration string passes the new set of checks. */
    WT_ERR(config_check(session, entry->checks, config, 0));

    /*
     * The next time this configuration is updated, we don't want to figure
     * out which of these pieces of memory were allocated and will need to
     * be free'd on close, add them to the list now.
     */
    WT_ERR(__wt_conn_foc_add(session,
                             entry, entry->base,
                             checks, newcheck->name, newcheck->type, newcheck->checks, NULL));

    *epp = entry;

    /* The error label is only reachable by jumping into this block. */
    if (0) {
err:
        if (entry != NULL) {
            __wt_free(session, entry->base);
            __wt_free(session, entry);
        }
        /*
         * The new check lives inside the checks array: release the
         * strings it owns before releasing the array itself, otherwise
         * we'd be reading freed memory.
         */
        if (newcheck != NULL) {
            __wt_free(session, newcheck->type);
            __wt_free(session, newcheck->checks);
        }
        __wt_free(session, checks);
        __wt_free(session, newcheck_name);
    }

    __wt_spin_unlock(session, &conn->api_lock);
    return (ret);
}
Example #10
0
/*
 * __wt_configure_method --
 *	WT_CONNECTION.configure_method.
 */
int
__wt_configure_method(WT_SESSION_IMPL *session,
    const char *method, const char *uri,
    const char *config, const char *type, const char *check)
{
	const WT_CONFIG_CHECK *cp;
	WT_CONFIG_CHECK *checks, *newcheck;
	const WT_CONFIG_ENTRY **epp;
	WT_CONFIG_ENTRY *entry;
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	size_t cnt;
	char *newcheck_name, *p;

	/*
	 * !!!
	 * We ignore the specified uri, that is, all new configuration options
	 * will be valid for all data sources. That shouldn't be too bad as
	 * the worst that can happen is an application might specify some
	 * configuration option and not get an error -- the option should be
	 * ignored by the underlying implementation since it's unexpected, so
	 * there shouldn't be any real problems.  Eventually I expect we will
	 * get the whole data-source thing sorted, at which time there may be
	 * configuration arrays for each data source, and that's when the uri
	 * will matter.
	 */
	WT_UNUSED(uri);

	conn = S2C(session);
	checks = newcheck = NULL;
	entry = NULL;
	newcheck_name = NULL;

	/* Argument checking; we only support a limited number of types. */
	if (config == NULL)
		WT_RET_MSG(session, EINVAL, "no configuration specified");
	if (type == NULL)
		WT_RET_MSG(session, EINVAL, "no configuration type specified");
	if (strcmp(type, "boolean") != 0 && strcmp(type, "int") != 0 &&
	    strcmp(type, "list") != 0 && strcmp(type, "string") != 0)
		WT_RET_MSG(session, EINVAL,
		    "type must be one of \"boolean\", \"int\", \"list\" or "
		    "\"string\"");

	/*
	 * Translate the method name to our configuration names, then find a
	 * match.
	 */
	for (epp = conn->config_entries;
	    *epp != NULL && (*epp)->method != NULL; ++epp)
		if (strcmp((*epp)->method, method) == 0)
			break;
	if (*epp == NULL || (*epp)->method == NULL)
		WT_RET_MSG(session,
		    WT_NOTFOUND, "no method matching %s found", method);

	/*
	 * Technically possible for threads to race, lock the connection while
	 * adding the new configuration information.  We're holding the lock
	 * for an extended period of time, but configuration changes should be
	 * rare and only happen during startup.
	 */
	__wt_spin_lock(session, &conn->api_lock);

	/*
	 * Allocate new configuration entry and fill it in.
	 *
	 * The new base value is the previous base value, a separator and the
	 * new configuration string.
	 */
	WT_ERR(__wt_calloc_one(session, &entry));
	entry->method = (*epp)->method;
	WT_ERR(__wt_calloc_def(session,
	    strlen((*epp)->base) + strlen(",") + strlen(config) + 1, &p));
	(void)strcpy(p, (*epp)->base);
	(void)strcat(p, ",");
	(void)strcat(p, config);
	entry->base = p;

	/*
	 * There may be a default value in the config argument passed in (for
	 * example, "kvs_parallelism=64").  The default value isn't part of the
	 * name, build a new one.
	 */
	WT_ERR(__wt_strdup(session, config, &newcheck_name));
	if ((p = strchr(newcheck_name, '=')) != NULL)
		*p = '\0';

	/*
	 * The new configuration name may replace an existing check with new
	 * information, in that case skip the old version.
	 */
	cnt = 0;
	if ((*epp)->checks != NULL)
		for (cp = (*epp)->checks; cp->name != NULL; ++cp)
			++cnt;
	WT_ERR(__wt_calloc_def(session, cnt + 2, &checks));
	cnt = 0;
	if ((*epp)->checks != NULL)
		for (cp = (*epp)->checks; cp->name != NULL; ++cp)
			if (strcmp(newcheck_name, cp->name) != 0)
				checks[cnt++] = *cp;
	newcheck = &checks[cnt];
	newcheck->name = newcheck_name;
	WT_ERR(__wt_strdup(session, type, &newcheck->type));
	WT_ERR(__wt_strdup(session, check, &newcheck->checks));
	entry->checks = checks;
	entry->checks_entries = 0;

	/*
	 * Confirm the configuration string passes the new set of
	 * checks.
	 */
	WT_ERR(__wt_config_check(session, entry, config, 0));

	/*
	 * The next time this configuration is updated, we don't want to figure
	 * out which of these pieces of memory were allocated and will need to
	 * be free'd on close (this isn't a heavily used API and it's too much
	 * work); add them all to the free-on-close list now.  We don't check
	 * for errors deliberately, we'd have to figure out which elements have
	 * already been added to the free-on-close array and which have not in
	 * order to avoid freeing chunks of memory twice.  Again, this isn't a
	 * commonly used API and it shouldn't ever happen, just leak it.
	 */
	__wt_conn_foc_add(session, entry->base);
	__wt_conn_foc_add(session, entry);
	__wt_conn_foc_add(session, checks);
	__wt_conn_foc_add(session, newcheck->type);
	__wt_conn_foc_add(session, newcheck->checks);
	__wt_conn_foc_add(session, newcheck_name);

	/*
	 * Instead of using locks to protect configuration information, assume
	 * we can atomically update a pointer to a chunk of memory, and because
	 * a pointer is never partially written, readers will correctly see the
	 * original or new versions of the memory.  Readers might be using the
	 * old version as it's being updated, though, which means we cannot free
	 * the old chunk of memory until all possible readers have finished.
	 * Currently, that's on connection close: in other words, we can use
	 * this because it's small amounts of memory, and we really, really do
	 * not want to acquire locks every time we access configuration strings,
	 * since that's done on every API call.
	 */
	WT_PUBLISH(*epp, entry);

	if (0) {
err:		if (entry != NULL) {
			__wt_free(session, entry->base);
			__wt_free(session, entry);
		}
		__wt_free(session, checks);
		if (newcheck != NULL) {
			__wt_free(session, newcheck->type);
			__wt_free(session, newcheck->checks);
		}
		__wt_free(session, newcheck_name);
	}

	__wt_spin_unlock(session, &conn->api_lock);
	return (ret);
}
Example #11
0
/*
 * __wt_win_directory_list --
 *	Get a list of files from a directory, MSVC version.
 *
 *	The names of the entries found in "directory" (other than "." and
 *	"..") are copied into a newly allocated array.  If "prefix" is not
 *	NULL, only names starting with that prefix are included.  On success
 *	the array is stored in *dirlistp, the number of entries in *countp,
 *	and 0 is returned.  On failure any partially built list is handed to
 *	__wt_win_directory_list_free, *dirlistp is left NULL, *countp is left
 *	0, and a non-zero error code is returned.
 */
int
__wt_win_directory_list(WT_FILE_SYSTEM *file_system,
    WT_SESSION *wt_session, const char *directory,
    const char *prefix, char ***dirlistp, uint32_t *countp)
{
	DWORD windows_error;
	HANDLE findhandle;
	WIN32_FIND_DATAW finddata;
	WT_DECL_ITEM(pathbuf);
	WT_DECL_ITEM(file_utf8);
	WT_DECL_ITEM(pathbuf_wide);
	WT_DECL_ITEM(prefix_wide);
	WT_DECL_RET;
	WT_SESSION_IMPL *session;
	size_t dirallocsz, pathlen, prefix_widelen;
	uint32_t count;
	char *dir_copy, **entries;

	session = (WT_SESSION_IMPL *)wt_session;

	*dirlistp = NULL;
	*countp = 0;

	/*
	 * Initialize everything the error path reads before the first call
	 * that can fail, otherwise an early failure would free or count
	 * through uninitialized values.
	 */
	findhandle = INVALID_HANDLE_VALUE;
	dirallocsz = 0;
	prefix_widelen = 0;
	count = 0;
	dir_copy = NULL;
	entries = NULL;

	/*
	 * Build the search pattern "<directory>\*", dropping a single trailing
	 * backslash from the directory name.  Guard against an empty name so
	 * we never index before the start of the string.
	 */
	WT_ERR(__wt_strdup(session, directory, &dir_copy));
	pathlen = strlen(dir_copy);
	if (pathlen > 0 && dir_copy[pathlen - 1] == '\\')
		dir_copy[pathlen - 1] = '\0';
	WT_ERR(__wt_scr_alloc(session, pathlen + 3, &pathbuf));
	WT_ERR(__wt_buf_fmt(session, pathbuf, "%s\\*", dir_copy));

	WT_ERR(__wt_to_utf16_string(session, pathbuf->data, &pathbuf_wide));

	/* The prefix is optional, only convert it if one was specified. */
	if (prefix != NULL) {
		WT_ERR(__wt_to_utf16_string(session, prefix, &prefix_wide));
		prefix_widelen = wcslen(prefix_wide->data);
	}

	findhandle = FindFirstFileW(pathbuf_wide->data, &finddata);
	if (findhandle == INVALID_HANDLE_VALUE) {
		windows_error = __wt_getlasterror();
		__wt_errx(session,
		    "%s: directory-list: FindFirstFile: %s",
		    pathbuf->data, __wt_formatmessage(session, windows_error));
		WT_ERR(__wt_map_windows_error(windows_error));
	}

	do {
		/*
		 * Skip . and ..
		 */
		if (wcscmp(finddata.cFileName, L".") == 0 ||
		    wcscmp(finddata.cFileName, L"..") == 0)
			continue;

		/* The list of files is optionally filtered by a prefix. */
		if (prefix != NULL &&
		    wcsncmp(finddata.cFileName, prefix_wide->data,
			prefix_widelen) != 0)
			continue;

		WT_ERR(__wt_realloc_def(
		    session, &dirallocsz, count + 1, &entries));

		/*
		 * Only count the entry once its name has been copied, so the
		 * error path never sees a slot that wasn't filled in.
		 */
		WT_ERR(__wt_to_utf8_string(
		    session, finddata.cFileName, &file_utf8));
		WT_ERR(__wt_strdup(session, file_utf8->data, &entries[count]));
		++count;
		__wt_scr_free(session, &file_utf8);
	} while (FindNextFileW(findhandle, &finddata) != 0);

err:	if (findhandle != INVALID_HANDLE_VALUE)
		if (FindClose(findhandle) == 0) {
			windows_error = __wt_getlasterror();
			__wt_errx(session,
			    "%s: directory-list: FindClose: %s",
			    pathbuf->data,
			    __wt_formatmessage(session, windows_error));
			if (ret == 0)
				ret = __wt_map_windows_error(windows_error);
		}

	__wt_free(session, dir_copy);
	__wt_scr_free(session, &pathbuf);
	__wt_scr_free(session, &file_utf8);
	__wt_scr_free(session, &pathbuf_wide);
	__wt_scr_free(session, &prefix_wide);

	/*
	 * Only hand the list to the caller once nothing else can fail: a
	 * FindClose failure discards the list below, and the caller must not
	 * be left with a pointer to freed memory.
	 */
	if (ret == 0) {
		*dirlistp = entries;
		*countp = count;
		return (0);
	}

	WT_TRET(__wt_win_directory_list_free(
	    file_system, wt_session, entries, count));

	WT_RET_MSG(session, ret,
	    "%s: directory-list, prefix \"%s\"",
	    directory, prefix == NULL ? "" : prefix);
}
Example #12
0
/*
 * __conn_dhandle_get --
 *	Find the data handle matching the name/checkpoint pair and lock it; if
 *	no such handle exists, allocate a new one, lock it exclusively and link
 *	it into the connection's list.  On success the handle is left in
 *	session->dhandle.
 */
static int
__conn_dhandle_get(WT_SESSION_IMPL *session,
    const char *name, const char *ckpt, uint32_t flags)
{
	WT_BTREE *btree;
	WT_CONNECTION_IMPL *conn;
	WT_DATA_HANDLE *dhandle;
	WT_DECL_RET;
	uint32_t hash_slot;

	conn = S2C(session);

	/*
	 * The handle list is locked, so start by searching for an existing
	 * handle.  When the search succeeds it leaves the handle in the
	 * session; all that remains is to lock it in the requested state.
	 */
	if ((ret = __wt_conn_dhandle_find(session, name, ckpt, flags)) == 0)
		return (__conn_dhandle_open_lock(
		    session, session->dhandle, flags));

	/* Anything other than "not found" is a real failure. */
	WT_RET_NOTFOUND_OK(ret);

	/*
	 * Nothing matched: build a new data handle together with its btree
	 * handle, and take the exclusive lock before the handle becomes
	 * visible in the connection's list.
	 */
	WT_RET(__wt_calloc_one(session, &dhandle));

	WT_ERR(__wt_rwlock_alloc(session, &dhandle->rwlock, "data handle"));

	WT_ERR(__wt_strdup(session, name, &dhandle->name));
	dhandle->name_hash = __wt_hash_city64(name, strlen(name));
	if (ckpt != NULL) {
		WT_ERR(__wt_strdup(session, ckpt, &dhandle->checkpoint));
	}

	/* Cross-link the btree handle and the data handle. */
	WT_ERR(__wt_calloc_one(session, &btree));
	btree->dhandle = dhandle;
	dhandle->handle = btree;

	WT_ERR(__wt_spin_init(
	    session, &dhandle->close_lock, "data handle close"));

	F_SET(dhandle, WT_DHANDLE_EXCLUSIVE);
	WT_ERR(__wt_writelock(session, dhandle->rwlock));

	/*
	 * New handles go at the front of the connection list and of their
	 * hash bucket: recently opened files are the ones most likely to be
	 * wanted again before every session has them cached.
	 */
	WT_ASSERT(session, F_ISSET(session, WT_SESSION_HANDLE_LIST_LOCKED));
	hash_slot = dhandle->name_hash % WT_HASH_ARRAY_SIZE;
	WT_CONN_DHANDLE_INSERT(conn, dhandle, hash_slot);

	session->dhandle = dhandle;
	return (0);

	/* Failure: release whatever parts of the new handle were set up. */
err:	WT_TRET(__wt_rwlock_destroy(session, &dhandle->rwlock));
	__wt_free(session, dhandle->name);
	__wt_free(session, dhandle->checkpoint);
	__wt_free(session, dhandle->handle);		/* btree free */
	__wt_spin_destroy(session, &dhandle->close_lock);
	__wt_overwrite_and_free(session, dhandle);

	return (ret);
}
Example #13
0
/*
 * __posix_open_file --
 *	Open a file handle.
 *
 *	Allocates a POSIX file handle, opens "name" with open(2) flags derived
 *	from "file_type" and "flags", and fills in the handle's method table.
 *	On success the handle is stored in *file_handlep and 0 is returned.
 *	*file_handlep is set to NULL on entry and stays NULL on failure; once
 *	the handle has been allocated, any failure passes it to
 *	__posix_file_close before the error is returned.
 */
static int
__posix_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session,
    const char *name, WT_OPEN_FILE_TYPE file_type, uint32_t flags,
    WT_FILE_HANDLE **file_handlep)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_FILE_HANDLE *file_handle;
	WT_FILE_HANDLE_POSIX *pfh;
	WT_SESSION_IMPL *session;
	mode_t mode;
	int f;

	WT_UNUSED(file_system);

	*file_handlep = NULL;

	session = (WT_SESSION_IMPL *)wt_session;
	conn = S2C(session);

	WT_RET(__wt_calloc_one(session, &pfh));

	/*
	 * Set up error handling: mark the descriptor as not yet opened so the
	 * error path can tell whether open succeeded.
	 */
	pfh->fd = -1;

	/*
	 * Directories are opened read-only and skip the rest of the flag
	 * setup, jumping straight to the handle initialization.
	 */
	if (file_type == WT_OPEN_FILE_TYPE_DIRECTORY) {
		f = O_RDONLY;
#ifdef O_CLOEXEC
		/*
		 * Security:
		 * The application may spawn a new process, and we don't want
		 * another process to have access to our file handles.
		 */
		f |= O_CLOEXEC;
#endif
		WT_SYSCALL_RETRY((
		    (pfh->fd = open(name, f, 0444)) == -1 ? -1 : 0), ret);
		if (ret != 0)
			WT_ERR_MSG(session, ret, "%s: handle-open: open", name);
		WT_ERR(__posix_open_file_cloexec(session, pfh->fd, name));
		goto directory_open;
	}

	/*
	 * Regular files: read-only or read-write access, optionally creating
	 * the file (exclusively if requested).  The creation mode is only
	 * non-zero when O_CREAT is set.
	 */
	f = LF_ISSET(WT_OPEN_READONLY) ? O_RDONLY : O_RDWR;
	if (LF_ISSET(WT_OPEN_CREATE)) {
		f |= O_CREAT;
		if (LF_ISSET(WT_OPEN_EXCLUSIVE))
			f |= O_EXCL;
		mode = 0666;
	} else
		mode = 0;

#ifdef O_BINARY
	/* Windows clones: we always want to treat the file as a binary. */
	f |= O_BINARY;
#endif
#ifdef O_CLOEXEC
	/*
	 * Security:
	 * The application may spawn a new process, and we don't want another
	 * process to have access to our file handles.
	 */
	f |= O_CLOEXEC;
#endif
#ifdef O_DIRECT
	/* Direct I/O: remember the setting in the handle for later checks. */
	if (LF_ISSET(WT_OPEN_DIRECTIO)) {
		f |= O_DIRECT;
		pfh->direct_io = true;
	} else
		pfh->direct_io = false;
#endif
#ifdef O_NOATIME
	/* Avoid updating metadata for read-only workloads. */
	if (file_type == WT_OPEN_FILE_TYPE_DATA)
		f |= O_NOATIME;
#endif

	/*
	 * Log files opened with the dsync log-sync mode configured get
	 * synchronous writes: O_DSYNC if available, otherwise O_SYNC, and it
	 * is an error if the platform offers neither.
	 */
	if (file_type == WT_OPEN_FILE_TYPE_LOG &&
	    FLD_ISSET(conn->txn_logsync, WT_LOG_DSYNC)) {
#ifdef O_DSYNC
		f |= O_DSYNC;
#elif defined(O_SYNC)
		f |= O_SYNC;
#else
		WT_ERR_MSG(session, ENOTSUP,
		    "unsupported log sync mode configured");
#endif
	}

	/*
	 * Open the file.  The failure message depends on whether direct I/O
	 * was requested, since that is a likely cause of the failure.
	 */
	WT_SYSCALL_RETRY(((pfh->fd = open(name, f, mode)) == -1 ? -1 : 0), ret);
	if (ret != 0)
		WT_ERR_MSG(session, ret,
		    pfh->direct_io ?
		    "%s: handle-open: open: failed with direct I/O configured, "
		    "some filesystem types do not support direct I/O" :
		    "%s: handle-open: open", name);
	WT_ERR(__posix_open_file_cloexec(session, pfh->fd, name));

#if defined(HAVE_POSIX_FADVISE)
	/*
	 * Disable read-ahead on trees: it slows down random read workloads.
	 * Ignore fadvise when doing direct I/O, the kernel cache isn't
	 * interesting.
	 */
	if (!pfh->direct_io && file_type == WT_OPEN_FILE_TYPE_DATA) {
		WT_SYSCALL(
		    posix_fadvise(pfh->fd, 0, 0, POSIX_FADV_RANDOM), ret);
		if (ret != 0)
			WT_ERR_MSG(session, ret,
			    "%s: handle-open: posix_fadvise", name);
	}
#endif

directory_open:
	/*
	 * Initialize public information: the handle keeps its own copy of the
	 * name, and the method table is filled in according to what the
	 * platform supports.
	 */
	file_handle = (WT_FILE_HANDLE *)pfh;
	WT_ERR(__wt_strdup(session, name, &file_handle->name));

	file_handle->close = __posix_file_close;
#if defined(HAVE_POSIX_FADVISE)
	/*
	 * Ignore fadvise when doing direct I/O, the kernel cache isn't
	 * interesting.
	 */
	if (!pfh->direct_io)
		file_handle->fh_advise = __posix_file_advise;
#endif
	file_handle->fh_allocate = __wt_posix_file_fallocate;
	file_handle->fh_lock = __posix_file_lock;
#ifdef WORDS_BIGENDIAN
	/*
	 * The underlying objects are little-endian, mapping objects isn't
	 * currently supported on big-endian systems.
	 */
#else
	file_handle->fh_map = __wt_posix_map;
#ifdef HAVE_POSIX_MADVISE
	file_handle->fh_map_discard = __wt_posix_map_discard;
	file_handle->fh_map_preload = __wt_posix_map_preload;
#endif
	file_handle->fh_unmap = __wt_posix_unmap;
#endif
	file_handle->fh_read = __posix_file_read;
	file_handle->fh_size = __posix_file_size;
	file_handle->fh_sync = __posix_file_sync;
#ifdef HAVE_SYNC_FILE_RANGE
	file_handle->fh_sync_nowait = __posix_file_sync_nowait;
#endif
	file_handle->fh_truncate = __posix_file_truncate;
	file_handle->fh_write = __posix_file_write;

	*file_handlep = file_handle;

	return (0);

	/*
	 * Failure: let the close method tear down the partially built handle.
	 * NOTE(review): presumably __posix_file_close copes with fd == -1 and
	 * a NULL name, and releases pfh itself -- confirm against its
	 * definition.
	 */
err:	WT_TRET(__posix_file_close((WT_FILE_HANDLE *)pfh, wt_session));
	return (ret);
}
Example #14
0
/*
 * __ckpt_server_config --
 *	Read the checkpoint server settings from the configuration, store them
 *	in the connection, and report through *startp whether the checkpoint
 *	server should be running.
 */
static int
__ckpt_server_config(WT_SESSION_IMPL *session, const char **cfg, bool *startp)
{
	WT_CONFIG_ITEM cval;
	WT_CONNECTION_IMPL *conn;
	WT_DECL_ITEM(tmp);
	WT_DECL_RET;
	char *p;

	conn = S2C(session);

	/*
	 * Checkpoints are triggered by elapsed time and/or by the amount of
	 * log written; read both settings.  The wait time is configured in
	 * seconds and stored in microseconds.
	 */
	WT_RET(__wt_config_gets(session, cfg, "checkpoint.wait", &cval));
	conn->ckpt_usecs = (uint64_t)cval.val * 1000000;

	WT_RET(__wt_config_gets(session, cfg, "checkpoint.log_size", &cval));
	conn->ckpt_logsize = (wt_off_t)cval.val;

	/* Asking for checkpoints while running in-memory is an error. */
	if (conn->ckpt_usecs != 0 || conn->ckpt_logsize != 0) {
		WT_RET(__wt_config_gets(session, cfg, "in_memory", &cval));
		if (cval.val != 0)
			WT_RET_MSG(session, EINVAL,
			    "In memory configuration incompatible with "
			    "checkpoints");
	}

	__wt_log_written_reset(session);

	/*
	 * Without a wait time, the server only has work to do if a log size
	 * is set and logging is enabled; otherwise there's nothing to start.
	 */
	if (conn->ckpt_usecs == 0 &&
	    (conn->ckpt_logsize == 0 ||
	    !FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED))) {
		*startp = false;
		return (0);
	}
	*startp = true;

	/*
	 * A checkpoint name is optional, and the default name is treated the
	 * same as no name at all.  Any other name is validated and saved in
	 * the connection as a "name=<value>" configuration string, replacing
	 * the previous string.
	 */
	WT_RET(__wt_config_gets(session, cfg, "checkpoint.name", &cval));
	if (cval.len != 0 &&
	    !WT_STRING_MATCH(WT_CHECKPOINT, cval.str, cval.len)) {
		WT_RET(__wt_checkpoint_name_ok(session, cval.str, cval.len));

		WT_RET(__wt_scr_alloc(session, cval.len + 20, &tmp));
		WT_ERR(__wt_buf_fmt(
		    session, tmp, "name=%.*s", (int)cval.len, cval.str));
		WT_ERR(__wt_strdup(session, tmp->data, &p));

		__wt_free(session, conn->ckpt_config);
		conn->ckpt_config = p;
	}

	/* The scratch buffer is released on success and failure alike. */
err:	__wt_scr_free(session, &tmp);
	return (ret);
}
Example #15
0
/*
 * __wt_open --
 *	Open a file handle.
 *
 *	If a handle with the same name is already on the connection's list of
 *	open files, its reference count is incremented and that handle is
 *	returned in *fhp.  Otherwise the file is opened, a new handle with a
 *	reference count of 1 is allocated and appended to the list, and the
 *	new handle is returned in *fhp.
 *
 *	ok_create: create the file if it doesn't exist (O_CREAT).
 *	exclusive: with ok_create, fail if the file already exists (O_EXCL).
 *	dio_type: the file type, used to select direct I/O, log sync mode,
 *	    access-time, read-ahead and file-extension handling.
 *
 *	Returns 0 on success and a non-zero error code on failure.
 */
int
__wt_open(WT_SESSION_IMPL *session,
    const char *name, int ok_create, int exclusive, int dio_type, WT_FH **fhp)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_FH *fh, *tfh;
	mode_t mode;
	int direct_io, f, fd, matched;
	const char *path;

	/* Initialize everything the cleanup code at the end looks at. */
	conn = S2C(session);
	fh = NULL;
	fd = -1;
	path = NULL;

	WT_VERBOSE_RET(session, fileops, "%s: open", name);

	/* Increment the reference count if we already have the file open. */
	matched = 0;
	__wt_spin_lock(session, &conn->fh_lock);
	TAILQ_FOREACH(tfh, &conn->fhqh, q)
		if (strcmp(name, tfh->name) == 0) {
			++tfh->refcnt;
			*fhp = tfh;
			matched = 1;
			break;
		}
	__wt_spin_unlock(session, &conn->fh_lock);
	if (matched)
		return (0);

	/* Translate the name into the path passed to open. */
	WT_RET(__wt_filename(session, name, &path));

	f = O_RDWR;
#ifdef O_BINARY
	/* Windows clones: we always want to treat the file as a binary. */
	f |= O_BINARY;
#endif
#ifdef O_CLOEXEC
	/*
	 * Security:
	 * The application may spawn a new process, and we don't want another
	 * process to have access to our file handles.
	 */
	f |= O_CLOEXEC;
#endif
#ifdef O_NOATIME
	/* Avoid updating metadata for read-only workloads. */
	if (dio_type == WT_FILE_TYPE_DATA)
		f |= O_NOATIME;
#endif

	/* The creation mode is only non-zero when O_CREAT is set. */
	if (ok_create) {
		f |= O_CREAT;
		if (exclusive)
			f |= O_EXCL;
		mode = 0666;
	} else
		mode = 0;

	/* Use direct I/O if it's configured for this type of file. */
	direct_io = 0;
#ifdef O_DIRECT
	if (dio_type && FLD_ISSET(conn->direct_io, dio_type)) {
		f |= O_DIRECT;
		direct_io = 1;
	}
#endif
	/*
	 * Log files opened with the dsync log-sync mode configured get
	 * synchronous writes.  The body of this "if" is whichever single
	 * statement the preprocessor selects: O_DSYNC if available, otherwise
	 * O_SYNC, otherwise an error.
	 */
	if (dio_type == WT_FILE_TYPE_LOG &&
	    FLD_ISSET(conn->txn_logsync, WT_LOG_DSYNC))
#ifdef O_DSYNC
		f |= O_DSYNC;
#elif defined(O_SYNC)
		f |= O_SYNC;
#else
		WT_ERR_MSG(session, ENOTSUP,
		    "Unsupported log sync mode requested");
#endif
	/*
	 * Open the file.  The failure message depends on whether direct I/O
	 * was requested, since that is a likely cause of the failure.
	 */
	WT_SYSCALL_RETRY(((fd = open(path, f, mode)) == -1 ? 1 : 0), ret);
	if (ret != 0)
		WT_ERR_MSG(session, ret,
		    direct_io ?
		    "%s: open failed with direct I/O configured, some "
		    "filesystem types do not support direct I/O" : "%s", path);

#if defined(HAVE_FCNTL) && defined(FD_CLOEXEC) && !defined(O_CLOEXEC)
	/*
	 * Security:
	 * The application may spawn a new process, and we don't want another
	 * process to have access to our file handles.  There's an obvious
	 * race here, so we prefer the flag to open if available.
	 */
	if ((f = fcntl(fd, F_GETFD)) == -1 ||
	    fcntl(fd, F_SETFD, f | FD_CLOEXEC) == -1)
		WT_ERR_MSG(session, __wt_errno(), "%s: fcntl", name);
#endif

#if defined(HAVE_POSIX_FADVISE)
	/* Disable read-ahead on trees: it slows down random read workloads. */
	if (dio_type == WT_FILE_TYPE_DATA)
		WT_ERR(posix_fadvise(fd, 0, 0, POSIX_FADV_RANDOM));
#endif

	/* Sync the directory if the connection is configured to do so. */
	if (F_ISSET(conn, WT_CONN_CKPT_SYNC))
		WT_ERR(__open_directory_sync(session));

	/* Allocate the handle; it keeps its own copy of the name. */
	WT_ERR(__wt_calloc(session, 1, sizeof(WT_FH), &fh));
	WT_ERR(__wt_strdup(session, name, &fh->name));
	fh->fd = fd;
	fh->refcnt = 1;
	fh->direct_io = direct_io;

	/* Set the file's size. */
	WT_ERR(__wt_filesize(session, fh, &fh->size));

	/* Configure file extension. */
	if (dio_type == WT_FILE_TYPE_DATA)
		fh->extend_len = conn->data_extend_len;

	/*
	 * Repeat the check for a match, but then link onto the database's list
	 * of files.  The lock was released after the first check, so a handle
	 * with the same name may have been added to the list in the meantime.
	 */
	matched = 0;
	__wt_spin_lock(session, &conn->fh_lock);
	TAILQ_FOREACH(tfh, &conn->fhqh, q)
		if (strcmp(name, tfh->name) == 0) {
			++tfh->refcnt;
			*fhp = tfh;
			matched = 1;
			break;
		}
	if (!matched) {
		TAILQ_INSERT_TAIL(&conn->fhqh, fh, q);
		WT_STAT_FAST_CONN_INCR(session, file_open);

		*fhp = fh;
	}
	__wt_spin_unlock(session, &conn->fh_lock);
	/*
	 * Shared cleanup: this block is entered either because the second
	 * lookup found an existing handle (ret is 0 and the handle built here
	 * is redundant), or by a jump to the error label.  In both cases the
	 * new handle and the file descriptor are discarded.
	 */
	if (matched) {
err:		if (fh != NULL) {
			__wt_free(session, fh->name);
			__wt_free(session, fh);
		}
		if (fd != -1)
			(void)close(fd);
	}

	/* The translated path is only needed for the duration of the call. */
	__wt_free(session, path);
	return (ret);
}
Example #16
0
/*
 * __wt_block_open --
 *	Open a block handle.
 *
 *	If a block handle with the same file name is already on the
 *	connection's list, its reference count is incremented and it is
 *	returned in *blockp.  Otherwise a new handle is allocated, linked into
 *	the list, configured from "cfg", the underlying file is opened and,
 *	unless "forced_salvage" is set, the file's description block is read.
 *
 *	The connection's block lock is held for the duration of the call.
 *	Returns 0 on success; on failure *blockp is left NULL, any partially
 *	constructed handle is destroyed, and a non-zero error is returned.
 */
int
__wt_block_open(WT_SESSION_IMPL *session,
    const char *filename, const char *cfg[],
    int forced_salvage, uint32_t allocsize, WT_BLOCK **blockp)
{
	WT_BLOCK *block;
	WT_CONFIG_ITEM cval;
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;

	WT_VERBOSE_TRET(session, block, "open: %s", filename);

	conn = S2C(session);
	*blockp = NULL;

	/* Share an existing handle for this file if there is one. */
	__wt_spin_lock(session, &conn->block_lock);
	TAILQ_FOREACH(block, &conn->blockqh, q)
		if (strcmp(filename, block->name) == 0) {
			++block->ref;
			*blockp = block;
			__wt_spin_unlock(session, &conn->block_lock);
			return (0);
		}

	/*
	 * Basic structure allocation, initialization.
	 *
	 * The search loop above leaves block set to NULL when no match is
	 * found, and the error path depends on that if the allocation fails.
	 * The handle is linked into the connection's list immediately, before
	 * any further step that can fail.
	 */
	WT_ERR(__wt_calloc_def(session, 1, &block));
	block->ref = 1;
	TAILQ_INSERT_HEAD(&conn->blockqh, block, q);

	WT_ERR(__wt_strdup(session, filename, &block->name));
	block->allocsize = allocsize;

	/* Configuration: allocate from the first fit if so configured. */
	WT_ERR(__wt_config_gets(session, cfg, "block_allocation", &cval));
	block->allocfirst =
	    WT_STRING_MATCH("first", cval.str, cval.len) ? 1 : 0;

	/* Configuration: optional OS buffer cache maximum size. */
	WT_ERR(__wt_config_gets(session, cfg, "os_cache_max", &cval));
	block->os_cache_max = cval.val;
#ifdef HAVE_POSIX_FADVISE
	if (conn->direct_io && block->os_cache_max)
		WT_ERR_MSG(session, EINVAL,
		    "os_cache_max not supported in combination with direct_io");
#else
	if (block->os_cache_max)
		WT_ERR_MSG(session, EINVAL,
		    "os_cache_max not supported if posix_fadvise not "
		    "available");
#endif

	/* Configuration: optional immediate write scheduling flag. */
	WT_ERR(__wt_config_gets(session, cfg, "os_cache_dirty_max", &cval));
	block->os_cache_dirty_max = cval.val;
#ifdef HAVE_SYNC_FILE_RANGE
	if (conn->direct_io && block->os_cache_dirty_max)
		WT_ERR_MSG(session, EINVAL,
		    "os_cache_dirty_max not supported in combination with "
		    "direct_io");
#else
	if (block->os_cache_dirty_max)
		WT_ERR_MSG(session, EINVAL,
		    "os_cache_dirty_max not supported if sync_file_range not "
		    "available");
#endif

	/* Open the underlying file handle. */
	WT_ERR(__wt_open(
	    session, filename, 0, 0, WT_FILE_TYPE_DATA, &block->fh));

	/* Initialize the live checkpoint's lock. */
	WT_ERR(__wt_spin_init(session, &block->live_lock, "block manager"));

	/*
	 * Read the description information from the first block.
	 *
	 * Salvage is a special case: if we're forcing the salvage, we don't
	 * look at anything, including the description information.
	 */
	if (!forced_salvage)
		WT_ERR(__desc_read(session, block));

	*blockp = block;
	__wt_spin_unlock(session, &conn->block_lock);
	return (0);

	/*
	 * If the structure allocation itself failed there is no handle to
	 * destroy: don't hand a NULL pointer to the destroy function.
	 */
err:	if (block != NULL)
		WT_TRET(__block_destroy(session, block));
	__wt_spin_unlock(session, &conn->block_lock);
	return (ret);
}