/*
 * __wt_dirlist --
 *	Get a list of files from a directory, optionally filtered by
 *	a given prefix.
 *
 *	flags selects the filter direction: WT_DIRLIST_INCLUDE (also the
 *	default when flags is 0) returns names that start with prefix,
 *	WT_DIRLIST_EXCLUDE returns names that do not. A NULL prefix returns
 *	every name. The "." and ".." entries are never returned.
 *
 *	On success, *dirlist is set to an allocated array of *countp allocated
 *	strings (it stays NULL when nothing matched); this function does not
 *	free them. On failure, everything allocated here is released, *dirlist
 *	and *countp are left NULL/0 and a non-zero error is returned.
 */
int
__wt_dirlist(WT_SESSION_IMPL *session, const char *dir, const char *prefix,
    uint32_t flags, char ***dirlist, u_int *countp)
{
	struct dirent *dp;
	DIR *dirp;
	WT_DECL_RET;
	size_t dirallocsz;
	u_int count, dirsz;
	int match;
	char **entries, *path;

	*dirlist = NULL;
	*countp = 0;

	WT_RET(__wt_filename(session, dir, &path));

	/*
	 * Initialize everything the cleanup code reads before the first
	 * statement that can jump to the error label.
	 */
	dirp = NULL;
	dirallocsz = 0;
	dirsz = 0;
	count = 0;
	entries = NULL;
	if (flags == 0)
		LF_SET(WT_DIRLIST_INCLUDE);

	WT_ERR(__wt_verbose(session, WT_VERB_FILEOPS,
	    "wt_dirlist of %s %s prefix %s",
	    path, LF_ISSET(WT_DIRLIST_INCLUDE) ? "include" : "exclude",
	    prefix == NULL ? "all" : prefix));

	WT_SYSCALL_RETRY(((dirp = opendir(path)) == NULL ? 1 : 0), ret);
	if (ret != 0)
		WT_ERR_MSG(session, ret, "%s: opendir", path);

	while ((dp = readdir(dirp)) != NULL) {
		/*
		 * Skip . and ..
		 */
		if (strcmp(dp->d_name, ".") == 0 ||
		    strcmp(dp->d_name, "..") == 0)
			continue;

		match = 0;
		if (prefix != NULL &&
		    ((LF_ISSET(WT_DIRLIST_INCLUDE) &&
		    WT_PREFIX_MATCH(dp->d_name, prefix)) ||
		    (LF_ISSET(WT_DIRLIST_EXCLUDE) &&
		    !WT_PREFIX_MATCH(dp->d_name, prefix))))
			match = 1;
		if (prefix == NULL || match) {
			/*
			 * We have a file name we want to return: grow the
			 * array in WT_DIR_ENTRY steps when it is full.
			 */
			if (count >= dirsz) {
				dirsz += WT_DIR_ENTRY;
				WT_ERR(__wt_realloc_def(
				    session, &dirallocsz, dirsz, &entries));
			}
			WT_ERR(__wt_strdup(
			    session, dp->d_name, &entries[count]));

			/*
			 * Only count the slot once it holds a string, so the
			 * error path frees exactly the populated slots.
			 */
			++count;
		}
	}
	if (count > 0)
		*dirlist = entries;
	*countp = count;

err:	if (dirp != NULL)
		(void)closedir(dirp);
	__wt_free(session, path);

	if (ret == 0)
		return (0);

	/*
	 * Release the partial list. The test is on the local array because
	 * *dirlist is only assigned on the success path, and the loop walks
	 * slots [0, count) rather than indexing one past the end.
	 */
	if (entries != NULL) {
		for (; count > 0; count--)
			__wt_free(session, entries[count - 1]);
		__wt_free(session, entries);
	}

	/* The prefix is optional, never hand a NULL pointer to %s. */
	WT_RET_MSG(session, ret,
	    "dirlist %s prefix %s", dir, prefix == NULL ? "" : prefix);
}
/*
 * __wt_schema_open_table --
 *	Open a named table.
 *
 *	Builds the "table:<name>" metadata key from the first namelen bytes of
 *	name, looks it up through a metadata cursor and fills in a new
 *	WT_TABLE from the stored configuration (formats, columns, column
 *	groups). The caller must hold the table lock (asserted below).
 *
 *	If ok_incomplete is zero, a table whose column groups are not all
 *	created is rejected with EINVAL. On success the table is returned in
 *	*tablep; on failure the partially built table is destroyed and a
 *	non-zero error is returned.
 */
int
__wt_schema_open_table(WT_SESSION_IMPL *session,
    const char *name, size_t namelen, int ok_incomplete, WT_TABLE **tablep)
{
	WT_CONFIG cparser;
	WT_CONFIG_ITEM ckey, cval;
	WT_CURSOR *cursor;
	WT_DECL_ITEM(buf);
	WT_DECL_RET;
	WT_TABLE *table;
	const char *tconfig;
	char *tablename;

	cursor = NULL;
	table = NULL;
	tablename = NULL;

	WT_ASSERT(session, F_ISSET(session, WT_SESSION_TABLE_LOCKED));

	WT_ERR(__wt_scr_alloc(session, 0, &buf));
	WT_ERR(__wt_buf_fmt(session, buf, "table:%.*s", (int)namelen, name));
	WT_ERR(__wt_strndup(session, buf->data, buf->size, &tablename));

	WT_ERR(__wt_metadata_cursor(session, NULL, &cursor));
	cursor->set_key(cursor, tablename);
	WT_ERR(cursor->search(cursor));
	WT_ERR(cursor->get_value(cursor, &tconfig));

	/*
	 * The table takes ownership of the name; the local is cleared so the
	 * common exit path doesn't free it a second time. From here on the
	 * name must be read through table->name.
	 */
	WT_ERR(__wt_calloc_def(session, 1, &table));
	table->name = tablename;
	tablename = NULL;

	/*
	 * The "columns" lookup is only an existence check here, its value is
	 * overwritten by the key_format lookup that follows.
	 */
	WT_ERR(__wt_config_getones(session, tconfig, "columns", &cval));

	WT_ERR(__wt_config_getones(session, tconfig, "key_format", &cval));
	WT_ERR(__wt_strndup(session, cval.str, cval.len, &table->key_format));
	WT_ERR(__wt_config_getones(session, tconfig, "value_format", &cval));
	WT_ERR(__wt_strndup(session, cval.str, cval.len, &table->value_format));
	WT_ERR(__wt_strdup(session, tconfig, &table->config));

	/* Point to some items in the copy to save re-parsing. */
	WT_ERR(__wt_config_getones(session, table->config,
	    "columns", &table->colconf));

	/*
	 * Count the number of columns: tables are "simple" if the columns
	 * are not named.
	 */
	WT_ERR(__wt_config_subinit(session, &cparser, &table->colconf));
	table->is_simple = 1;
	while ((ret = __wt_config_next(&cparser, &ckey, &cval)) == 0)
		table->is_simple = 0;
	if (ret != WT_NOTFOUND)
		goto err;

	/* Check that the columns match the key and value formats. */
	if (!table->is_simple)
		WT_ERR(__wt_schema_colcheck(session,
		    table->key_format, table->value_format, &table->colconf,
		    &table->nkey_columns, NULL));

	WT_ERR(__wt_config_getones(session, table->config,
	    "colgroups", &table->cgconf));

	/* Count the number of column groups. */
	WT_ERR(__wt_config_subinit(session, &cparser, &table->cgconf));
	table->ncolgroups = 0;
	while ((ret = __wt_config_next(&cparser, &ckey, &cval)) == 0)
		++table->ncolgroups;
	if (ret != WT_NOTFOUND)
		goto err;

	/*
	 * Report the table by table->name: the local tablename pointer was
	 * cleared when ownership moved to the table and would print as NULL.
	 */
	if (table->ncolgroups > 0 && table->is_simple)
		WT_ERR_MSG(session, EINVAL,
		    "%s requires a table with named columns", table->name);

	WT_ERR(__wt_calloc_def(session, WT_COLGROUPS(table), &table->cgroups));
	WT_ERR(__wt_schema_open_colgroups(session, table));

	if (!ok_incomplete && !table->cg_complete)
		WT_ERR_MSG(session, EINVAL, "'%s' cannot be used "
		    "until all column groups are created",
		    table->name);

	/* Copy the schema generation into the new table. */
	table->schema_gen = S2C(session)->schema_gen;

	*tablep = table;

	if (0) {
err:		if (table != NULL)
			WT_TRET(__wt_schema_destroy_table(session, table));
	}
	if (cursor != NULL)
		WT_TRET(cursor->close(cursor));

	__wt_free(session, tablename);
	__wt_scr_free(&buf);
	return (ret);
}
/*
 * __win_open_file --
 *	Open a file handle.
 *
 *	Allocates a WT_FILE_HANDLE_WIN, translates the WT_FS_OPEN_* flags and
 *	the file type into CreateFileW access, creation-disposition and
 *	attribute arguments, and opens the file. Unless the file is opened
 *	read-only, a second handle is opened on the same file. For directory
 *	file types no system handle is opened at all.
 *
 *	On success the initialized handle is returned in *file_handlep. On
 *	failure *file_handlep stays NULL, the partially built handle is passed
 *	to __win_file_close and a non-zero error is returned.
 */
static int
__win_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session,
    const char *name, WT_FS_OPEN_FILE_TYPE file_type, uint32_t flags,
    WT_FILE_HANDLE **file_handlep)
{
	DWORD dwCreationDisposition, windows_error;
	WT_CONNECTION_IMPL *conn;
	WT_DECL_ITEM(name_wide);
	WT_DECL_RET;
	WT_FILE_HANDLE *file_handle;
	WT_FILE_HANDLE_WIN *win_fh;
	WT_SESSION_IMPL *session;
	int desired_access, f;

	WT_UNUSED(file_system);
	session = (WT_SESSION_IMPL *)wt_session;
	conn = S2C(session);
	*file_handlep = NULL;

	WT_RET(__wt_calloc_one(session, &win_fh));
	win_fh->direct_io = false;

	/* Set up error handling. */
	win_fh->filehandle =
	    win_fh->filehandle_secondary = INVALID_HANDLE_VALUE;

	WT_ERR(__wt_to_utf16_string(session, name, &name_wide));

	/*
	 * Opening a file handle on a directory is only to support filesystems
	 * that require a directory sync for durability, and Windows doesn't
	 * require that functionality: create an empty WT_FH structure with
	 * invalid handles.
	 */
	if (file_type == WT_FS_OPEN_FILE_TYPE_DIRECTORY)
		goto directory_open;

	desired_access = GENERIC_READ;
	if (!LF_ISSET(WT_FS_OPEN_READONLY))
		desired_access |= GENERIC_WRITE;

	/*
	 * Security:
	 * The application may spawn a new process, and we don't want another
	 * process to have access to our file handles.
	 *
	 * TODO: Set tighter file permissions but set bInheritHandle to false
	 * to prevent inheritance
	 */
	f = FILE_ATTRIBUTE_NORMAL;

	dwCreationDisposition = 0;
	if (LF_ISSET(WT_FS_OPEN_CREATE)) {
		dwCreationDisposition = CREATE_NEW;
		if (LF_ISSET(WT_FS_OPEN_EXCLUSIVE))
			dwCreationDisposition = CREATE_ALWAYS;
	} else
		dwCreationDisposition = OPEN_EXISTING;

	/* Direct I/O. */
	if (LF_ISSET(WT_FS_OPEN_DIRECTIO)) {
		f |= FILE_FLAG_NO_BUFFERING;
		win_fh->direct_io = true;
	}

	/* FILE_FLAG_WRITE_THROUGH does not require aligned buffers */
	if (FLD_ISSET(conn->write_through, file_type))
		f |= FILE_FLAG_WRITE_THROUGH;

	if (file_type == WT_FS_OPEN_FILE_TYPE_LOG &&
	    FLD_ISSET(conn->txn_logsync, WT_LOG_DSYNC))
		f |= FILE_FLAG_WRITE_THROUGH;

	/* If the user indicated a random workload, disable read-ahead. */
	if (file_type == WT_FS_OPEN_FILE_TYPE_DATA &&
	    LF_ISSET(WT_FS_OPEN_ACCESS_RAND))
		f |= FILE_FLAG_RANDOM_ACCESS;

	/* If the user indicated a sequential workload, set that. */
	if (file_type == WT_FS_OPEN_FILE_TYPE_DATA &&
	    LF_ISSET(WT_FS_OPEN_ACCESS_SEQ))
		f |= FILE_FLAG_SEQUENTIAL_SCAN;

	win_fh->filehandle = CreateFileW(name_wide->data, desired_access,
	    FILE_SHARE_READ | FILE_SHARE_WRITE,
	    NULL, dwCreationDisposition, f, NULL);
	if (win_fh->filehandle == INVALID_HANDLE_VALUE) {
		/*
		 * A create request on a file that already exists is retried
		 * as a plain open of the existing file.
		 */
		if (LF_ISSET(WT_FS_OPEN_CREATE) &&
		    GetLastError() == ERROR_FILE_EXISTS)
			win_fh->filehandle = CreateFileW(name_wide->data,
			    desired_access, FILE_SHARE_READ | FILE_SHARE_WRITE,
			    NULL, OPEN_EXISTING, f, NULL);
		if (win_fh->filehandle == INVALID_HANDLE_VALUE) {
			windows_error = __wt_getlasterror();
			ret = __wt_map_windows_error(windows_error);
			__wt_err(session, ret,
			    win_fh->direct_io ?
			    "%s: handle-open: CreateFileW: failed with direct "
			    "I/O configured, some filesystem types do not "
			    "support direct I/O: %s" :
			    "%s: handle-open: CreateFileW: %s",
			    name, __wt_formatmessage(session, windows_error));
			WT_ERR(ret);
		}
	}

	/*
	 * Open a second handle to file to support file extension/truncation
	 * concurrently with reads on the file. Writes would also move the
	 * file pointer.
	 */
	if (!LF_ISSET(WT_FS_OPEN_READONLY)) {
		win_fh->filehandle_secondary = CreateFileW(name_wide->data,
		    desired_access, FILE_SHARE_READ | FILE_SHARE_WRITE,
		    NULL, OPEN_EXISTING, f, NULL);
		if (win_fh->filehandle_secondary == INVALID_HANDLE_VALUE) {
			windows_error = __wt_getlasterror();
			ret = __wt_map_windows_error(windows_error);
			/* Name the failing call, matching the message above. */
			__wt_err(session, ret,
			    "%s: handle-open: CreateFileW: secondary: %s",
			    name, __wt_formatmessage(session, windows_error));
			WT_ERR(ret);
		}
	}

directory_open:
	/* Initialize public information. */
	file_handle = (WT_FILE_HANDLE *)win_fh;
	WT_ERR(__wt_strdup(session, name, &file_handle->name));

	file_handle->close = __win_file_close;
	file_handle->fh_lock = __win_file_lock;
#ifdef WORDS_BIGENDIAN
	/*
	 * The underlying objects are little-endian, mapping objects isn't
	 * currently supported on big-endian systems.
	 */
#else
	file_handle->fh_map = __wt_win_map;
	file_handle->fh_unmap = __wt_win_unmap;
#endif
	file_handle->fh_read = __win_file_read;
	file_handle->fh_size = __win_file_size;
	file_handle->fh_sync = __win_file_sync;

	/* Extend and truncate share the same implementation. */
	file_handle->fh_extend = __win_file_set_end;
	file_handle->fh_truncate = __win_file_set_end;

	file_handle->fh_write = __win_file_write;

	*file_handlep = file_handle;

	__wt_scr_free(session, &name_wide);
	return (0);

err:	__wt_scr_free(session, &name_wide);
	WT_TRET(__win_file_close((WT_FILE_HANDLE *)win_fh, wt_session));
	return (ret);
}
/*
 * __wt_schema_open_table --
 *	Open a named table.
 *
 *	The metadata key "table:<name>" is formatted from the first namelen
 *	bytes of name and searched for with a metadata cursor; the stored
 *	configuration is then used to fill in a freshly allocated WT_TABLE,
 *	which is handed back through tablep on success. On failure any
 *	partially built table is destroyed and the error is returned.
 */
int
__wt_schema_open_table(WT_SESSION_IMPL *session,
    const char *name, size_t namelen, WT_TABLE **tablep)
{
	WT_CONFIG parser;
	WT_CONFIG_ITEM key, value;
	WT_CURSOR *cursor;
	WT_DECL_RET;
	WT_ITEM buf;
	WT_TABLE *table;
	const char *tconfig;
	char *tablename;

	table = NULL;
	cursor = NULL;

	/* Format the metadata key and take the memory from the buffer. */
	WT_CLEAR(buf);
	WT_RET(__wt_buf_fmt(session, &buf, "table:%.*s", (int)namelen, name));
	tablename = __wt_buf_steal(session, &buf, NULL);

	/* Fetch the table's configuration string from the metadata. */
	WT_ERR(__wt_metadata_cursor(session, NULL, &cursor));
	cursor->set_key(cursor, tablename);
	WT_ERR(cursor->search(cursor));
	WT_ERR(cursor->get_value(cursor, &tconfig));

	/* The name now belongs to the table, forget the local reference. */
	WT_ERR(__wt_calloc_def(session, 1, &table));
	table->name = tablename;
	tablename = NULL;

	/* The first lookup's value is discarded by the one that follows. */
	WT_ERR(__wt_config_getones(session, tconfig, "columns", &value));

	WT_ERR(__wt_config_getones(session, tconfig, "key_format", &value));
	WT_ERR(__wt_strndup(
	    session, value.str, value.len, &table->key_format));
	WT_ERR(__wt_config_getones(session, tconfig, "value_format", &value));
	WT_ERR(__wt_strndup(
	    session, value.str, value.len, &table->value_format));
	WT_ERR(__wt_strdup(session, tconfig, &table->config));

	/* Reference items inside the table's own copy to avoid re-parsing. */
	WT_ERR(__wt_config_getones(
	    session, table->config, "columns", &table->colconf));

	/*
	 * Walk the columns: a table stays "simple" only when there are no
	 * named columns at all.
	 */
	WT_ERR(__wt_config_subinit(session, &parser, &table->colconf));
	table->is_simple = 1;
	for (;;) {
		ret = __wt_config_next(&parser, &key, &value);
		if (ret != 0)
			break;
		table->is_simple = 0;
	}
	if (ret != WT_NOTFOUND)
		goto err;

	/* Named columns have to agree with the key and value formats. */
	if (!table->is_simple)
		WT_ERR(__wt_schema_colcheck(session,
		    table->key_format, table->value_format, &table->colconf,
		    &table->nkey_columns, NULL));

	WT_ERR(__wt_config_getones(
	    session, table->config, "colgroups", &table->cgconf));

	/* Walk the column groups to count them. */
	WT_ERR(__wt_config_subinit(session, &parser, &table->cgconf));
	table->ncolgroups = 0;
	for (;;) {
		ret = __wt_config_next(&parser, &key, &value);
		if (ret != 0)
			break;
		++table->ncolgroups;
	}
	if (ret != WT_NOTFOUND)
		goto err;

	WT_ERR(__wt_calloc_def(session, WT_COLGROUPS(table), &table->cgroups));
	WT_ERR(__wt_schema_open_colgroups(session, table));
	*tablep = table;
	goto done;

err:	if (table != NULL)
		__wt_schema_destroy_table(session, table);

done:	if (cursor != NULL)
		WT_TRET(cursor->close(cursor));

	__wt_free(session, tablename);
	return (ret);
}
/*
 * __wt_schema_open_index --
 *	Open one or more indices for a table.
 *
 *	Walks the metadata entries whose keys start with "index:<table>:" and
 *	keeps table->indices in step with them. With idxname == NULL every
 *	matching entry is opened and the table is marked idx_complete; with a
 *	non-NULL idxname (compared over len bytes) the walk stops at the first
 *	entry that matches it. If indexp is non-NULL it is set to the index
 *	entry handled last. Returns 0 or an error code; WT_NOTFOUND from the
 *	cursor walk is not treated as an error.
 */
int
__wt_schema_open_index(WT_SESSION_IMPL *session,
    WT_TABLE *table, const char *idxname, size_t len, WT_INDEX **indexp)
{
	WT_CURSOR *cursor;
	WT_DECL_ITEM(tmp);
	WT_DECL_RET;
	WT_INDEX *idx;
	u_int i;
	int cmp, match;
	const char *idxconf, *name, *tablename, *uri;

	/* Check if we've already done the work. */
	if (idxname == NULL && table->idx_complete)
		return (0);

	cursor = NULL;
	idx = NULL;

	/* Build a search key. */
	tablename = table->name;
	(void)WT_PREFIX_SKIP(tablename, "table:");
	WT_ERR(__wt_scr_alloc(session, 512, &tmp));
	WT_ERR(__wt_buf_fmt(session, tmp, "index:%s:", tablename));

	/* Find matching indices. */
	WT_ERR(__wt_metadata_cursor(session, NULL, &cursor));
	cursor->set_key(cursor, tmp->data);

	/*
	 * If search_near landed on a key that compares below the search key
	 * (cmp < 0), step forward once before starting the walk.
	 */
	if ((ret = cursor->search_near(cursor, &cmp)) == 0 && cmp < 0)
		ret = cursor->next(cursor);

	/*
	 * i is the slot in table->indices for the current metadata entry; it
	 * advances once per entry visited, including entries skipped by the
	 * "continue" below.
	 */
	for (i = 0; ret == 0; i++, ret = cursor->next(cursor)) {
		WT_ERR(cursor->get_key(cursor, &uri));
		name = uri;

		/* Stop at the first key outside this table's index prefix. */
		if (!WT_PREFIX_SKIP(name, tmp->data))
			break;

		/* Is this the index we are looking for? */
		match = idxname == NULL || WT_STRING_MATCH(name, idxname, len);

		/*
		 * Ensure there is space, including if we have to make room for
		 * a new entry in the middle of the list.
		 */
		WT_ERR(__wt_realloc_def(session, &table->idx_alloc,
		    WT_MAX(i, table->nindices) + 1, &table->indices));

		/*
		 * Keep the in-memory list in sync with the metadata.
		 *
		 * cmp is reset so a NULL slot (the strcmp is not evaluated)
		 * is not mistaken for "insert before" by the test after the
		 * loop.
		 */
		cmp = 0;
		while (table->indices[i] != NULL &&
		    (cmp = strcmp(uri, table->indices[i]->name)) > 0) {
			/*
			 * Index no longer exists, remove it.
			 *
			 * NOTE(review): only the WT_INDEX structure itself is
			 * freed here, while the error path below releases an
			 * index through __wt_schema_destroy_index -- confirm
			 * the name/config strings are not leaked.
			 */
			__wt_free(session, table->indices[i]);
			memmove(&table->indices[i], &table->indices[i + 1],
			    (table->nindices - i) * sizeof(WT_INDEX *));
			table->indices[--table->nindices] = NULL;
		}
		if (cmp < 0) {
			/* Make room for a new index. */
			memmove(&table->indices[i + 1], &table->indices[i],
			    (table->nindices - i) * sizeof(WT_INDEX *));
			table->indices[i] = NULL;
			++table->nindices;
		}

		/* The list was kept in sync, but this entry isn't wanted. */
		if (!match)
			continue;

		if (table->indices[i] == NULL) {
			WT_ERR(cursor->get_value(cursor, &idxconf));
			WT_ERR(__wt_calloc_def(session, 1, &idx));
			WT_ERR(__wt_strdup(session, uri, &idx->name));
			WT_ERR(__wt_strdup(session, idxconf, &idx->config));
			WT_ERR(__open_index(session, table, idx));

			/*
			 * The table owns the index now; clearing the local
			 * keeps the cleanup code from destroying it.
			 */
			table->indices[i] = idx;
			idx = NULL;
		}

		/* If we were looking for a single index, we're done. */
		if (indexp != NULL)
			*indexp = table->indices[i];
		if (idxname != NULL)
			break;
	}
	WT_ERR_NOTFOUND_OK(ret);

	/* If we did a full pass, we won't need to do it again. */
	if (idxname == NULL) {
		table->nindices = i;
		table->idx_complete = 1;
	}

	/* Shared exit path: reached on success as well as on error. */
err:	__wt_scr_free(&tmp);
	if (idx != NULL)
		WT_TRET(__wt_schema_destroy_index(session, idx));
	if (cursor != NULL)
		WT_TRET(cursor->close(cursor));
	return (ret);
}
/*
 * __wt_open --
 *	Open a file handle.
 *
 *	An already-open handle found by name is returned directly. Otherwise a
 *	new WT_FH is allocated and opened through the connection's handle_open
 *	method, then the name is searched for again: when that second search
 *	reports a match the new handle is closed and discarded, exactly as it
 *	is on error.
 */
int
__wt_open(WT_SESSION_IMPL *session,
    const char *name, uint32_t file_type, uint32_t flags, WT_FH **fhp)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_FH *fh;
	bool lock_file, open_called;
	char *path;

	WT_ASSERT(session, file_type != 0);	/* A file type is required. */

	conn = S2C(session);
	path = NULL;
	fh = NULL;
	open_called = false;

	WT_RET(__open_verbose(session, name, file_type, flags));

	/* Fast path: the handle is already open. */
	if (__wt_handle_search(session, name, true, NULL, &fh)) {
		/*
		 * XXX
		 * The in-memory implementation has to reset the file offset
		 * when a file is re-opened (which obviously also depends on
		 * in-memory configurations never opening a file in more than
		 * one thread at a time). This needs to be fixed.
		 */
		if (F_ISSET(fh, WT_FH_IN_MEMORY) && fh->ref == 1)
			fh->off = 0;
		*fhp = fh;
		return (0);
	}

	/* Build a new handle carrying a copy of the name. */
	WT_ERR(__wt_calloc_one(session, &fh));
	WT_ERR(__wt_strdup(session, name, &fh->name));

	/*
	 * Read-only connections open every file read-only apart from the lock
	 * file, which is also the only file such a connection may create.
	 */
	if (F_ISSET(conn, WT_CONN_READONLY)) {
		lock_file = strcmp(name, WT_SINGLETHREAD) == 0;
		if (!lock_file)
			LF_SET(WT_OPEN_READONLY);
		WT_ASSERT(session, lock_file || !LF_ISSET(WT_OPEN_CREATE));
	}

	/* Unless the name is fixed, translate it into a path. */
	if (!LF_ISSET(WT_OPEN_FIXED))
		WT_ERR(__wt_filename(session, name, &path));

	/* Hand off to the underlying open function. */
	WT_ERR(conn->handle_open(
	    session, fh, path == NULL ? name : path, file_type, flags));
	open_called = true;

	/*
	 * Search again, this time offering our handle: without a match the
	 * new handle has been linked onto the database's list of files and
	 * there is nothing to discard.
	 */
	if (!__wt_handle_search(session, name, true, fh, fhp))
		goto done;

	/* Matched on the second search, or failed: discard our handle. */
err:	if (open_called)
		WT_TRET(fh->fh_close(session, fh));
	if (fh != NULL) {
		__wt_free(session, fh->name);
		__wt_free(session, fh);
	}

done:	__wt_free(session, path);
	return (ret);
}
/*
 * __wt_open --
 *	Open a file handle.
 *
 *	If a handle with the same name is already on the connection's hash
 *	list, its reference count is incremented and it is returned. Otherwise
 *	the file is opened with CreateFileA (twice: a primary and a secondary
 *	handle), a WT_FH is built around the handles and linked onto the list.
 *	Directories get a WT_FH with invalid system handles.
 *
 *	ok_create allows the file to be created, exclusive selects the
 *	CREATE_ALWAYS disposition, and dio_type is the file type used to
 *	choose direct I/O, write-through and read-ahead settings. On failure,
 *	or when another thread linked the same name first, the handles and
 *	memory created here are released.
 */
int
__wt_open(WT_SESSION_IMPL *session,
    const char *name, int ok_create, int exclusive, int dio_type, WT_FH **fhp)
{
	DWORD dwCreationDisposition;
	HANDLE filehandle, filehandle_secondary;
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_FH *fh, *tfh;
	uint64_t bucket, hash;
	int direct_io, f, matched, share_mode;
	char *path;

	conn = S2C(session);
	fh = NULL;
	path = NULL;
	filehandle = INVALID_HANDLE_VALUE;
	filehandle_secondary = INVALID_HANDLE_VALUE;
	direct_io = 0;
	hash = __wt_hash_city64(name, strlen(name));
	bucket = hash % WT_HASH_ARRAY_SIZE;

	WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, "%s: open", name));

	/* Increment the reference count if we already have the file open. */
	matched = 0;
	__wt_spin_lock(session, &conn->fh_lock);
	TAILQ_FOREACH(tfh, &conn->fhhash[bucket], hashq)
		if (strcmp(name, tfh->name) == 0) {
			++tfh->ref;
			*fhp = tfh;
			matched = 1;
			break;
		}
	__wt_spin_unlock(session, &conn->fh_lock);
	if (matched)
		return (0);

	/* For directories, create empty file handles with invalid handles */
	if (dio_type == WT_FILE_TYPE_DIRECTORY) {
		goto setupfh;
	}

	WT_RET(__wt_filename(session, name, &path));

	share_mode = FILE_SHARE_READ | FILE_SHARE_WRITE;

	/*
	 * Security:
	 * The application may spawn a new process, and we don't want another
	 * process to have access to our file handles.
	 *
	 * TODO: Set tighter file permissions but set bInheritHandle to false
	 * to prevent inheritance
	 */
	f = FILE_ATTRIBUTE_NORMAL;

	dwCreationDisposition = 0;
	if (ok_create) {
		dwCreationDisposition = CREATE_NEW;
		if (exclusive)
			dwCreationDisposition = CREATE_ALWAYS;
	} else
		dwCreationDisposition = OPEN_EXISTING;

	if (dio_type && FLD_ISSET(conn->direct_io, dio_type)) {
		f |= FILE_FLAG_NO_BUFFERING | FILE_FLAG_WRITE_THROUGH;
		direct_io = 1;
	}

	if (dio_type == WT_FILE_TYPE_LOG &&
	    FLD_ISSET(conn->txn_logsync, WT_LOG_DSYNC)) {
		f |= FILE_FLAG_WRITE_THROUGH;
	}

	/* Disable read-ahead on trees: it slows down random read workloads. */
	if (dio_type == WT_FILE_TYPE_DATA ||
	    dio_type == WT_FILE_TYPE_CHECKPOINT)
		f |= FILE_FLAG_RANDOM_ACCESS;

	filehandle = CreateFileA(path, (GENERIC_READ | GENERIC_WRITE),
	    share_mode, NULL, dwCreationDisposition, f, NULL);
	if (filehandle == INVALID_HANDLE_VALUE) {
		/*
		 * A create request on a file that already exists is retried
		 * as a plain open of the existing file.
		 */
		if (GetLastError() == ERROR_FILE_EXISTS && ok_create)
			filehandle = CreateFileA(path,
			    (GENERIC_READ | GENERIC_WRITE),
			    share_mode, NULL, OPEN_EXISTING, f, NULL);

		if (filehandle == INVALID_HANDLE_VALUE)
			WT_ERR_MSG(session, __wt_errno(),
			    direct_io ?
			    "%s: open failed with direct I/O configured, some "
			    "filesystem types do not support direct I/O" :
			    "%s", path);
	}

	/*
	 * Open a second handle to file to support allocation/truncation
	 * concurrently with reads on the file. Writes would also move the file
	 * pointer.
	 */
	filehandle_secondary = CreateFileA(path, (GENERIC_READ | GENERIC_WRITE),
	    share_mode, NULL, OPEN_EXISTING, f, NULL);

	/*
	 * Check the handle that was just opened: testing the primary handle
	 * here could never fail, so a failed secondary open would go
	 * unnoticed and an invalid handle would be stored in the WT_FH.
	 */
	if (filehandle_secondary == INVALID_HANDLE_VALUE)
		WT_ERR_MSG(session, __wt_errno(),
		    "open failed for secondary handle: %s", path);

setupfh:
	WT_ERR(__wt_calloc_one(session, &fh));
	WT_ERR(__wt_strdup(session, name, &fh->name));
	fh->name_hash = hash;
	fh->filehandle = filehandle;
	fh->filehandle_secondary = filehandle_secondary;
	fh->ref = 1;
	fh->direct_io = direct_io;

	/* Set the file's size. */
	if (dio_type != WT_FILE_TYPE_DIRECTORY)
		WT_ERR(__wt_filesize(session, fh, &fh->size));

	/* Configure file extension. */
	if (dio_type == WT_FILE_TYPE_DATA ||
	    dio_type == WT_FILE_TYPE_CHECKPOINT)
		fh->extend_len = conn->data_extend_len;

	/* Configure fallocate/posix_fallocate calls. */
	__wt_fallocate_config(session, fh);

	/*
	 * Repeat the check for a match, but then link onto the database's list
	 * of files.
	 */
	matched = 0;
	__wt_spin_lock(session, &conn->fh_lock);
	TAILQ_FOREACH(tfh, &conn->fhhash[bucket], hashq)
		if (strcmp(name, tfh->name) == 0) {
			++tfh->ref;
			*fhp = tfh;
			matched = 1;
			break;
		}
	if (!matched) {
		WT_CONN_FILE_INSERT(conn, fh, bucket);
		(void)WT_ATOMIC_ADD4(conn->open_file_count, 1);

		*fhp = fh;
	}
	__wt_spin_unlock(session, &conn->fh_lock);

	/*
	 * The cleanup below is shared: it runs both on error and when another
	 * thread linked a handle for the same name first.
	 */
	if (matched) {
err:		if (fh != NULL) {
			__wt_free(session, fh->name);
			__wt_free(session, fh);
		}
		if (filehandle != INVALID_HANDLE_VALUE)
			(void)CloseHandle(filehandle);
		if (filehandle_secondary != INVALID_HANDLE_VALUE)
			(void)CloseHandle(filehandle_secondary);
	}

	__wt_free(session, path);
	return (ret);
}
/*
 * __wt_curjoin_open --
 *	Initialize a join cursor.
 *
 *	Join cursors are read-only: every modifying method in the static
 *	method table below is __wt_cursor_notsup.
 *
 *	The uri must start with "join:" followed by "table:"; an optional
 *	projection starts at the first '(' after the table name. A non-NULL
 *	owner is rejected with EINVAL. On failure the cursor is closed and
 *	*cursorp is set to NULL.
 */
int
__wt_curjoin_open(WT_SESSION_IMPL *session,
    const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp)
{
	WT_CURSOR_STATIC_INIT(iface,
	    __curjoin_get_key,			/* get-key */
	    __curjoin_get_value,		/* get-value */
	    __wt_cursor_notsup,			/* set-key */
	    __wt_cursor_notsup,			/* set-value */
	    __wt_cursor_notsup,			/* compare */
	    __wt_cursor_notsup,			/* equals */
	    __curjoin_next,			/* next */
	    __wt_cursor_notsup,			/* prev */
	    __curjoin_reset,			/* reset */
	    __wt_cursor_notsup,			/* search */
	    __wt_cursor_notsup,			/* search-near */
	    __wt_cursor_notsup,			/* insert */
	    __wt_cursor_notsup,			/* update */
	    __wt_cursor_notsup,			/* remove */
	    __wt_cursor_notsup,			/* reconfigure */
	    __curjoin_close);			/* close */
	WT_CURSOR *cursor;
	WT_CURSOR_JOIN *cjoin;
	WT_DECL_ITEM(fmtbuf);
	WT_DECL_RET;
	WT_TABLE *table;
	size_t namelen;
	const char *projection, *tname;

	WT_STATIC_ASSERT(offsetof(WT_CURSOR_JOIN, iface) == 0);

	/* Strip "join:" from the uri itself, then "table:" from a copy. */
	if (!WT_PREFIX_SKIP(uri, "join:"))
		return (EINVAL);
	tname = uri;
	if (!WT_PREFIX_SKIP(tname, "table:"))
		return (EINVAL);

	/* The table name runs up to the projection, if there is one. */
	projection = strchr(tname, '(');
	namelen = projection == NULL ?
	    strlen(tname) : WT_PTRDIFF(projection, tname);
	WT_RET(__wt_schema_get_table(session, tname, namelen, 0, &table));

	/* Allocate the cursor and seed it from the table. */
	WT_RET(__wt_calloc_one(session, &cjoin));
	cjoin->table = table;
	cursor = &cjoin->iface;
	*cursor = iface;
	cursor->session = &session->iface;
	cursor->internal_uri = table->name;
	cursor->key_format = table->key_format;
	cursor->value_format = table->value_format;

	/* A projection replaces the value format and is remembered. */
	WT_ERR(__wt_scr_alloc(session, 0, &fmtbuf));
	if (projection != NULL) {
		WT_ERR(__wt_struct_reformat(session, table,
		    projection, strlen(projection), NULL, 1, fmtbuf));
		WT_ERR(__wt_strndup(session,
		    fmtbuf->data, fmtbuf->size, &cursor->value_format));
		WT_ERR(__wt_strdup(session, projection, &cjoin->projection));
	}

	/* Join cursors cannot be owned by another cursor. */
	if (owner != NULL)
		WT_ERR(EINVAL);

	WT_ERR(__wt_cursor_init(cursor, uri, owner, cfg, cursorp));
	goto done;

err:	WT_TRET(__curjoin_close(cursor));
	*cursorp = NULL;

done:	__wt_scr_free(session, &fmtbuf);
	return (ret);
}
/*
 * __wt_configure_method --
 *	WT_CONNECTION.configure_method.
 *
 *	Adds a new configuration option to the named method. A replacement
 *	WT_CONFIG_ENTRY is built whose base string is the old base, a comma
 *	and config, and whose checks array is the old array (minus any check
 *	with the same name) plus one new check of the given type. The work is
 *	done while holding the connection's api_lock.
 *
 *	Returns EINVAL for a missing config/type or an unsupported type,
 *	WT_NOTFOUND when no entry matches method, or the first error from
 *	allocation, checking or registration; on error everything allocated
 *	here is released.
 */
int
__wt_configure_method(WT_SESSION_IMPL *session,
    const char *method, const char *uri,
    const char *config, const char *type, const char *check)
{
	const WT_CONFIG_CHECK *cp;
	WT_CONFIG_CHECK *checks, *newcheck;
	const WT_CONFIG_ENTRY **epp;
	WT_CONFIG_ENTRY *entry;
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	size_t cnt;
	char *newcheck_name, *p;

	/*
	 * !!!
	 * We ignore the specified uri, that is, all new configuration options
	 * will be valid for all data sources. That shouldn't be too bad as
	 * the worst that can happen is an application might specify some
	 * configuration option and not get an error -- the option should be
	 * ignored by the underlying implementation since it's unexpected, so
	 * there shouldn't be any real problems. Eventually I expect we will
	 * get the whole data-source thing sorted, at which time there may be
	 * configuration arrays for each data source, and that's when the uri
	 * will matter.
	 */
	WT_UNUSED(uri);

	conn = S2C(session);
	checks = newcheck = NULL;
	entry = NULL;
	newcheck_name = NULL;

	/* Argument checking; we only support a limited number of types. */
	if (config == NULL)
		WT_RET_MSG(session, EINVAL, "no configuration specified");
	if (type == NULL)
		WT_RET_MSG(session, EINVAL, "no configuration type specified");
	if (strcmp(type, "boolean") != 0 && strcmp(type, "int") != 0 &&
	    strcmp(type, "list") != 0 && strcmp(type, "string") != 0)
		WT_RET_MSG(session, EINVAL,
		    "type must be one of \"boolean\", \"int\", \"list\" or "
		    "\"string\"");

	/* Find a match for the method name. */
	for (epp = conn->config_entries; (*epp)->method != NULL; ++epp)
		if (strcmp((*epp)->method, method) == 0)
			break;
	if ((*epp)->method == NULL)
		WT_RET_MSG(session,
		    WT_NOTFOUND, "no method matching %s found", method);

	/*
	 * Technically possible for threads to race, lock the connection while
	 * adding the new configuration information. We're holding the lock
	 * for an extended period of time, but configuration changes should be
	 * rare and only happen during startup.
	 */
	__wt_spin_lock(session, &conn->api_lock);

	/*
	 * Allocate new configuration entry and fill it in.
	 *
	 * The new base value is the previous base value, a separator and the
	 * new configuration string. The buffer is sized for exactly those
	 * three pieces plus the terminating nul, so the copies below fit.
	 */
	WT_ERR(__wt_calloc_def(session, 1, &entry));
	entry->method = (*epp)->method;
	WT_ERR(__wt_calloc_def(session,
	    strlen((*epp)->base) + strlen(",") + strlen(config) + 1, &p));
	(void)strcpy(p, (*epp)->base);
	(void)strcat(p, ",");
	(void)strcat(p, config);
	entry->base = p;

	/*
	 * Build a new checks entry name field. There may be a default value
	 * in the config argument we're passed, we don't want that as part of
	 * the checks entry name field.
	 */
	WT_ERR(__wt_strdup(session, config, &newcheck_name));
	if ((p = strchr(newcheck_name, '=')) != NULL)
		*p = '\0';

	/*
	 * Build a new checks array. The new configuration name may replace
	 * an existing check with new information, in that case skip the old
	 * version. The array holds the old entries, the new entry and a
	 * zeroed terminator, hence cnt + 2.
	 */
	for (cnt = 0, cp = (*epp)->checks; cp->name != NULL; ++cp)
		++cnt;
	WT_ERR(__wt_calloc_def(session, cnt + 2, &checks));
	for (cnt = 0, cp = (*epp)->checks; cp->name != NULL; ++cp)
		if (strcmp(newcheck_name, cp->name) != 0)
			checks[cnt++] = *cp;
	newcheck = &checks[cnt];
	newcheck->name = newcheck_name;
	WT_ERR(__wt_strdup(session, type, &newcheck->type));
	if (check != NULL)
		WT_ERR(__wt_strdup(session, check, &newcheck->checks));
	entry->checks = checks;

	/* Confirm the configuration string passes the new set of checks. */
	WT_ERR(config_check(session, entry->checks, config, 0));

	/*
	 * The next time this configuration is updated, we don't want to figure
	 * out which of these pieces of memory were allocated and will need to
	 * be free'd on close, add them to the list now.
	 */
	WT_ERR(__wt_conn_foc_add(session, entry, entry->base,
	    checks, newcheck->name, newcheck->type, newcheck->checks, NULL));

	*epp = entry;

	if (0) {
		/*
		 * newcheck points into the checks array: release the strings
		 * it owns before the array itself, otherwise the fields would
		 * be read from freed memory.
		 */
err:		if (newcheck != NULL) {
			__wt_free(session, newcheck->type);
			__wt_free(session, newcheck->checks);
		}
		__wt_free(session, checks);
		if (entry != NULL) {
			__wt_free(session, entry->base);
			__wt_free(session, entry);
		}
		__wt_free(session, newcheck_name);
	}

	__wt_spin_unlock(session, &conn->api_lock);
	return (ret);
}
/*
 * __wt_configure_method --
 *	WT_CONNECTION.configure_method.
 *
 *	Adds a new configuration option to the named method. A replacement
 *	WT_CONFIG_ENTRY is built whose base string is the old base, a comma
 *	and config, and whose checks array is the old array (minus any check
 *	with the same name) plus one new check of the given type. The new
 *	entry is published over the old one, which is not freed here. The
 *	work is done while holding the connection's api_lock.
 *
 *	Returns EINVAL for a missing config/type or an unsupported type,
 *	WT_NOTFOUND when no entry matches method, or the first error from
 *	allocation or checking; on error everything allocated here is
 *	released.
 */
int
__wt_configure_method(WT_SESSION_IMPL *session,
    const char *method, const char *uri,
    const char *config, const char *type, const char *check)
{
	const WT_CONFIG_CHECK *cp;
	WT_CONFIG_CHECK *checks, *newcheck;
	const WT_CONFIG_ENTRY **epp;
	WT_CONFIG_ENTRY *entry;
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	size_t cnt;
	char *newcheck_name, *p;

	/*
	 * !!!
	 * We ignore the specified uri, that is, all new configuration options
	 * will be valid for all data sources. That shouldn't be too bad as
	 * the worst that can happen is an application might specify some
	 * configuration option and not get an error -- the option should be
	 * ignored by the underlying implementation since it's unexpected, so
	 * there shouldn't be any real problems. Eventually I expect we will
	 * get the whole data-source thing sorted, at which time there may be
	 * configuration arrays for each data source, and that's when the uri
	 * will matter.
	 */
	WT_UNUSED(uri);

	conn = S2C(session);
	checks = newcheck = NULL;
	entry = NULL;
	newcheck_name = NULL;

	/* Argument checking; we only support a limited number of types. */
	if (config == NULL)
		WT_RET_MSG(session, EINVAL, "no configuration specified");
	if (type == NULL)
		WT_RET_MSG(session, EINVAL, "no configuration type specified");
	if (strcmp(type, "boolean") != 0 && strcmp(type, "int") != 0 &&
	    strcmp(type, "list") != 0 && strcmp(type, "string") != 0)
		WT_RET_MSG(session, EINVAL,
		    "type must be one of \"boolean\", \"int\", \"list\" or "
		    "\"string\"");

	/*
	 * Translate the method name to our configuration names, then find a
	 * match.
	 */
	for (epp = conn->config_entries;
	    *epp != NULL && (*epp)->method != NULL; ++epp)
		if (strcmp((*epp)->method, method) == 0)
			break;
	if (*epp == NULL || (*epp)->method == NULL)
		WT_RET_MSG(session,
		    WT_NOTFOUND, "no method matching %s found", method);

	/*
	 * Technically possible for threads to race, lock the connection while
	 * adding the new configuration information. We're holding the lock
	 * for an extended period of time, but configuration changes should be
	 * rare and only happen during startup.
	 */
	__wt_spin_lock(session, &conn->api_lock);

	/*
	 * Allocate new configuration entry and fill it in.
	 *
	 * The new base value is the previous base value, a separator and the
	 * new configuration string. The buffer is sized for exactly those
	 * three pieces plus the terminating nul, so the copies below fit.
	 */
	WT_ERR(__wt_calloc_one(session, &entry));
	entry->method = (*epp)->method;
	WT_ERR(__wt_calloc_def(session,
	    strlen((*epp)->base) + strlen(",") + strlen(config) + 1, &p));
	(void)strcpy(p, (*epp)->base);
	(void)strcat(p, ",");
	(void)strcat(p, config);
	entry->base = p;

	/*
	 * There may be a default value in the config argument passed in (for
	 * example, "kvs_parallelism=64"). The default value isn't part of the
	 * name, build a new one.
	 */
	WT_ERR(__wt_strdup(session, config, &newcheck_name));
	if ((p = strchr(newcheck_name, '=')) != NULL)
		*p = '\0';

	/*
	 * The new configuration name may replace an existing check with new
	 * information, in that case skip the old version. The array holds the
	 * old entries, the new entry and a zeroed terminator, hence cnt + 2.
	 */
	cnt = 0;
	if ((*epp)->checks != NULL)
		for (cp = (*epp)->checks; cp->name != NULL; ++cp)
			++cnt;
	WT_ERR(__wt_calloc_def(session, cnt + 2, &checks));
	cnt = 0;
	if ((*epp)->checks != NULL)
		for (cp = (*epp)->checks; cp->name != NULL; ++cp)
			if (strcmp(newcheck_name, cp->name) != 0)
				checks[cnt++] = *cp;
	newcheck = &checks[cnt];
	newcheck->name = newcheck_name;
	WT_ERR(__wt_strdup(session, type, &newcheck->type));
	WT_ERR(__wt_strdup(session, check, &newcheck->checks));
	entry->checks = checks;
	entry->checks_entries = 0;

	/*
	 * Confirm the configuration string passes the new set of
	 * checks.
	 */
	WT_ERR(__wt_config_check(session, entry, config, 0));

	/*
	 * The next time this configuration is updated, we don't want to figure
	 * out which of these pieces of memory were allocated and will need to
	 * be free'd on close (this isn't a heavily used API and it's too much
	 * work); add them all to the free-on-close list now. We don't check
	 * for errors deliberately, we'd have to figure out which elements have
	 * already been added to the free-on-close array and which have not in
	 * order to avoid freeing chunks of memory twice. Again, this isn't a
	 * commonly used API and it shouldn't ever happen, just leak it.
	 */
	__wt_conn_foc_add(session, entry->base);
	__wt_conn_foc_add(session, entry);
	__wt_conn_foc_add(session, checks);
	__wt_conn_foc_add(session, newcheck->type);
	__wt_conn_foc_add(session, newcheck->checks);
	__wt_conn_foc_add(session, newcheck_name);

	/*
	 * Instead of using locks to protect configuration information, assume
	 * we can atomically update a pointer to a chunk of memory, and because
	 * a pointer is never partially written, readers will correctly see the
	 * original or new versions of the memory. Readers might be using the
	 * old version as it's being updated, though, which means we cannot free
	 * the old chunk of memory until all possible readers have finished.
	 * Currently, that's on connection close: in other words, we can use
	 * this because it's small amounts of memory, and we really, really do
	 * not want to acquire locks every time we access configuration strings,
	 * since that's done on every API call.
	 */
	WT_PUBLISH(*epp, entry);

	if (0) {
		/*
		 * newcheck points into the checks array: release the strings
		 * it owns before the array itself, otherwise the fields would
		 * be read from freed memory.
		 */
err:		if (newcheck != NULL) {
			__wt_free(session, newcheck->type);
			__wt_free(session, newcheck->checks);
		}
		__wt_free(session, checks);
		if (entry != NULL) {
			__wt_free(session, entry->base);
			__wt_free(session, entry);
		}
		__wt_free(session, newcheck_name);
	}

	__wt_spin_unlock(session, &conn->api_lock);
	return (ret);
}
/*
 * __wt_win_directory_list --
 *	Get a list of files from a directory, MSVC version.
 *
 *	On success, *dirlistp is set to an allocated array of file names (it
 *	stays NULL when nothing matched) and *countp to the number of names.
 *	The "." and ".." entries are never returned, and when prefix is not
 *	NULL only names starting with it are returned. On failure a non-zero
 *	error is returned, any partially built list is released and the
 *	outputs are reset to NULL/0.
 */
int
__wt_win_directory_list(WT_FILE_SYSTEM *file_system,
    WT_SESSION *wt_session, const char *directory,
    const char *prefix, char ***dirlistp, uint32_t *countp)
{
	DWORD windows_error;
	HANDLE findhandle;
	WIN32_FIND_DATAW finddata;
	WT_DECL_ITEM(pathbuf);
	WT_DECL_ITEM(file_utf8);
	WT_DECL_ITEM(pathbuf_wide);
	WT_DECL_ITEM(prefix_wide);
	WT_DECL_RET;
	WT_SESSION_IMPL *session;
	size_t dirallocsz, pathlen, prefix_widelen;
	uint32_t count;
	char *dir_copy, **entries;

	session = (WT_SESSION_IMPL *)wt_session;

	*dirlistp = NULL;
	*countp = 0;

	/*
	 * Initialize everything the error path reads before making the first
	 * call that can fail.
	 */
	findhandle = INVALID_HANDLE_VALUE;
	dirallocsz = 0;
	prefix_widelen = 0;
	count = 0;
	dir_copy = NULL;
	entries = NULL;

	/*
	 * Strip a trailing backslash from a private copy of the directory
	 * name, taking care not to index before the start of an empty string.
	 */
	WT_ERR(__wt_strdup(session, directory, &dir_copy));
	pathlen = strlen(dir_copy);
	if (pathlen > 0 && dir_copy[pathlen - 1] == '\\')
		dir_copy[pathlen - 1] = '\0';
	WT_ERR(__wt_scr_alloc(session, pathlen + 3, &pathbuf));
	WT_ERR(__wt_buf_fmt(session, pathbuf, "%s\\*", dir_copy));

	WT_ERR(__wt_to_utf16_string(session, pathbuf->data, &pathbuf_wide));

	/* The prefix is optional: only convert it when one was supplied. */
	if (prefix != NULL) {
		WT_ERR(__wt_to_utf16_string(session, prefix, &prefix_wide));
		prefix_widelen = wcslen(prefix_wide->data);
	}

	findhandle = FindFirstFileW(pathbuf_wide->data, &finddata);
	if (findhandle == INVALID_HANDLE_VALUE) {
		windows_error = __wt_getlasterror();
		__wt_errx(session,
		    "%s: directory-list: FindFirstFile: %s",
		    pathbuf->data, __wt_formatmessage(session, windows_error));
		WT_ERR(__wt_map_windows_error(windows_error));
	}

	do {
		/*
		 * Skip . and ..
		 */
		if (wcscmp(finddata.cFileName, L".") == 0 ||
		    wcscmp(finddata.cFileName, L"..") == 0)
			continue;

		/* The list of files is optionally filtered by a prefix. */
		if (prefix != NULL && wcsncmp(finddata.cFileName,
		    prefix_wide->data, prefix_widelen) != 0)
			continue;

		/* Grow the array by one slot and store a copy of the name. */
		WT_ERR(__wt_realloc_def(
		    session, &dirallocsz, count + 1, &entries));
		WT_ERR(__wt_to_utf8_string(
		    session, finddata.cFileName, &file_utf8));
		WT_ERR(__wt_strdup(session, file_utf8->data, &entries[count]));
		++count;
		__wt_scr_free(session, &file_utf8);
	} while (FindNextFileW(findhandle, &finddata) != 0);

	*dirlistp = entries;
	*countp = count;

err:	if (findhandle != INVALID_HANDLE_VALUE)
		if (FindClose(findhandle) == 0) {
			windows_error = __wt_getlasterror();
			__wt_errx(session,
			    "%s: directory-list: FindClose: %s",
			    pathbuf->data,
			    __wt_formatmessage(session, windows_error));
			if (ret == 0)
				ret = __wt_map_windows_error(windows_error);
		}

	__wt_free(session, dir_copy);
	__wt_scr_free(session, &pathbuf);
	__wt_scr_free(session, &file_utf8);
	__wt_scr_free(session, &pathbuf_wide);
	__wt_scr_free(session, &prefix_wide);

	if (ret == 0)
		return (0);

	/*
	 * The outputs may already have been published if the only failure was
	 * closing the search handle: don't hand the caller a list that is
	 * about to be freed.
	 */
	*dirlistp = NULL;
	*countp = 0;

	WT_TRET(__wt_win_directory_list_free(
	    file_system, wt_session, entries, count));

	WT_RET_MSG(session, ret,
	    "%s: directory-list, prefix \"%s\"",
	    directory, prefix == NULL ? "" : prefix);
}
/*
 * __conn_dhandle_get --
 *	Find the data handle for a name/checkpoint pair and lock it as
 *	requested; if there is none, allocate a new data handle, lock it
 *	exclusively and link it into the connection's list. Either way the
 *	handle is left in session->dhandle on success.
 */
static int
__conn_dhandle_get(WT_SESSION_IMPL *session,
    const char *name, const char *ckpt, uint32_t flags)
{
	WT_BTREE *btree;
	WT_CONNECTION_IMPL *conn;
	WT_DATA_HANDLE *dhandle;
	WT_DECL_RET;
	uint32_t hash_slot;

	conn = S2C(session);

	/*
	 * Look for an existing handle first: if the search succeeds, all that
	 * is left to do is lock the handle it stored in the session.
	 */
	ret = __wt_conn_dhandle_find(session, name, ckpt, flags);
	if (ret == 0)
		return (__conn_dhandle_open_lock(
		    session, session->dhandle, flags));
	WT_RET_NOTFOUND_OK(ret);

	/*
	 * Nothing was found. Build a data handle with its lock, name,
	 * optional checkpoint name and a btree handle. From here on failures
	 * unwind through the error label.
	 */
	WT_RET(__wt_calloc_one(session, &dhandle));

	WT_ERR(__wt_rwlock_alloc(session, &dhandle->rwlock, "data handle"));

	dhandle->name_hash = __wt_hash_city64(name, strlen(name));
	WT_ERR(__wt_strdup(session, name, &dhandle->name));
	if (ckpt != NULL)
		WT_ERR(__wt_strdup(session, ckpt, &dhandle->checkpoint));

	/* The data handle and the btree handle reference each other. */
	WT_ERR(__wt_calloc_one(session, &btree));
	btree->dhandle = dhandle;
	dhandle->handle = btree;

	WT_ERR(__wt_spin_init(
	    session, &dhandle->close_lock, "data handle close"));

	/* The handle is write-locked before anybody else can see it. */
	F_SET(dhandle, WT_DHANDLE_EXCLUSIVE);
	WT_ERR(__wt_writelock(session, dhandle->rwlock));

	/*
	 * Insert the handle into the connection's list and into the hash
	 * bucket selected by the name hash; the handle list lock is required.
	 */
	WT_ASSERT(session, F_ISSET(session, WT_SESSION_HANDLE_LIST_LOCKED));
	hash_slot = dhandle->name_hash % WT_HASH_ARRAY_SIZE;
	WT_CONN_DHANDLE_INSERT(conn, dhandle, hash_slot);

	session->dhandle = dhandle;
	return (0);

	/* Discard whatever part of the new handle was built. */
err:	WT_TRET(__wt_rwlock_destroy(session, &dhandle->rwlock));
	__wt_free(session, dhandle->name);
	__wt_free(session, dhandle->checkpoint);
	__wt_free(session, dhandle->handle);		/* btree free */
	__wt_spin_destroy(session, &dhandle->close_lock);
	__wt_overwrite_and_free(session, dhandle);

	return (ret);
}
/*
 * __posix_open_file --
 *	Open a file or directory and return a file handle with its method
 *	table filled in. On failure the partially built handle is closed and
 *	*file_handlep is left NULL.
 */
static int
__posix_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session,
    const char *name, WT_OPEN_FILE_TYPE file_type, uint32_t flags,
    WT_FILE_HANDLE **file_handlep)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_FILE_HANDLE *file_handle;
	WT_FILE_HANDLE_POSIX *pfh;
	WT_SESSION_IMPL *session;
	mode_t create_mode;
	int oflags;

	WT_UNUSED(file_system);

	*file_handlep = NULL;

	session = (WT_SESSION_IMPL *)wt_session;
	conn = S2C(session);

	WT_RET(__wt_calloc_one(session, &pfh));

	/* No descriptor yet: the error path relies on this marker. */
	pfh->fd = -1;

	if (file_type == WT_OPEN_FILE_TYPE_DIRECTORY) {
		/* Directories are opened read-only with no further setup. */
		oflags = O_RDONLY;
#ifdef O_CLOEXEC
		/*
		 * Security:
		 * The application may spawn a new process, and we don't want
		 * another process to have access to our file handles.
		 */
		oflags |= O_CLOEXEC;
#endif
		WT_SYSCALL_RETRY((
		    (pfh->fd = open(name, oflags, 0444)) == -1 ? -1 : 0), ret);
		if (ret != 0)
			WT_ERR_MSG(session, ret, "%s: handle-open: open", name);
		WT_ERR(__posix_open_file_cloexec(session, pfh->fd, name));
	} else {
		/* Access mode. */
		if (LF_ISSET(WT_OPEN_READONLY))
			oflags = O_RDONLY;
		else
			oflags = O_RDWR;

		/* Creation flags and the permissions used when creating. */
		create_mode = 0;
		if (LF_ISSET(WT_OPEN_CREATE)) {
			create_mode = 0666;
			oflags |= O_CREAT;
			if (LF_ISSET(WT_OPEN_EXCLUSIVE))
				oflags |= O_EXCL;
		}

#ifdef O_BINARY
		/* Windows clones: always treat the file as binary. */
		oflags |= O_BINARY;
#endif
#ifdef O_CLOEXEC
		/*
		 * Security:
		 * The application may spawn a new process, and we don't want
		 * another process to have access to our file handles.
		 */
		oflags |= O_CLOEXEC;
#endif
#ifdef O_DIRECT
		/* Direct I/O. */
		pfh->direct_io = LF_ISSET(WT_OPEN_DIRECTIO) ? true : false;
		if (pfh->direct_io)
			oflags |= O_DIRECT;
#endif
#ifdef O_NOATIME
		/* Avoid updating metadata for read-only workloads. */
		if (file_type == WT_OPEN_FILE_TYPE_DATA)
			oflags |= O_NOATIME;
#endif

		/* Log files may be configured for synchronous writes. */
		if (file_type == WT_OPEN_FILE_TYPE_LOG &&
		    FLD_ISSET(conn->txn_logsync, WT_LOG_DSYNC)) {
#ifdef O_DSYNC
			oflags |= O_DSYNC;
#elif defined(O_SYNC)
			oflags |= O_SYNC;
#else
			WT_ERR_MSG(session, ENOTSUP,
			    "unsupported log sync mode configured");
#endif
		}

		WT_SYSCALL_RETRY((
		    (pfh->fd = open(name, oflags, create_mode)) == -1 ?
		    -1 : 0), ret);
		if (ret != 0) {
			if (pfh->direct_io)
				WT_ERR_MSG(session, ret,
				    "%s: handle-open: open: failed with direct "
				    "I/O configured, "
				    "some filesystem types do not support "
				    "direct I/O",
				    name);
			WT_ERR_MSG(session, ret, "%s: handle-open: open", name);
		}
		WT_ERR(__posix_open_file_cloexec(session, pfh->fd, name));

#if defined(HAVE_POSIX_FADVISE)
		/*
		 * Disable read-ahead on trees: it slows down random read
		 * workloads. Ignore fadvise when doing direct I/O, the kernel
		 * cache isn't interesting.
		 */
		if (file_type == WT_OPEN_FILE_TYPE_DATA && !pfh->direct_io) {
			WT_SYSCALL(posix_fadvise(
			    pfh->fd, 0, 0, POSIX_FADV_RANDOM), ret);
			if (ret != 0)
				WT_ERR_MSG(session, ret,
				    "%s: handle-open: posix_fadvise", name);
		}
#endif
	}

	/* Both paths join here: fill in the public part of the handle. */
	file_handle = (WT_FILE_HANDLE *)pfh;
	WT_ERR(__wt_strdup(session, name, &file_handle->name));

	file_handle->close = __posix_file_close;
#if defined(HAVE_POSIX_FADVISE)
	/*
	 * Ignore fadvise when doing direct I/O, the kernel cache isn't
	 * interesting.
	 */
	if (!pfh->direct_io)
		file_handle->fh_advise = __posix_file_advise;
#endif
	file_handle->fh_allocate = __wt_posix_file_fallocate;
	file_handle->fh_lock = __posix_file_lock;
#ifndef WORDS_BIGENDIAN
	/*
	 * The underlying objects are little-endian, mapping objects isn't
	 * currently supported on big-endian systems, so the mapping methods
	 * are only set on little-endian builds.
	 */
	file_handle->fh_map = __wt_posix_map;
#ifdef HAVE_POSIX_MADVISE
	file_handle->fh_map_discard = __wt_posix_map_discard;
	file_handle->fh_map_preload = __wt_posix_map_preload;
#endif
	file_handle->fh_unmap = __wt_posix_unmap;
#endif
	file_handle->fh_read = __posix_file_read;
	file_handle->fh_size = __posix_file_size;
	file_handle->fh_sync = __posix_file_sync;
#ifdef HAVE_SYNC_FILE_RANGE
	file_handle->fh_sync_nowait = __posix_file_sync_nowait;
#endif
	file_handle->fh_truncate = __posix_file_truncate;
	file_handle->fh_write = __posix_file_write;

	*file_handlep = file_handle;
	return (0);

	/* Hand the partial handle to the close method for cleanup. */
err:	WT_TRET(__posix_file_close((WT_FILE_HANDLE *)pfh, wt_session));
	return (ret);
}
/*
 * __ckpt_server_config --
 *	Read the checkpoint server settings from the configuration into the
 *	connection and report through startp whether the server should run.
 */
static int
__ckpt_server_config(WT_SESSION_IMPL *session, const char **cfg, bool *startp)
{
	WT_CONFIG_ITEM cval;
	WT_CONNECTION_IMPL *conn;
	WT_DECL_ITEM(tmp);
	WT_DECL_RET;
	char *name_config;

	conn = S2C(session);

	/*
	 * Checkpoints are triggered by a wait time and/or an amount of log
	 * written; the wait time is configured in seconds and stored in
	 * microseconds.
	 */
	WT_RET(__wt_config_gets(session, cfg, "checkpoint.wait", &cval));
	conn->ckpt_usecs = (uint64_t)cval.val * 1000000;

	WT_RET(__wt_config_gets(session, cfg, "checkpoint.log_size", &cval));
	conn->ckpt_logsize = (wt_off_t)cval.val;

	/* Checkpoints are incompatible with in-memory configuration */
	if (conn->ckpt_usecs != 0 || conn->ckpt_logsize != 0) {
		WT_RET(__wt_config_gets(session, cfg, "in_memory", &cval));
		if (cval.val != 0)
			WT_RET_MSG(session, EINVAL,
			    "In memory configuration incompatible with "
			    "checkpoints");
	}

	__wt_log_written_reset(session);

	/*
	 * Without a wait time there is nothing to run unless a log size was
	 * given, and a log-size trigger additionally needs logging enabled.
	 */
	if (conn->ckpt_usecs == 0 &&
	    (conn->ckpt_logsize == 0 ||
	    !FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED))) {
		*startp = false;
		return (0);
	}
	*startp = true;

	/*
	 * An application-supplied checkpoint name is validated and saved as a
	 * "name=..." configuration string, replacing any previous one; an
	 * empty name or our default name is ignored.
	 */
	WT_RET(__wt_config_gets(session, cfg, "checkpoint.name", &cval));
	if (cval.len != 0 &&
	    !WT_STRING_MATCH(WT_CHECKPOINT, cval.str, cval.len)) {
		WT_RET(__wt_checkpoint_name_ok(session, cval.str, cval.len));

		WT_RET(__wt_scr_alloc(session, cval.len + 20, &tmp));
		WT_ERR(__wt_buf_fmt(
		    session, tmp, "name=%.*s", (int)cval.len, cval.str));
		WT_ERR(__wt_strdup(session, tmp->data, &name_config));

		__wt_free(session, conn->ckpt_config);
		conn->ckpt_config = name_config;
	}

err:	__wt_scr_free(session, &tmp);
	return (ret);
}
/*
 * __wt_open --
 *	Return a file handle for the named file: an already-open handle gets
 *	another reference, otherwise the file is opened and a new handle is
 *	linked into the connection's list of open files.
 */
int
__wt_open(WT_SESSION_IMPL *session,
    const char *name, int ok_create, int exclusive, int dio_type, WT_FH **fhp)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_FH *fh, *open_fh;
	mode_t mode;
	int direct_io, fd, found, oflags;
	const char *path;

	conn = S2C(session);
	fh = NULL;
	fd = -1;
	path = NULL;

	WT_VERBOSE_RET(session, fileops, "%s: open", name);

	/* If the file is already open, share the existing handle. */
	found = 0;
	__wt_spin_lock(session, &conn->fh_lock);
	TAILQ_FOREACH(open_fh, &conn->fhqh, q)
		if (strcmp(name, open_fh->name) == 0) {
			++open_fh->refcnt;
			*fhp = open_fh;
			found = 1;
			break;
		}
	__wt_spin_unlock(session, &conn->fh_lock);
	if (found)
		return (0);

	WT_RET(__wt_filename(session, name, &path));

	/* Build the open flags. */
	oflags = O_RDWR;
#ifdef O_BINARY
	/* Windows clones: we always want to treat the file as a binary. */
	oflags |= O_BINARY;
#endif
#ifdef O_CLOEXEC
	/*
	 * Security:
	 * The application may spawn a new process, and we don't want another
	 * process to have access to our file handles.
	 */
	oflags |= O_CLOEXEC;
#endif
#ifdef O_NOATIME
	/* Avoid updating metadata for read-only workloads. */
	if (dio_type == WT_FILE_TYPE_DATA)
		oflags |= O_NOATIME;
#endif

	/* Permissions only matter when the file may be created. */
	mode = 0;
	if (ok_create) {
		mode = 0666;
		oflags |= O_CREAT;
		if (exclusive)
			oflags |= O_EXCL;
	}

	direct_io = 0;
#ifdef O_DIRECT
	if (dio_type && FLD_ISSET(conn->direct_io, dio_type)) {
		direct_io = 1;
		oflags |= O_DIRECT;
	}
#endif

	/* Log files may be configured for synchronous writes. */
	if (dio_type == WT_FILE_TYPE_LOG &&
	    FLD_ISSET(conn->txn_logsync, WT_LOG_DSYNC)) {
#ifdef O_DSYNC
		oflags |= O_DSYNC;
#elif defined(O_SYNC)
		oflags |= O_SYNC;
#else
		WT_ERR_MSG(session, ENOTSUP,
		    "Unsupported log sync mode requested");
#endif
	}

	WT_SYSCALL_RETRY(((fd = open(path, oflags, mode)) == -1 ? 1 : 0), ret);
	if (ret != 0) {
		if (direct_io)
			WT_ERR_MSG(session, ret,
			    "%s: open failed with direct I/O configured, some "
			    "filesystem types do not support direct I/O",
			    path);
		WT_ERR_MSG(session, ret, "%s", path);
	}

#if defined(HAVE_FCNTL) && defined(FD_CLOEXEC) && !defined(O_CLOEXEC)
	/*
	 * Security:
	 * The application may spawn a new process, and we don't want another
	 * process to have access to our file handles.  There's an obvious
	 * race here, so we prefer the flag to open if available.
	 */
	if ((oflags = fcntl(fd, F_GETFD)) == -1 ||
	    fcntl(fd, F_SETFD, oflags | FD_CLOEXEC) == -1)
		WT_ERR_MSG(session, __wt_errno(), "%s: fcntl", name);
#endif

#if defined(HAVE_POSIX_FADVISE)
	/* Disable read-ahead on trees: it slows down random read workloads. */
	if (dio_type == WT_FILE_TYPE_DATA)
		WT_ERR(posix_fadvise(fd, 0, 0, POSIX_FADV_RANDOM));
#endif

	if (F_ISSET(conn, WT_CONN_CKPT_SYNC))
		WT_ERR(__open_directory_sync(session));

	/* Allocate and fill in the handle. */
	WT_ERR(__wt_calloc(session, 1, sizeof(WT_FH), &fh));
	WT_ERR(__wt_strdup(session, name, &fh->name));
	fh->fd = fd;
	fh->refcnt = 1;
	fh->direct_io = direct_io;

	/* Set the file's size. */
	WT_ERR(__wt_filesize(session, fh, &fh->size));

	/* Configure file extension. */
	if (dio_type == WT_FILE_TYPE_DATA)
		fh->extend_len = conn->data_extend_len;

	/*
	 * The lock was dropped while the file was being opened, so search
	 * the list again before linking the new handle onto it.
	 */
	found = 0;
	__wt_spin_lock(session, &conn->fh_lock);
	TAILQ_FOREACH(open_fh, &conn->fhqh, q)
		if (strcmp(name, open_fh->name) == 0) {
			++open_fh->refcnt;
			*fhp = open_fh;
			found = 1;
			break;
		}
	if (!found) {
		TAILQ_INSERT_TAIL(&conn->fhqh, fh, q);
		WT_STAT_FAST_CONN_INCR(session, file_open);

		*fhp = fh;
	}
	__wt_spin_unlock(session, &conn->fh_lock);

	/* The new handle is in use: keep it and the descriptor. */
	if (!found)
		goto done;

	/*
	 * Either the open failed or an existing handle was found on the
	 * second search: discard our handle and close the descriptor.
	 */
err:	if (fh != NULL) {
		__wt_free(session, fh->name);
		__wt_free(session, fh);
	}
	if (fd != -1)
		(void)close(fd);

done:	__wt_free(session, path);
	return (ret);
}
/*
 * __wt_block_open --
 *	Open a block handle.
 *
 *	If a block handle for the file is already on the connection's list
 *	its reference count is incremented and it is returned. Otherwise a new
 *	handle is allocated, linked into the list, configured from cfg, and the
 *	underlying file is opened. The connection's block lock is held for the
 *	whole operation. On failure *blockp is left NULL and any partially
 *	built handle is destroyed.
 */
int
__wt_block_open(WT_SESSION_IMPL *session, const char *filename,
    const char *cfg[], int forced_salvage,
    uint32_t allocsize, WT_BLOCK **blockp)
{
	WT_BLOCK *block;
	WT_CONFIG_ITEM cval;
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;

	WT_VERBOSE_TRET(session, block, "open: %s", filename);

	conn = S2C(session);
	*blockp = NULL;

	__wt_spin_lock(session, &conn->block_lock);
	TAILQ_FOREACH(block, &conn->blockqh, q)
		if (strcmp(filename, block->name) == 0) {
			++block->ref;
			*blockp = block;
			__wt_spin_unlock(session, &conn->block_lock);
			return (0);
		}

	/*
	 * Basic structure allocation, initialization.
	 *
	 * Make it explicit that there's no handle until the allocation
	 * succeeds, the error path depends on it.
	 */
	block = NULL;
	WT_ERR(__wt_calloc_def(session, 1, &block));
	block->ref = 1;
	TAILQ_INSERT_HEAD(&conn->blockqh, block, q);

	WT_ERR(__wt_strdup(session, filename, &block->name));
	block->allocsize = allocsize;

	WT_ERR(__wt_config_gets(session, cfg, "block_allocation", &cval));
	block->allocfirst =
	    WT_STRING_MATCH("first", cval.str, cval.len) ? 1 : 0;

	/* Configuration: optional OS buffer cache maximum size. */
	WT_ERR(__wt_config_gets(session, cfg, "os_cache_max", &cval));
	block->os_cache_max = cval.val;
#ifdef HAVE_POSIX_FADVISE
	if (conn->direct_io && block->os_cache_max)
		WT_ERR_MSG(session, EINVAL,
		    "os_cache_max not supported in combination with direct_io");
#else
	if (block->os_cache_max)
		WT_ERR_MSG(session, EINVAL,
		    "os_cache_max not supported if posix_fadvise not "
		    "available");
#endif

	/* Configuration: optional immediate write scheduling flag. */
	WT_ERR(__wt_config_gets(session, cfg, "os_cache_dirty_max", &cval));
	block->os_cache_dirty_max = cval.val;
#ifdef HAVE_SYNC_FILE_RANGE
	if (conn->direct_io && block->os_cache_dirty_max)
		WT_ERR_MSG(session, EINVAL,
		    "os_cache_dirty_max not supported in combination with "
		    "direct_io");
#else
	if (block->os_cache_dirty_max)
		WT_ERR_MSG(session, EINVAL,
		    "os_cache_dirty_max not supported if sync_file_range not "
		    "available");
#endif

	/* Open the underlying file handle. */
	WT_ERR(__wt_open(
	    session, filename, 0, 0, WT_FILE_TYPE_DATA, &block->fh));

	/* Initialize the live checkpoint's lock. */
	WT_ERR(__wt_spin_init(session, &block->live_lock, "block manager"));

	/*
	 * Read the description information from the first block.
	 *
	 * Salvage is a special case: if we're forcing the salvage, we don't
	 * look at anything, including the description information.
	 */
	if (!forced_salvage)
		WT_ERR(__desc_read(session, block));

	*blockp = block;
	__wt_spin_unlock(session, &conn->block_lock);
	return (0);

	/*
	 * If the structure allocation itself failed there is no handle to
	 * destroy: never pass a NULL handle to the destroy function.
	 */
err:	if (block != NULL)
		WT_TRET(__block_destroy(session, block));
	__wt_spin_unlock(session, &conn->block_lock);
	return (ret);
}