/*
 * __wt_posix_file_fallocate --
 *	POSIX fallocate: the first-call entry point, which probes for a working
 *	allocation method and installs it on the file handle.
 *
 *	Returns 0 if one of the probed methods succeeded (the requested range
 *	has then been allocated by that method), or ENOTSUP if none did.
 */
int
__wt_posix_file_fallocate(WT_FILE_HANDLE *file_handle,
    WT_SESSION *wt_session, wt_off_t offset, wt_off_t len)
{
	int ret;

	ret = 0;

	/*
	 * This is the first fallocate call on the handle: work out which
	 * fallocate variant, if any, this system supports.
	 *
	 * This function is installed as a locking fallocate call, so only one
	 * thread at a time runs through here. In the unlocked cases the nolock
	 * method is stored first and only then is the locking method published
	 * as NULL, so the pair of handle methods is always usable.
	 *
	 * Linux systems have been seen where posix_fallocate corrupted
	 * existing file data (even though POSIX explicitly disallows that).
	 * FreeBSD and Solaris support posix_fallocate and so far no problems
	 * have been seen leaving it unlocked there. Probe fallocate, then the
	 * system-call version of fallocate, before posix_fallocate so Linux
	 * can avoid locking whenever possible.
	 */
	if (__posix_std_fallocate(
	    file_handle, wt_session, offset, len) == 0) {
		file_handle->fh_allocate_nolock = __posix_std_fallocate;
		WT_PUBLISH(file_handle->fh_allocate, NULL);
	} else if (__posix_sys_fallocate(
	    file_handle, wt_session, offset, len) == 0) {
		file_handle->fh_allocate_nolock = __posix_sys_fallocate;
		WT_PUBLISH(file_handle->fh_allocate, NULL);
	} else if (__posix_posix_fallocate(
	    file_handle, wt_session, offset, len) == 0) {
#if defined(__linux__)
		/* On Linux, posix_fallocate stays in the locking slot. */
		file_handle->fh_allocate = __posix_posix_fallocate;
		WT_WRITE_BARRIER();
#else
		file_handle->fh_allocate_nolock = __posix_posix_fallocate;
		WT_PUBLISH(file_handle->fh_allocate, NULL);
#endif
	} else {
		/* No method worked: clear the locking method and give up. */
		file_handle->fh_allocate = NULL;
		WT_WRITE_BARRIER();
		ret = ENOTSUP;
	}

	return (ret);
}
/*
 * __wt_connection_open --
 *	Open a connection: allocate the session array, replace the default
 *	session with an internal session, then create the cache and initialize
 *	transaction support.
 *
 *	Returns 0 on success, otherwise the error from the first failing step.
 */
int
__wt_connection_open(WT_CONNECTION_IMPL *conn, const char *cfg[])
{
	WT_SESSION_IMPL *session;

	/* Start from the connection's current default session. */
	session = conn->default_session;
	WT_ASSERT(session, session->iface.connection == &conn->iface);

	/*
	 * The run flags tell internal server threads to run, and have to be
	 * set before any session is opened.
	 */
	F_SET(conn, WT_CONN_SERVER_RUN | WT_CONN_LOG_SERVER_RUN);

	/* Allocate the WT_SESSION_IMPL array. */
	WT_RET(__wt_calloc(session,
	    conn->session_size, sizeof(WT_SESSION_IMPL), &conn->sessions));
	WT_CACHE_LINE_ALIGNMENT_VERIFY(session, conn->sessions);

	/*
	 * Open the default session ahead of any service threads: those
	 * threads may allocate and use session resources that need to be
	 * cleaned up on close.
	 */
	WT_RET(__wt_open_internal_session(
	    conn, "connection", false, 0, &session));

	/*
	 * The connection's default session starts out as a static structure;
	 * replace it with the more fully-functional session just opened. The
	 * indirection through a stack variable is required: the session
	 * allocation code itself uses the connection's default session, so
	 * handing it the address of that field as the output location would
	 * confuse things and could corrupt error handling. Assign only after
	 * the open has succeeded.
	 */
	conn->default_session = session;

	/*
	 * Publish: a barrier is needed so the connection structure fields are
	 * set before other threads read from the pointer.
	 */
	WT_WRITE_BARRIER();

	/* Create the cache. */
	WT_RET(__wt_cache_create(session, cfg));

	/* Initialize transaction support; its result is the final status. */
	return (__wt_txn_global_init(session, cfg));
}
/*
 * __wt_insert_serial_func --
 *	Server function to add a WT_INSERT entry to the page.
 *
 *	The arguments are unpacked from the opaque args blob. Returns 0 on
 *	success, or the error from the page write-generation check.
 */
int
__wt_insert_serial_func(WT_SESSION_IMPL *session, void *args)
{
	WT_INSERT *new_ins, ***ins_stack;
	WT_INSERT_HEAD *list, **insheadp, **new_inslist, *new_inshead;
	WT_PAGE *page;
	uint32_t write_gen;
	u_int level, skipdepth;

	__wt_insert_unpack(args, &page, &write_gen, &insheadp,
	    &ins_stack, &new_inslist, &new_inshead, &new_ins, &skipdepth);

	/* Check the page's write-generation. */
	WT_RET(__wt_page_write_gen_check(session, page, write_gen));

	/* Work on the existing insert head, else the caller-supplied one. */
	list = *insheadp;
	if (list == NULL)
		list = new_inshead;

	/*
	 * Publish, step one: at every level, aim the new WT_INSERT item's
	 * skiplist reference at the element that will follow it, then flush
	 * memory.
	 */
	for (level = 0; level < skipdepth; ++level)
		new_ins->next[level] = *ins_stack[level];

	WT_WRITE_BARRIER();

	/*
	 * Publish, step two: update the skiplist elements that reference the
	 * new WT_INSERT item. Doing it in this order ensures the list is never
	 * inconsistent. The tail pointer for a level moves to the new item
	 * when that level was empty or the new item is linked in after the
	 * current tail.
	 */
	for (level = 0; level < skipdepth; ++level) {
		if (list->tail[level] == NULL ||
		    ins_stack[level] == &list->tail[level]->next[level])
			list->tail[level] = new_ins;
		*ins_stack[level] = new_ins;
	}

	__wt_insert_new_ins_taken(session, args, page);

	/*
	 * If the insert head does not yet have an insert list, use the one
	 * our caller passed.
	 *
	 * NOTE: this must happen after the item has been added to the list.
	 * Code can assume that if the list is set, it is non-empty.
	 */
	if (*insheadp == NULL) {
		WT_PUBLISH(*insheadp, new_inshead);
		__wt_insert_new_inshead_taken(session, args, page);
	}

	/*
	 * If the page does not yet have an insert array, use the one our
	 * caller passed: row-store leaf pages keep it in u.row.ins, all other
	 * page types in modify->update.
	 *
	 * NOTE: this must happen after publishing the list entry. Code can
	 * assume that if the array is set, it is non-empty.
	 */
	if (page->type != WT_PAGE_ROW_LEAF) {
		if (page->modify->update == NULL) {
			page->modify->update = new_inslist;
			__wt_insert_new_inslist_taken(session, args, page);
		}
	} else if (page->u.row.ins == NULL) {
		page->u.row.ins = new_inslist;
		__wt_insert_new_inslist_taken(session, args, page);
	}

	__wt_page_and_tree_modify_set(session, page);
	return (0);
}
/*
 * __wt_col_append_serial_func --
 *	Server function to append a WT_INSERT entry to the tree.
 *
 *	The arguments are unpacked from the opaque args blob. Returns 0 on
 *	success, the error from the page write-generation check, or
 *	WT_RESTART if the requested record number is already on the list.
 */
int
__wt_col_append_serial_func(WT_SESSION_IMPL *session, void *args)
{
	WT_BTREE *btree;
	WT_INSERT *found, *new_ins, ***ins_stack, **next_stack;
	WT_INSERT_HEAD *list, **insheadp, **new_inslist, *new_inshead;
	WT_PAGE *page;
	uint64_t recno;
	uint32_t write_gen;
	u_int level, skipdepth;

	btree = S2BT(session);

	__wt_col_append_unpack(args, &page, &write_gen, &insheadp,
	    &ins_stack, &next_stack,
	    &new_inslist, &new_inshead, &new_ins, &skipdepth);

	/* Check the page's write-generation. */
	WT_RET(__wt_page_write_gen_check(session, page, write_gen));

	/* Work on the existing insert head, else the caller-supplied one. */
	list = *insheadp;
	if (list == NULL)
		list = new_inshead;

	/*
	 * A record number of zero means none was specified: take the next
	 * record number from the tree and store it in the new item.
	 *
	 * If the application did specify a record number, there's a race: the
	 * application may have searched for the record, not found it, then
	 * called into the append code, and another thread might have added the
	 * record in the meantime. Fortunately, this is the right place to
	 * check: if the record didn't exist at some point, it can only have
	 * been created on this list. Search the list for the record number.
	 */
	recno = WT_INSERT_RECNO(new_ins);
	if (recno == 0)
		recno = WT_INSERT_RECNO(new_ins) = ++btree->last_recno;

	found = __col_insert_search(list, ins_stack, next_stack, recno);

	/* If the record number is found, there's been a race. */
	if (found != NULL && WT_INSERT_RECNO(found) == recno)
		WT_RET(WT_RESTART);

	/*
	 * Publish, step one: at every level, aim the new WT_INSERT item's
	 * skiplist reference at the element that will follow it, then flush
	 * memory.
	 */
	for (level = 0; level < skipdepth; ++level)
		new_ins->next[level] = *ins_stack[level];

	WT_WRITE_BARRIER();

	/*
	 * Publish, step two: update the skiplist elements that reference the
	 * new WT_INSERT item. Doing it in this order ensures the list is never
	 * inconsistent. The tail pointer for a level moves to the new item
	 * when that level was empty or the new item is linked in after the
	 * current tail.
	 */
	for (level = 0; level < skipdepth; ++level) {
		if (list->tail[level] == NULL ||
		    ins_stack[level] == &list->tail[level]->next[level])
			list->tail[level] = new_ins;
		*ins_stack[level] = new_ins;
	}

	__wt_col_append_new_ins_taken(args);

	/*
	 * If the insert head does not yet have an insert list, use the one
	 * our caller passed.
	 *
	 * NOTE: this must happen after the item has been added to the list.
	 * Code can assume that if the list is set, it is non-empty.
	 */
	if (*insheadp == NULL) {
		WT_PUBLISH(*insheadp, new_inshead);
		__wt_col_append_new_inshead_taken(args);
	}

	/*
	 * If the page does not yet have an insert array, use the one our
	 * caller passed.
	 *
	 * NOTE: this must happen after publishing the list entry. Code can
	 * assume that if the array is set, it is non-empty.
	 */
	if (page->modify->append == NULL) {
		page->modify->append = new_inslist;
		__wt_col_append_new_inslist_taken(args);
	}

	/*
	 * The record was not already present: if this append extended the
	 * file, update the tree's last record number.
	 */
	if (btree->last_recno < recno)
		btree->last_recno = recno;

	__wt_page_and_tree_modify_set(session, page);
	return (0);
}