/*
 * wiredtiger_strerror --
 *	Translate an error value into a string. This variant hands a
 *	function-static buffer to __wt_strerror, so it is not thread-safe.
 */
const char *
wiredtiger_strerror(int error)
{
	static char errbuf[128];
	const char *msg;

	/* There is no session in this entry point: pass NULL for it. */
	msg = __wt_strerror(NULL, error, errbuf, sizeof(errbuf));
	return (msg);
}
/*
 * __handler_failure --
 *	Complain that an application-configured event handler returned an
 *	error. "which" names the handler kind for the message text, and
 *	"error_handler_failed" says the failing handler was the error handler
 *	itself.
 */
static void
__handler_failure(WT_SESSION_IMPL *session,
    int error, const char *which, bool error_handler_failed)
{
	WT_EVENT_HANDLER *handler;
	WT_SESSION *wt_session;
	bool try_app_handler;

	/*
	 * !!!
	 * SECURITY:
	 * Buffer placed at the end of the stack in case snprintf overflows.
	 */
	char s[256];

	/* Build the complaint before touching any handler. */
	(void)snprintf(s, sizeof(s),
	    "application %s event handler failed: %s",
	    which, __wt_strerror(session, error, NULL, 0));

	wt_session = (WT_SESSION *)session;
	handler = session->event_handler;

	/*
	 * The application's error handler is only worth trying when it is not
	 * the handler that just failed and it is not already the default one.
	 * If it is tried and reports success, we're done.
	 */
	try_app_handler = !error_handler_failed &&
	    handler->handle_error != __handle_error_default;
	if (try_app_handler &&
	    handler->handle_error(handler, wt_session, error, s) == 0)
		return;

	/* Otherwise (or if that call failed), use the default error handler. */
	(void)__handle_error_default(NULL, wt_session, error, s);
}
/* * __wt_strerror_r -- * Windows implementation of wiredtiger_strerror_r. */ int __wt_strerror_r(int error, char *buf, size_t buflen) { DWORD lasterror; const char *p; /* Require at least 2 bytes, printable character and trailing nul. */ if (buflen < 2) return (ENOMEM); /* * Check for POSIX errors, Windows errors, then fallback to something * generic. Copy the string into the user's buffer, return success if * anything printed. */ p = __wt_strerror(error); if (p != NULL && snprintf(buf, buflen, "%s", p) > 0) return (0); if (error < 0) { error = __wt_map_error_to_windows_error(error); lasterror = FormatMessageA( FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, NULL, error, 0, /* let system choose the correct LANGID */ buf, buflen, NULL); if (lasterror != 0) return (0); /* Fall through to the fallback error code */ } /* Fallback to a generic message, then guess it's a memory problem. */ return ( snprintf(buf, buflen, "error return: %d", error) > 0 ? 0 : ENOMEM); }
/*
 * __wt_lsm_merge --
 *	Merge a set of chunks of an LSM tree.
 *
 *	A span of chunks is chosen by __lsm_merge_span while the tree lock is
 *	held. Every key/value pair returned by a merge cursor over that span is
 *	then copied into a newly allocated chunk (optionally feeding a Bloom
 *	filter as well), and finally the new chunk is installed in the tree and
 *	the LSM metadata is written, again under the tree lock.
 *
 *	session:  session used for every call made here. WT_SESSION_NO_CACHE
 *		  and WT_SESSION_NO_EVICTION are set on it along the way and
 *		  are both cleared on any exit that passes through "err".
 *	lsm_tree: the tree whose chunks are merged.
 *	id:       handed unchanged to __lsm_merge_span.
 *
 *	Returns WT_NOTFOUND from the fast path when no merge is attempted;
 *	otherwise the value accumulated in "ret" (0 on success). The copy loop
 *	raises EINTR when the tree is no longer flagged active.
 */
int
__wt_lsm_merge(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, u_int id)
{
	WT_BLOOM *bloom;
	WT_CURSOR *dest, *src;
	WT_DECL_RET;
	WT_ITEM key, value;
	WT_LSM_CHUNK *chunk;
	uint32_t generation;
	uint64_t insert_count, record_count;
	u_int dest_id, end_chunk, i, nchunks, start_chunk, start_id, verb;
	int tret;
	bool created_chunk, create_bloom, locked, in_sync;
	const char *cfg[3];
	/* Configuration used when dropping the new files on the error path. */
	const char *drop_cfg[] = {
	    WT_CONFIG_BASE(session, WT_SESSION_drop), "force", NULL };

	/*
	 * Everything the cleanup code at "err" inspects starts out in its
	 * "nothing to undo" state.
	 */
	bloom = NULL;
	chunk = NULL;
	dest = src = NULL;
	start_id = 0;
	created_chunk = create_bloom = locked = in_sync = false;

	/* Fast path if it's obvious no merges could be done. */
	if (lsm_tree->nchunks < lsm_tree->merge_min &&
	    lsm_tree->merge_aggressiveness < WT_LSM_AGGRESSIVE_THRESHOLD)
		return (WT_NOTFOUND);

	/*
	 * Use the lsm_tree lock to read the chunks (so no switches occur), but
	 * avoid holding it while the merge is in progress: that may take a
	 * long time.
	 */
	WT_RET(__wt_lsm_tree_writelock(session, lsm_tree));
	locked = true;

	WT_ERR(__lsm_merge_span(session,
	    lsm_tree, id, &start_chunk, &end_chunk, &record_count));
	/* The span is inclusive at both ends. */
	nchunks = (end_chunk + 1) - start_chunk;

	WT_ASSERT(session, nchunks > 0);
	/*
	 * Remember the ID of the first chunk in the span: it is used below to
	 * find the span again if the chunk array changed while unlocked.
	 */
	start_id = lsm_tree->chunk[start_chunk]->id;

	/*
	 * Find the merge generation: one more than the highest generation of
	 * any chunk in the span.
	 */
	for (generation = 0, i = 0; i < nchunks; i++)
		generation = WT_MAX(generation,
		    lsm_tree->chunk[start_chunk + i]->generation + 1);

	WT_ERR(__wt_lsm_tree_writeunlock(session, lsm_tree));
	locked = false;

	/* Allocate an ID for the merge. */
	dest_id = __wt_atomic_add32(&lsm_tree->last, 1);

	/*
	 * We only want to do the chunk loop if we're running with verbose,
	 * so we wrap these statements in the conditional. Avoid the loop
	 * in the normal path.
	 *
	 * NOTE(review): the chunk array is read here after the tree lock was
	 * released above -- confirm that is acceptable for diagnostics.
	 */
	if (WT_VERBOSE_ISSET(session, WT_VERB_LSM)) {
		WT_ERR(__wt_verbose(session, WT_VERB_LSM,
		    "Merging %s chunks %u-%u into %u (%" PRIu64 " records)"
		    ", generation %" PRIu32,
		    lsm_tree->name,
		    start_chunk, end_chunk, dest_id, record_count, generation));
		for (verb = start_chunk; verb <= end_chunk; verb++)
			WT_ERR(__wt_verbose(session, WT_VERB_LSM,
			    "Merging %s: Chunk[%u] id %u, gen: %" PRIu32
			    ", size: %" PRIu64 ", records: %" PRIu64,
			    lsm_tree->name, verb, lsm_tree->chunk[verb]->id,
			    lsm_tree->chunk[verb]->generation,
			    lsm_tree->chunk[verb]->size,
			    lsm_tree->chunk[verb]->count));
	}

	/*
	 * From here on the new chunk exists: on failure the cleanup code at
	 * "err" drops its files and frees it.
	 */
	WT_ERR(__wt_calloc_one(session, &chunk));
	created_chunk = true;
	chunk->id = dest_id;

	/*
	 * Build a Bloom filter only if the tree is configured for Bloom
	 * filters on merged chunks, the span has records, and either the
	 * "oldest" Bloom setting is on or the span does not start at chunk 0.
	 */
	if (FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_MERGED) &&
	    (FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OLDEST) ||
	    start_chunk > 0) && record_count > 0)
		create_bloom = true;

	/*
	 * Special setup for the merge cursor: open a cursor on the LSM tree,
	 * mark it raw, then hand it to __wt_clsm_init_merge together with the
	 * starting chunk, that chunk's ID and the number of chunks in the span.
	 */
	WT_ERR(__wt_open_cursor(session, lsm_tree->name, NULL, NULL, &src));
	F_SET(src, WT_CURSTD_RAW);
	WT_ERR(__wt_clsm_init_merge(src, start_chunk, start_id, nchunks));

	/* Chunk setup is done with the schema lock held. */
	WT_WITH_SCHEMA_LOCK(session,
	    ret = __wt_lsm_tree_setup_chunk(session, lsm_tree, chunk));
	WT_ERR(ret);
	if (create_bloom) {
		WT_ERR(__wt_lsm_tree_setup_bloom(session, lsm_tree, chunk));

		WT_ERR(__wt_bloom_create(session, chunk->bloom_uri,
		    lsm_tree->bloom_config,
		    record_count, lsm_tree->bloom_bit_count,
		    lsm_tree->bloom_hash_count, &bloom));
	}

	/* Discard pages we read as soon as we're done with them. */
	F_SET(session, WT_SESSION_NO_CACHE);

	/* Open the destination as a raw bulk cursor on the new chunk. */
	cfg[0] = WT_CONFIG_BASE(session, WT_SESSION_open_cursor);
	cfg[1] = "bulk,raw,skip_sort_check";
	cfg[2] = NULL;
	WT_ERR(__wt_open_cursor(session, chunk->uri, NULL, cfg, &dest));

	/*
	 * Copy loop. Every LSM_MERGE_CHECK_INTERVAL rows: give up with EINTR
	 * if the tree is no longer active, credit the rows-merged statistic
	 * and bump the progress counter.
	 *
	 * NOTE(review): the periodic branch also fires at insert_count == 0,
	 * before any row has been copied, and the remainder is added again
	 * after the loop; this looks like it over-counts lsm_rows_merged by up
	 * to one interval -- confirm whether that is intended.
	 */
#define	LSM_MERGE_CHECK_INTERVAL	WT_THOUSAND
	for (insert_count = 0; (ret = src->next(src)) == 0; insert_count++) {
		if (insert_count % LSM_MERGE_CHECK_INTERVAL == 0) {
			if (!F_ISSET(lsm_tree, WT_LSM_TREE_ACTIVE))
				WT_ERR(EINTR);

			WT_STAT_FAST_CONN_INCRV(session,
			    lsm_rows_merged, LSM_MERGE_CHECK_INTERVAL);
			++lsm_tree->merge_progressing;
		}

		WT_ERR(src->get_key(src, &key));
		dest->set_key(dest, &key);
		WT_ERR(src->get_value(src, &value));
		dest->set_value(dest, &value);
		WT_ERR(dest->insert(dest));
		if (create_bloom)
			WT_ERR(__wt_bloom_insert(bloom, &key));
	}
	/* Running off the end of the source cursor is the normal exit. */
	WT_ERR_NOTFOUND_OK(ret);

	/* Account for the rows copied since the last periodic check. */
	WT_STAT_FAST_CONN_INCRV(session,
	    lsm_rows_merged, insert_count % LSM_MERGE_CHECK_INTERVAL);
	++lsm_tree->merge_progressing;
	WT_ERR(__wt_verbose(session, WT_VERB_LSM,
	    "Bloom size for %" PRIu64 " has %" PRIu64 " items inserted.",
	    record_count, insert_count));

	/*
	 * Closing and syncing the files can take a while. Set the
	 * merge_syncing field so that compact knows it is still in
	 * progress. The in_sync flag lets the cleanup code at "err" undo the
	 * increment if we bail out before the matching decrement below.
	 */
	(void)__wt_atomic_add32(&lsm_tree->merge_syncing, 1);
	in_sync = true;
	/*
	 * We've successfully created the new chunk. Now install it. We need
	 * to ensure that the NO_CACHE flag is cleared and the bloom filter
	 * is closed (even if a step fails), so track errors but don't return
	 * until we've cleaned up.
	 */
	WT_TRET(src->close(src));
	WT_TRET(dest->close(dest));
	src = dest = NULL;

	F_CLR(session, WT_SESSION_NO_CACHE);

	/*
	 * We're doing advisory reads to fault the new trees into cache.
	 * Don't block if the cache is full: our next unit of work may be to
	 * discard some trees to free space.
	 */
	F_SET(session, WT_SESSION_NO_EVICTION);

	if (create_bloom) {
		/* Skip the remaining Bloom work once any step has failed. */
		if (ret == 0)
			WT_TRET(__wt_bloom_finalize(bloom));

		/*
		 * Read in a key to make sure the Bloom filters btree handle is
		 * open before it becomes visible to application threads.
		 * Otherwise application threads will stall while it is opened
		 * and internal pages are read into cache.
		 */
		if (ret == 0) {
			WT_CLEAR(key);
			WT_TRET_NOTFOUND_OK(__wt_bloom_get(bloom, &key));
		}

		/* The filter is closed whether or not the steps above worked. */
		WT_TRET(__wt_bloom_close(bloom));
		bloom = NULL;
	}
	/* Cleanup is done: now act on any error tracked since the closes. */
	WT_ERR(ret);

	/*
	 * Open a handle on the new chunk before application threads attempt
	 * to access it, opening it pre-loads internal pages into the file
	 * system cache.
	 */
	cfg[1] = "checkpoint=" WT_CHECKPOINT;
	WT_ERR(__wt_open_cursor(session, chunk->uri, NULL, cfg, &dest));
	WT_TRET(dest->close(dest));
	dest = NULL;
	++lsm_tree->merge_progressing;
	(void)__wt_atomic_sub32(&lsm_tree->merge_syncing, 1);
	in_sync = false;
	WT_ERR_NOTFOUND_OK(ret);

	WT_ERR(__wt_lsm_tree_set_chunk_size(session, chunk));
	WT_ERR(__wt_lsm_tree_writelock(session, lsm_tree));
	locked = true;

	/*
	 * Check whether we raced with another merge, and adjust the chunk
	 * array offset as necessary: if the chunk at start_chunk no longer
	 * carries start_id, scan the array for the chunk that does.
	 *
	 * NOTE(review): if no chunk matches, the scan ends with start_chunk
	 * equal to lsm_tree->nchunks -- confirm the callee copes with that.
	 */
	if (start_chunk >= lsm_tree->nchunks ||
	    lsm_tree->chunk[start_chunk]->id != start_id)
		for (start_chunk = 0;
		    start_chunk < lsm_tree->nchunks;
		    start_chunk++)
			if (lsm_tree->chunk[start_chunk]->id == start_id)
				break;

	/*
	 * It is safe to error out here - since the update can only fail
	 * prior to making updates to the tree.
	 */
	WT_ERR(__wt_lsm_merge_update_tree(
	    session, lsm_tree, start_chunk, nchunks, chunk));

	/* Record what the new chunk contains and that it is on disk. */
	if (create_bloom)
		F_SET(chunk, WT_LSM_CHUNK_BLOOM);
	chunk->count = insert_count;
	chunk->generation = generation;
	F_SET(chunk, WT_LSM_CHUNK_ONDISK);

	/*
	 * We have no current way of continuing if the metadata update fails,
	 * so we will panic in that case. Put some effort into cleaning up
	 * after ourselves here - so things have a chance of shutting down.
	 *
	 * Any errors that happened after the tree was locked are
	 * fatal - we can't guarantee the state of the tree.
	 */
	if ((ret = __wt_lsm_meta_write(session, lsm_tree)) != 0)
		WT_PANIC_ERR(session, ret, "Failed finalizing LSM merge");

	lsm_tree->dsk_gen++;

	/* Update the throttling while holding the tree lock. */
	__wt_lsm_tree_throttle(session, lsm_tree, true);

	/* Schedule a pass to discard old chunks */
	WT_ERR(__wt_lsm_manager_push_entry(
	    session, WT_LSM_WORK_DROP, 0, lsm_tree));

	/*
	 * Common exit path for success and failure: release whatever is still
	 * held, folding any cleanup failures into ret.
	 */
err:	if (locked)
		WT_TRET(__wt_lsm_tree_writeunlock(session, lsm_tree));
	if (in_sync)
		(void)__wt_atomic_sub32(&lsm_tree->merge_syncing, 1);
	if (src != NULL)
		WT_TRET(src->close(src));
	if (dest != NULL)
		WT_TRET(dest->close(dest));
	if (bloom != NULL)
		WT_TRET(__wt_bloom_close(bloom));
	if (ret != 0 && created_chunk) {
		/*
		 * Drop the newly-created files on error.
		 *
		 * NOTE(review): this branch is also reached when a failure
		 * occurs after __wt_lsm_merge_update_tree succeeded; confirm
		 * the tree does not still reference "chunk" in that case,
		 * since it is freed below.
		 */
		if (chunk->uri != NULL) {
			WT_WITH_SCHEMA_LOCK(session, tret =
			    __wt_schema_drop(session, chunk->uri, drop_cfg));
			WT_TRET(tret);
		}
		if (create_bloom && chunk->bloom_uri != NULL) {
			WT_WITH_SCHEMA_LOCK(session,
			    tret = __wt_schema_drop(
			    session, chunk->bloom_uri, drop_cfg));
			WT_TRET(tret);
		}
		__wt_free(session, chunk->bloom_uri);
		__wt_free(session, chunk->uri);
		__wt_free(session, chunk);

		/* EINTR is only raised by the copy loop's active-tree check. */
		if (ret == EINTR)
			WT_TRET(__wt_verbose(session, WT_VERB_LSM,
			    "Merge aborted due to close"));
		else
			WT_TRET(__wt_verbose(session, WT_VERB_LSM,
			    "Merge failed with %s",
			    __wt_strerror(session, ret, NULL, 0)));
	}
	/* Never leave the merge-only session flags set for the caller. */
	F_CLR(session, WT_SESSION_NO_CACHE | WT_SESSION_NO_EVICTION);
	return (ret);
}
/* * __wt_eventv -- * Report a message to an event handler. */ int __wt_eventv(WT_SESSION_IMPL *session, bool msg_event, int error, const char *file_name, int line_number, const char *fmt, va_list ap) { WT_EVENT_HANDLER *handler; WT_DECL_RET; WT_SESSION *wt_session; struct timespec ts; size_t len, remain, wlen; int prefix_cnt; const char *err, *prefix; char *end, *p, tid[128]; /* * We're using a stack buffer because we want error messages no matter * what, and allocating a WT_ITEM, or the memory it needs, might fail. * * !!! * SECURITY: * Buffer placed at the end of the stack in case snprintf overflows. */ char s[2048]; /* * !!! * This function MUST handle a NULL WT_SESSION_IMPL handle. * * Without a session, we don't have event handlers or prefixes for the * error message. Write the error to stderr and call it a day. (It's * almost impossible for that to happen given how early we allocate the * first session, but if the allocation of the first session fails, for * example, we can end up here without a session.) */ if (session == NULL) { if (fprintf(stderr, "WiredTiger Error%s%s: ", error == 0 ? "" : ": ", error == 0 ? "" : __wt_strerror(session, error, NULL, 0)) < 0) ret = EIO; if (vfprintf(stderr, fmt, ap) < 0) ret = EIO; if (fprintf(stderr, "\n") < 0) ret = EIO; if (fflush(stderr) != 0) ret = EIO; return (ret); } p = s; end = s + sizeof(s); /* * We have several prefixes for the error message: a timestamp and the * process and thread ids, the database error prefix, the data-source's * name, and the session's name. Write them as a comma-separate list, * followed by a colon. */ prefix_cnt = 0; if (__wt_epoch(session, &ts) == 0) { __wt_thread_id(tid, sizeof(tid)); remain = WT_PTRDIFF(end, p); wlen = (size_t)snprintf(p, remain, "[%" PRIuMAX ":%" PRIuMAX "][%s]", (uintmax_t)ts.tv_sec, (uintmax_t)ts.tv_nsec / WT_THOUSAND, tid); p = wlen >= remain ? 
end : p + wlen; prefix_cnt = 1; } if ((prefix = S2C(session)->error_prefix) != NULL) { remain = WT_PTRDIFF(end, p); wlen = (size_t)snprintf(p, remain, "%s%s", prefix_cnt == 0 ? "" : ", ", prefix); p = wlen >= remain ? end : p + wlen; prefix_cnt = 1; } prefix = session->dhandle == NULL ? NULL : session->dhandle->name; if (prefix != NULL) { remain = WT_PTRDIFF(end, p); wlen = (size_t)snprintf(p, remain, "%s%s", prefix_cnt == 0 ? "" : ", ", prefix); p = wlen >= remain ? end : p + wlen; prefix_cnt = 1; } if ((prefix = session->name) != NULL) { remain = WT_PTRDIFF(end, p); wlen = (size_t)snprintf(p, remain, "%s%s", prefix_cnt == 0 ? "" : ", ", prefix); p = wlen >= remain ? end : p + wlen; prefix_cnt = 1; } if (prefix_cnt != 0) { remain = WT_PTRDIFF(end, p); wlen = (size_t)snprintf(p, remain, ": "); p = wlen >= remain ? end : p + wlen; } if (file_name != NULL) { remain = WT_PTRDIFF(end, p); wlen = (size_t) snprintf(p, remain, "%s, %d: ", file_name, line_number); p = wlen >= remain ? end : p + wlen; } remain = WT_PTRDIFF(end, p); wlen = (size_t)vsnprintf(p, remain, fmt, ap); p = wlen >= remain ? end : p + wlen; if (error != 0) { /* * When the engine calls __wt_err on error, it often outputs an * error message including the string associated with the error * it's returning. We could change the calls to call __wt_errx, * but it's simpler to not append an error string if all we are * doing is duplicating an existing error string. * * Use strcmp to compare: both strings are nul-terminated, and * we don't want to run past the end of the buffer. */ err = __wt_strerror(session, error, NULL, 0); len = strlen(err); if (WT_PTRDIFF(p, s) < len || strcmp(p - len, err) != 0) { remain = WT_PTRDIFF(end, p); (void)snprintf(p, remain, ": %s", err); } } /* * If a handler fails, return the error status: if we're in the process * of handling an error, any return value we provide will be ignored by * our caller, our caller presumably already has an error value it will * be returning. 
* * If an application-specified or default informational message handler * fails, complain using the application-specified or default error * handler. * * If an application-specified error message handler fails, complain * using the default error handler. If the default error handler fails, * there's nothing to do. */ wt_session = (WT_SESSION *)session; handler = session->event_handler; if (msg_event) { ret = handler->handle_message(handler, wt_session, s); if (ret != 0) __handler_failure(session, ret, "message", false); } else { ret = handler->handle_error(handler, wt_session, error, s); if (ret != 0 && handler->handle_error != __handle_error_default) __handler_failure(session, ret, "error", true); } return (ret); }