/* * __curlog_close -- * WT_CURSOR.close method for the log cursor type. */ static int __curlog_close(WT_CURSOR *cursor) { WT_CONNECTION_IMPL *conn; WT_CURSOR_LOG *cl; WT_DECL_RET; WT_SESSION_IMPL *session; cl = (WT_CURSOR_LOG *)cursor; CURSOR_API_CALL_PREPARE_ALLOWED(cursor, session, close, NULL); err: conn = S2C(session); if (F_ISSET(cl, WT_CURLOG_ARCHIVE_LOCK)) { (void)__wt_atomic_sub32(&conn->log_cursors, 1); __wt_readunlock(session, &conn->log->log_archive_lock); } __wt_free(session, cl->cur_lsn); __wt_free(session, cl->next_lsn); __wt_scr_free(session, &cl->logrec); __wt_scr_free(session, &cl->opkey); __wt_scr_free(session, &cl->opvalue); __wt_free(session, cl->packed_key); __wt_free(session, cl->packed_value); __wt_cursor_close(cursor); API_END_RET(session, ret); }
/* * __lsm_unpin_chunks -- * Decrement the reference count for a set of chunks. Allowing those * chunks to be considered for deletion. */ static void __lsm_unpin_chunks(WT_SESSION_IMPL *session, WT_LSM_WORKER_COOKIE *cookie) { u_int i; for (i = 0; i < cookie->nchunks; i++) { if (cookie->chunk_array[i] == NULL) continue; WT_ASSERT(session, cookie->chunk_array[i]->refcnt > 0); (void)__wt_atomic_sub32(&cookie->chunk_array[i]->refcnt, 1); } /* Ensure subsequent calls don't double decrement. */ cookie->nchunks = 0; }
/* * __lsm_worker_general_op -- * Execute a single bloom, drop or flush work unit. */ static int __lsm_worker_general_op( WT_SESSION_IMPL *session, WT_LSM_WORKER_ARGS *cookie, bool *completed) { WT_DECL_RET; WT_LSM_CHUNK *chunk; WT_LSM_WORK_UNIT *entry; bool force; *completed = false; /* * Return if this thread cannot process a bloom, drop or flush. */ if (!FLD_ISSET(cookie->type, WT_LSM_WORK_BLOOM | WT_LSM_WORK_DROP | WT_LSM_WORK_FLUSH)) return (WT_NOTFOUND); if ((ret = __wt_lsm_manager_pop_entry(session, cookie->type, &entry)) != 0 || entry == NULL) return (ret); if (entry->type == WT_LSM_WORK_FLUSH) { force = F_ISSET(entry, WT_LSM_WORK_FORCE); F_CLR(entry, WT_LSM_WORK_FORCE); WT_ERR(__wt_lsm_get_chunk_to_flush(session, entry->lsm_tree, force, &chunk)); /* * If we got a chunk to flush, checkpoint it. */ if (chunk != NULL) { WT_ERR(__wt_verbose(session, WT_VERB_LSM, "Flush%s chunk %d %s", force ? " w/ force" : "", chunk->id, chunk->uri)); ret = __wt_lsm_checkpoint_chunk( session, entry->lsm_tree, chunk); WT_ASSERT(session, chunk->refcnt > 0); (void)__wt_atomic_sub32(&chunk->refcnt, 1); WT_ERR(ret); } } else if (entry->type == WT_LSM_WORK_DROP) WT_ERR(__wt_lsm_free_chunks(session, entry->lsm_tree)); else if (entry->type == WT_LSM_WORK_BLOOM) WT_ERR(__wt_lsm_work_bloom(session, entry->lsm_tree)); *completed = true; err: __wt_lsm_manager_free_work_unit(session, entry); return (ret); }
/* * __wt_close -- * Close a file handle. */ int __wt_close(WT_SESSION_IMPL *session, WT_FH **fhp) { WT_CONNECTION_IMPL *conn; WT_DECL_RET; WT_FH *fh; uint64_t bucket; conn = S2C(session); if (*fhp == NULL) return (0); fh = *fhp; *fhp = NULL; __wt_spin_lock(session, &conn->fh_lock); if (fh == NULL || fh->ref == 0 || --fh->ref > 0) { __wt_spin_unlock(session, &conn->fh_lock); return (0); } /* Remove from the list. */ bucket = fh->name_hash % WT_HASH_ARRAY_SIZE; WT_CONN_FILE_REMOVE(conn, fh, bucket); (void)__wt_atomic_sub32(&conn->open_file_count, 1); __wt_spin_unlock(session, &conn->fh_lock); /* Discard the memory. * Note: For directories, we do not open valid directory handles on * windows since it is not possible to sync a directory */ if (fh->filehandle != INVALID_HANDLE_VALUE && CloseHandle(fh->filehandle) == 0) { ret = __wt_errno(); __wt_err(session, ret, "CloseHandle: %s", fh->name); } if (fh->filehandle_secondary != INVALID_HANDLE_VALUE && CloseHandle(fh->filehandle_secondary) == 0) { ret = __wt_errno(); __wt_err(session, ret, "CloseHandle: secondary: %s", fh->name); } __wt_free(session, fh->name); __wt_free(session, fh); return (ret); }
/* * __wt_close -- * Close a file handle. */ int __wt_close(WT_SESSION_IMPL *session, WT_FH **fhp) { WT_CONNECTION_IMPL *conn; WT_DECL_RET; WT_FH *fh; uint64_t bucket; conn = S2C(session); if (*fhp == NULL) return (0); fh = *fhp; *fhp = NULL; /* Track handle-close as a file operation, so open and close match. */ WT_RET(__wt_verbose( session, WT_VERB_FILEOPS, "%s: handle-close", fh->name)); /* * If the reference count hasn't gone to 0, or if it's an in-memory * object, we're done. * * Assert the reference count is correct, but don't let it wrap. */ __wt_spin_lock(session, &conn->fh_lock); WT_ASSERT(session, fh->ref > 0); if ((fh->ref > 0 && --fh->ref > 0) || F_ISSET(fh, WT_FH_IN_MEMORY)) { __wt_spin_unlock(session, &conn->fh_lock); return (0); } /* Remove from the list. */ bucket = fh->name_hash % WT_HASH_ARRAY_SIZE; WT_CONN_FILE_REMOVE(conn, fh, bucket); (void)__wt_atomic_sub32(&conn->open_file_count, 1); __wt_spin_unlock(session, &conn->fh_lock); /* Discard underlying resources. */ ret = fh->fh_close(session, fh); __wt_free(session, fh->name); __wt_free(session, fh); return (ret); }
/* * __wt_lsm_merge -- * Merge a set of chunks of an LSM tree. */ int __wt_lsm_merge(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, u_int id) { WT_BLOOM *bloom; WT_CURSOR *dest, *src; WT_DECL_RET; WT_ITEM key, value; WT_LSM_CHUNK *chunk; uint32_t generation; uint64_t insert_count, record_count; u_int dest_id, end_chunk, i, nchunks, start_chunk, start_id, verb; int tret; bool created_chunk, create_bloom, locked, in_sync; const char *cfg[3]; const char *drop_cfg[] = { WT_CONFIG_BASE(session, WT_SESSION_drop), "force", NULL }; bloom = NULL; chunk = NULL; dest = src = NULL; start_id = 0; created_chunk = create_bloom = locked = in_sync = false; /* Fast path if it's obvious no merges could be done. */ if (lsm_tree->nchunks < lsm_tree->merge_min && lsm_tree->merge_aggressiveness < WT_LSM_AGGRESSIVE_THRESHOLD) return (WT_NOTFOUND); /* * Use the lsm_tree lock to read the chunks (so no switches occur), but * avoid holding it while the merge is in progress: that may take a * long time. */ WT_RET(__wt_lsm_tree_writelock(session, lsm_tree)); locked = true; WT_ERR(__lsm_merge_span(session, lsm_tree, id, &start_chunk, &end_chunk, &record_count)); nchunks = (end_chunk + 1) - start_chunk; WT_ASSERT(session, nchunks > 0); start_id = lsm_tree->chunk[start_chunk]->id; /* Find the merge generation. */ for (generation = 0, i = 0; i < nchunks; i++) generation = WT_MAX(generation, lsm_tree->chunk[start_chunk + i]->generation + 1); WT_ERR(__wt_lsm_tree_writeunlock(session, lsm_tree)); locked = false; /* Allocate an ID for the merge. */ dest_id = __wt_atomic_add32(&lsm_tree->last, 1); /* * We only want to do the chunk loop if we're running with verbose, * so we wrap these statements in the conditional. Avoid the loop * in the normal path. */ if (WT_VERBOSE_ISSET(session, WT_VERB_LSM)) { WT_ERR(__wt_verbose(session, WT_VERB_LSM, "Merging %s chunks %u-%u into %u (%" PRIu64 " records)" ", generation %" PRIu32, lsm_tree->name, start_chunk, end_chunk, dest_id, record_count, generation)); for (verb = start_chunk; verb <= end_chunk; verb++) WT_ERR(__wt_verbose(session, WT_VERB_LSM, "Merging %s: Chunk[%u] id %u, gen: %" PRIu32 ", size: %" PRIu64 ", records: %" PRIu64, lsm_tree->name, verb, lsm_tree->chunk[verb]->id, lsm_tree->chunk[verb]->generation, lsm_tree->chunk[verb]->size, lsm_tree->chunk[verb]->count)); } WT_ERR(__wt_calloc_one(session, &chunk)); created_chunk = true; chunk->id = dest_id; if (FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_MERGED) && (FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OLDEST) || start_chunk > 0) && record_count > 0) create_bloom = true; /* * Special setup for the merge cursor: * first, reset to open the dependent cursors; * then restrict the cursor to a specific number of chunks; * then set MERGE so the cursor doesn't track updates to the tree. */ WT_ERR(__wt_open_cursor(session, lsm_tree->name, NULL, NULL, &src)); F_SET(src, WT_CURSTD_RAW); WT_ERR(__wt_clsm_init_merge(src, start_chunk, start_id, nchunks)); WT_WITH_SCHEMA_LOCK(session, ret = __wt_lsm_tree_setup_chunk(session, lsm_tree, chunk)); WT_ERR(ret); if (create_bloom) { WT_ERR(__wt_lsm_tree_setup_bloom(session, lsm_tree, chunk)); WT_ERR(__wt_bloom_create(session, chunk->bloom_uri, lsm_tree->bloom_config, record_count, lsm_tree->bloom_bit_count, lsm_tree->bloom_hash_count, &bloom)); } /* Discard pages we read as soon as we're done with them. */ F_SET(session, WT_SESSION_NO_CACHE); cfg[0] = WT_CONFIG_BASE(session, WT_SESSION_open_cursor); cfg[1] = "bulk,raw,skip_sort_check"; cfg[2] = NULL; WT_ERR(__wt_open_cursor(session, chunk->uri, NULL, cfg, &dest)); #define LSM_MERGE_CHECK_INTERVAL WT_THOUSAND for (insert_count = 0; (ret = src->next(src)) == 0; insert_count++) { if (insert_count % LSM_MERGE_CHECK_INTERVAL == 0) { if (!F_ISSET(lsm_tree, WT_LSM_TREE_ACTIVE)) WT_ERR(EINTR); WT_STAT_FAST_CONN_INCRV(session, lsm_rows_merged, LSM_MERGE_CHECK_INTERVAL); ++lsm_tree->merge_progressing; } WT_ERR(src->get_key(src, &key)); dest->set_key(dest, &key); WT_ERR(src->get_value(src, &value)); dest->set_value(dest, &value); WT_ERR(dest->insert(dest)); if (create_bloom) WT_ERR(__wt_bloom_insert(bloom, &key)); } WT_ERR_NOTFOUND_OK(ret); WT_STAT_FAST_CONN_INCRV(session, lsm_rows_merged, insert_count % LSM_MERGE_CHECK_INTERVAL); ++lsm_tree->merge_progressing; WT_ERR(__wt_verbose(session, WT_VERB_LSM, "Bloom size for %" PRIu64 " has %" PRIu64 " items inserted.", record_count, insert_count)); /* * Closing and syncing the files can take a while. Set the * merge_syncing field so that compact knows it is still in * progress. */ (void)__wt_atomic_add32(&lsm_tree->merge_syncing, 1); in_sync = true; /* * We've successfully created the new chunk. Now install it. We need * to ensure that the NO_CACHE flag is cleared and the bloom filter * is closed (even if a step fails), so track errors but don't return * until we've cleaned up. */ WT_TRET(src->close(src)); WT_TRET(dest->close(dest)); src = dest = NULL; F_CLR(session, WT_SESSION_NO_CACHE); /* * We're doing advisory reads to fault the new trees into cache. * Don't block if the cache is full: our next unit of work may be to * discard some trees to free space. */ F_SET(session, WT_SESSION_NO_EVICTION); if (create_bloom) { if (ret == 0) WT_TRET(__wt_bloom_finalize(bloom)); /* * Read in a key to make sure the Bloom filters btree handle is * open before it becomes visible to application threads. * Otherwise application threads will stall while it is opened * and internal pages are read into cache. */ if (ret == 0) { WT_CLEAR(key); WT_TRET_NOTFOUND_OK(__wt_bloom_get(bloom, &key)); } WT_TRET(__wt_bloom_close(bloom)); bloom = NULL; } WT_ERR(ret); /* * Open a handle on the new chunk before application threads attempt * to access it, opening it pre-loads internal pages into the file * system cache. */ cfg[1] = "checkpoint=" WT_CHECKPOINT; WT_ERR(__wt_open_cursor(session, chunk->uri, NULL, cfg, &dest)); WT_TRET(dest->close(dest)); dest = NULL; ++lsm_tree->merge_progressing; (void)__wt_atomic_sub32(&lsm_tree->merge_syncing, 1); in_sync = false; WT_ERR_NOTFOUND_OK(ret); WT_ERR(__wt_lsm_tree_set_chunk_size(session, chunk)); WT_ERR(__wt_lsm_tree_writelock(session, lsm_tree)); locked = true; /* * Check whether we raced with another merge, and adjust the chunk * array offset as necessary. */ if (start_chunk >= lsm_tree->nchunks || lsm_tree->chunk[start_chunk]->id != start_id) for (start_chunk = 0; start_chunk < lsm_tree->nchunks; start_chunk++) if (lsm_tree->chunk[start_chunk]->id == start_id) break; /* * It is safe to error out here - since the update can only fail * prior to making updates to the tree. */ WT_ERR(__wt_lsm_merge_update_tree( session, lsm_tree, start_chunk, nchunks, chunk)); if (create_bloom) F_SET(chunk, WT_LSM_CHUNK_BLOOM); chunk->count = insert_count; chunk->generation = generation; F_SET(chunk, WT_LSM_CHUNK_ONDISK); /* * We have no current way of continuing if the metadata update fails, * so we will panic in that case. Put some effort into cleaning up * after ourselves here - so things have a chance of shutting down. * * Any errors that happened after the tree was locked are * fatal - we can't guarantee the state of the tree. */ if ((ret = __wt_lsm_meta_write(session, lsm_tree)) != 0) WT_PANIC_ERR(session, ret, "Failed finalizing LSM merge"); lsm_tree->dsk_gen++; /* Update the throttling while holding the tree lock. */ __wt_lsm_tree_throttle(session, lsm_tree, true); /* Schedule a pass to discard old chunks */ WT_ERR(__wt_lsm_manager_push_entry( session, WT_LSM_WORK_DROP, 0, lsm_tree)); err: if (locked) WT_TRET(__wt_lsm_tree_writeunlock(session, lsm_tree)); if (in_sync) (void)__wt_atomic_sub32(&lsm_tree->merge_syncing, 1); if (src != NULL) WT_TRET(src->close(src)); if (dest != NULL) WT_TRET(dest->close(dest)); if (bloom != NULL) WT_TRET(__wt_bloom_close(bloom)); if (ret != 0 && created_chunk) { /* Drop the newly-created files on error. */ if (chunk->uri != NULL) { WT_WITH_SCHEMA_LOCK(session, tret = __wt_schema_drop(session, chunk->uri, drop_cfg)); WT_TRET(tret); } if (create_bloom && chunk->bloom_uri != NULL) { WT_WITH_SCHEMA_LOCK(session, tret = __wt_schema_drop( session, chunk->bloom_uri, drop_cfg)); WT_TRET(tret); } __wt_free(session, chunk->bloom_uri); __wt_free(session, chunk->uri); __wt_free(session, chunk); if (ret == EINTR) WT_TRET(__wt_verbose(session, WT_VERB_LSM, "Merge aborted due to close")); else WT_TRET(__wt_verbose(session, WT_VERB_LSM, "Merge failed with %s", __wt_strerror(session, ret, NULL, 0))); } F_CLR(session, WT_SESSION_NO_CACHE | WT_SESSION_NO_EVICTION); return (ret); }