/*
 * __snapshot_worker --
 *	Snapshot the tree.
 *
 *	Serializes on the btree's snapshot lock, builds an updated snapshot
 *	list according to "op" (create, or one of the drop variants), flushes
 *	the cache and rewrites the file's snapshot metadata.  If "discard" is
 *	set, pages are discarded from the cache after the flush.
 */
static int
__snapshot_worker(
    WT_SESSION_IMPL *session, const char *name, int discard, snapshot_op op)
{
	WT_BTREE *btree;
	WT_DECL_RET;
	WT_SNAPSHOT *deleted, *snap, *snapbase;
	int force, matched, tracked;

	btree = session->btree;
	matched = tracked = 0;
	snap = snapbase = NULL;

	/* Snapshots are single-threaded. */
	__wt_writelock(session, btree->snaplock);

	/*
	 * Set the name to the default, if we aren't provided one.  An
	 * explicitly named (or non-create) operation forces a snapshot to
	 * be written even if the cache flush finds nothing dirty.
	 */
	if (op == SNAPSHOT && name == NULL) {
		force = 0;
		name = WT_INTERNAL_SNAPSHOT;
	} else
		force = 1;

	/*
	 * Get the list of snapshots for this file.  If there's no reference,
	 * this file is dead.  Discard it from the cache without bothering to
	 * write any dirty pages.
	 */
	if ((ret =
	    __wt_meta_snaplist_get(session, btree->name, &snapbase)) != 0) {
		if (ret == WT_NOTFOUND)
			ret = __wt_bt_cache_flush(
			    session, NULL, WT_SYNC_DISCARD_NOWRITE, 0);
		goto err;
	}

	/*
	 * NOTE(review): each case below relies on WT_SNAPSHOT_FOREACH leaving
	 * "snap" pointing at the empty slot terminating the list, which is
	 * then filled in as the new snapshot entry -- confirm against the
	 * __wt_meta_snaplist_get allocation contract.
	 */
	switch (op) {
	case SNAPSHOT:
		/*
		 * Create a new, possibly named, snapshot.  Review existing
		 * snapshots, deleting default snapshots and snapshots with
		 * matching names, add the new snapshot entry at the end of
		 * the list.
		 */
		WT_SNAPSHOT_FOREACH(snapbase, snap)
			if (strcmp(snap->name, name) == 0 ||
			    strcmp(snap->name, WT_INTERNAL_SNAPSHOT) == 0)
				F_SET(snap, WT_SNAP_DELETE);
		WT_ERR(__wt_strdup(session, name, &snap->name));
		F_SET(snap, WT_SNAP_ADD);
		break;
	case SNAPSHOT_DROP:
		/*
		 * Drop all snapshots with matching names.
		 * Drop all snapshots with the default name.
		 * Add a new snapshot with the default name.
		 */
		WT_SNAPSHOT_FOREACH(snapbase, snap) {
			/*
			 * There should be only one snapshot with a matching
			 * name, but it doesn't hurt to check the rest.
			 */
			if (strcmp(snap->name, name) == 0)
				matched = 1;
			else if (strcmp(snap->name, WT_INTERNAL_SNAPSHOT) != 0)
				continue;
			F_SET(snap, WT_SNAP_DELETE);
		}
		if (!matched)
			goto nomatch;
		WT_ERR(__wt_strdup(session, WT_INTERNAL_SNAPSHOT, &snap->name));
		F_SET(snap, WT_SNAP_ADD);
		break;
	case SNAPSHOT_DROP_ALL:
		/*
		 * Drop all snapshots.
		 * Add a new snapshot with the default name.
		 */
		WT_SNAPSHOT_FOREACH(snapbase, snap)
			F_SET(snap, WT_SNAP_DELETE);
		WT_ERR(__wt_strdup(session, WT_INTERNAL_SNAPSHOT, &snap->name));
		F_SET(snap, WT_SNAP_ADD);
		break;
	case SNAPSHOT_DROP_FROM:
		/*
		 * Drop all snapshots after, and including, the named snapshot.
		 * Drop all snapshots with the default name.
		 * Add a new snapshot with the default name.
		 */
		WT_SNAPSHOT_FOREACH(snapbase, snap) {
			if (strcmp(snap->name, name) == 0)
				matched = 1;
			if (matched ||
			    strcmp(snap->name, WT_INTERNAL_SNAPSHOT) == 0)
				F_SET(snap, WT_SNAP_DELETE);
		}
		if (!matched)
			goto nomatch;
		WT_ERR(__wt_strdup(session, WT_INTERNAL_SNAPSHOT, &snap->name));
		F_SET(snap, WT_SNAP_ADD);
		break;
	case SNAPSHOT_DROP_TO:
		/*
		 * Drop all snapshots before, and including, the named snapshot.
		 * Drop all snapshots with the default name.
		 * Add a new snapshot with the default name.
		 *
		 * "matched" is tested before it is set so the named snapshot
		 * itself (and everything before it) is marked for deletion.
		 */
		WT_SNAPSHOT_FOREACH(snapbase, snap) {
			if (!matched ||
			    strcmp(snap->name, WT_INTERNAL_SNAPSHOT) == 0)
				F_SET(snap, WT_SNAP_DELETE);
			if (strcmp(snap->name, name) == 0)
				matched = 1;
		}
		if (!matched)
			/* Shared error exit for all the drop variants. */
nomatch:		WT_ERR_MSG(session,
			    EINVAL, "no snapshot named %s was found", name);
		WT_ERR(__wt_strdup(session, WT_INTERNAL_SNAPSHOT, &snap->name));
		F_SET(snap, WT_SNAP_ADD);
		break;
	}

	/*
	 * Lock the snapshots that will be deleted.
	 *
	 * Snapshots are only locked when tracking is enabled, which covers
	 * sync and drop operations, but not close.  The reasoning is that
	 * there should be no access to a snapshot during close, because any
	 * thread accessing a snapshot will also have the current file handle
	 * open.
	 */
	if (WT_META_TRACKING(session))
		WT_SNAPSHOT_FOREACH(snapbase, deleted)
			if (F_ISSET(deleted, WT_SNAP_DELETE))
				WT_ERR(__wt_session_lock_snapshot(session,
				    deleted->name, WT_BTREE_EXCLUSIVE));

	WT_ERR(__wt_bt_cache_flush(
	    session, snapbase, discard ? WT_SYNC_DISCARD : WT_SYNC, force));

	/*
	 * If the flush produced a snapshot (filled in the new entry's raw
	 * address), update the metadata; otherwise a forced snapshot that
	 * produced nothing is an error.
	 */
	if (snap->raw.data == NULL) {
		if (force)
			WT_ERR_MSG(session,
			    EINVAL, "cache flush failed to create a snapshot");
	} else {
		WT_ERR(__wt_meta_snaplist_set(session, btree->name, snapbase));
		/*
		 * If tracking is enabled, defer making pages available until
		 * the end of the transaction.  The exception is if the handle
		 * is being discarded: in that case, it will be gone by the
		 * time we try to apply or unroll the meta tracking event.
		 */
		if (WT_META_TRACKING(session) && !discard) {
			WT_ERR(__wt_meta_track_checkpoint(session));
			tracked = 1;
		} else
			WT_ERR(__wt_bm_snapshot_resolve(session, snapbase));
	}

	/*
	 * When the checkpoint is tracked, the snapshot lock is released by
	 * the meta-tracking resolution, not here.
	 */
err:	__wt_meta_snaplist_free(session, snapbase);
	if (!tracked)
		__wt_rwunlock(session, btree->snaplock);
	return (ret);
}
/* * __wt_lsm_stat_init -- * Initialize a LSM statistics structure. */ int __wt_lsm_stat_init(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, WT_CURSOR_STAT *cst, uint32_t flags) { WT_CURSOR *stat_cursor; WT_DECL_ITEM(uribuf); WT_DECL_RET; WT_DSRC_STATS *stats; WT_LSM_CHUNK *chunk; const char *cfg[] = API_CONF_DEFAULTS( session, open_cursor, "statistics_fast=on"); const char *disk_cfg[] = API_CONF_DEFAULTS(session, open_cursor, "checkpoint=WiredTigerCheckpoint,statistics_fast=on"); const char *desc, *pvalue; uint64_t value; u_int i; int locked, stat_key; WT_UNUSED(flags); locked = 0; WT_ERR(__wt_scr_alloc(session, 0, &uribuf)); /* Clear the statistics we are about to recalculate. */ if (cst->stats != NULL) stats = (WT_DSRC_STATS *)cst->stats; else { WT_ERR(__wt_calloc_def(session, 1, &stats)); __wt_stat_init_dsrc_stats(stats); cst->stats_first = cst->stats = (WT_STATS *)stats; cst->stats_count = sizeof(*stats) / sizeof(WT_STATS); } *stats = lsm_tree->stats; if (LF_ISSET(WT_STATISTICS_CLEAR)) __wt_stat_clear_dsrc_stats(&lsm_tree->stats); /* Hold the LSM lock so that we can safely walk through the chunks. */ WT_ERR(__wt_readlock(session, lsm_tree->rwlock)); locked = 1; /* Set the stats for this run. */ WT_STAT_SET(stats, lsm_chunk_count, lsm_tree->nchunks); for (i = 0; i < lsm_tree->nchunks; i++) { chunk = lsm_tree->chunk[i]; if (chunk->generation > (uint32_t)WT_STAT(stats, lsm_generation_max)) WT_STAT_SET(stats, lsm_generation_max, chunk->generation); /* * LSM chunk reads happen from a checkpoint, so get the * statistics for a checkpoint if one exists. */ WT_ERR(__wt_buf_fmt( session, uribuf, "statistics:%s", chunk->uri)); ret = __wt_curstat_open(session, uribuf->data, F_ISSET(chunk, WT_LSM_CHUNK_ONDISK) ? disk_cfg : cfg, &stat_cursor); /* * XXX kludge: we may have an empty chunk where no checkpoint * was written. If so, try to open the ordinary handle on that * chunk instead. 
*/ if (ret == WT_NOTFOUND && F_ISSET(chunk, WT_LSM_CHUNK_ONDISK)) ret = __wt_curstat_open( session, uribuf->data, cfg, &stat_cursor); WT_ERR(ret); while ((ret = stat_cursor->next(stat_cursor)) == 0) { WT_ERR(stat_cursor->get_key(stat_cursor, &stat_key)); WT_ERR(stat_cursor->get_value( stat_cursor, &desc, &pvalue, &value)); WT_STAT_INCRKV(stats, stat_key, value); } WT_ERR_NOTFOUND_OK(ret); WT_ERR(stat_cursor->close(stat_cursor)); if (!F_ISSET(chunk, WT_LSM_CHUNK_BLOOM)) continue; WT_STAT_INCR(stats, bloom_count); WT_STAT_INCRV(stats, bloom_size, (chunk->count * lsm_tree->bloom_bit_count) / 8); WT_ERR(__wt_buf_fmt( session, uribuf, "statistics:%s", chunk->bloom_uri)); WT_ERR(__wt_curstat_open(session, uribuf->data, cfg, &stat_cursor)); stat_cursor->set_key( stat_cursor, WT_STAT_DSRC_CACHE_EVICTION_CLEAN); WT_ERR(stat_cursor->search(stat_cursor)); WT_ERR(stat_cursor->get_value( stat_cursor, &desc, &pvalue, &value)); WT_STAT_INCRV(stats, cache_eviction_clean, value); WT_STAT_INCRV(stats, bloom_page_evict, value); stat_cursor->set_key( stat_cursor, WT_STAT_DSRC_CACHE_EVICTION_DIRTY); WT_ERR(stat_cursor->search(stat_cursor)); WT_ERR(stat_cursor->get_value( stat_cursor, &desc, &pvalue, &value)); WT_STAT_INCRV(stats, cache_eviction_dirty, value); WT_STAT_INCRV(stats, bloom_page_evict, value); stat_cursor->set_key( stat_cursor, WT_STAT_DSRC_CACHE_EVICTION_FAIL); WT_ERR(stat_cursor->search(stat_cursor)); WT_ERR(stat_cursor->get_value( stat_cursor, &desc, &pvalue, &value)); WT_STAT_INCRV(stats, cache_eviction_fail, value); stat_cursor->set_key(stat_cursor, WT_STAT_DSRC_CACHE_READ); WT_ERR(stat_cursor->search(stat_cursor)); WT_ERR(stat_cursor->get_value( stat_cursor, &desc, &pvalue, &value)); WT_STAT_INCRV(stats, cache_read, value); WT_STAT_INCRV(stats, bloom_page_read, value); stat_cursor->set_key(stat_cursor, WT_STAT_DSRC_CACHE_WRITE); WT_ERR(stat_cursor->search(stat_cursor)); WT_ERR(stat_cursor->get_value( stat_cursor, &desc, &pvalue, &value)); WT_STAT_INCRV(stats, 
cache_write, value); WT_ERR(stat_cursor->close(stat_cursor)); } err: if (locked) WT_TRET(__wt_rwunlock(session, lsm_tree->rwlock)); __wt_scr_free(&uribuf); return (ret); }
/*
 * __sweep --
 *	Close unused dhandles on the connection dhandle list.
 */
static int
__sweep(WT_SESSION_IMPL *session)
{
	WT_CONNECTION_IMPL *conn;
	WT_DATA_HANDLE *dhandle, *dhandle_next;
	WT_DECL_RET;
	time_t now;

	conn = S2C(session);

	/*
	 * Walk the connection's handle list looking for candidates: skip any
	 * handle that still has session references, and don't discard handles
	 * that have been open recently (the time-of-death must be at least
	 * WT_DHANDLE_SWEEP_WAIT seconds in the past).  The next pointer is
	 * saved before each iteration because discarding a handle removes it
	 * from the list.
	 */
	WT_RET(__wt_seconds(session, &now));
	dhandle = SLIST_FIRST(&conn->dhlh);
	for (; dhandle != NULL; dhandle = dhandle_next) {
		dhandle_next = SLIST_NEXT(dhandle, l);
		if (dhandle->session_ref != 0 ||
		    now - dhandle->timeofdeath <= WT_DHANDLE_SWEEP_WAIT)
			continue;

		/*
		 * We have a candidate for closing; if it's open, acquire an
		 * exclusive lock on the handle and close it (the lock blocks
		 * threads from opening the handle).  We might be blocking an
		 * open for a fairly long time (over disk I/O), but the handle
		 * has been quiescent for awhile.
		 *
		 * The close can fail if an update cannot be written (updates in
		 * a no-longer-referenced file might not yet be globally visible
		 * if sessions have disjoint sets of files open).  If the handle
		 * is busy, skip it, we'll retry the close the next time, after
		 * the transaction state has progressed.
		 */
		if (F_ISSET(dhandle, WT_DHANDLE_OPEN)) {
			/*
			 * We don't set WT_DHANDLE_EXCLUSIVE deliberately, we
			 * want opens to block on us rather than returning an
			 * EBUSY error to the application.
			 */
			ret = __wt_try_writelock(session, dhandle->rwlock);
			if (ret == EBUSY) {
				ret = 0;
				continue;
			}
			WT_RET(ret);

			WT_WITH_DHANDLE(session, dhandle,
			    ret = __wt_conn_btree_sync_and_close(session));
			if (ret == EBUSY)
				ret = 0;

			/* Unlock before checking the close's result. */
			WT_TRET(__wt_rwunlock(session, dhandle->rwlock));
			WT_RET(ret);
		}

		/*
		 * Attempt to discard the handle (the called function checks
		 * the handle-open flag after acquiring appropriate locks,
		 * which is why we don't do any special handling of EBUSY
		 * returns above: that path never cleared the handle-open
		 * flag, so the discard will simply skip the handle).
		 */
		ret = __wt_conn_dhandle_discard_single(session, dhandle, 0);
		if (ret == EBUSY)
			ret = 0;
		WT_RET(ret);
	}
	return (0);
}
/*
 * __wt_lsm_tree_rename --
 *	Rename an LSM tree.
 *
 *	Shuts down the LSM worker, renames every chunk (and any associated
 *	Bloom filter file) to names derived from the new tree name, rewrites
 *	the tree's metadata and removes the old metadata entry.  The cached
 *	tree structure is discarded; the next operation re-creates it.
 */
int
__wt_lsm_tree_rename(WT_SESSION_IMPL *session,
    const char *olduri, const char *newuri, const char *cfg[])
{
	WT_DECL_RET;
	WT_ITEM buf;
	WT_LSM_CHUNK *chunk;
	WT_LSM_TREE *lsm_tree;
	const char *old;
	u_int i;
	int locked;

	old = NULL;
	WT_CLEAR(buf);
	locked = 0;

	/* Get the LSM tree. */
	WT_RET(__wt_lsm_tree_get(session, olduri, 1, &lsm_tree));

	/* Shut down the LSM worker. */
	WT_ERR(__lsm_tree_close(session, lsm_tree));

	/* Prevent any new opens. */
	WT_ERR(__wt_try_writelock(session, lsm_tree->rwlock));
	locked = 1;

	/* Set the new name. */
	WT_ERR(__lsm_tree_set_name(session, lsm_tree, newuri));

	/* Rename the chunks. */
	for (i = 0; i < lsm_tree->nchunks; i++) {
		chunk = lsm_tree->chunk[i];
		old = chunk->uri;
		chunk->uri = NULL;

		WT_ERR(__wt_lsm_tree_chunk_name(
		    session, lsm_tree, chunk->id, &buf));
		chunk->uri = __wt_buf_steal(session, &buf, NULL);
		WT_ERR(__wt_schema_rename(session, old, chunk->uri, cfg));
		__wt_free(session, old);

		if (F_ISSET(chunk, WT_LSM_CHUNK_BLOOM)) {
			old = chunk->bloom_uri;
			chunk->bloom_uri = NULL;
			WT_ERR(__wt_lsm_tree_bloom_name(
			    session, lsm_tree, chunk->id, &buf));
			chunk->bloom_uri = __wt_buf_steal(session, &buf, NULL);
			/*
			 * Bug fix: rename the Bloom filter file to its new
			 * Bloom URI.  The previous code passed chunk->uri as
			 * the target, which renamed the Bloom file on top of
			 * the chunk file's new name.  (The redundant F_SET of
			 * WT_LSM_CHUNK_BLOOM inside this F_ISSET guard was
			 * dropped as a no-op.)
			 */
			WT_ERR(__wt_schema_rename(
			    session, old, chunk->bloom_uri, cfg));
			__wt_free(session, old);
		}
	}

	/* Drop the lock before the metadata updates, as the original did. */
	ret = __wt_rwunlock(session, lsm_tree->rwlock);
	locked = 0;
	if (ret == 0)
		ret = __wt_lsm_meta_write(session, lsm_tree);
	if (ret == 0)
		ret = __wt_metadata_remove(session, olduri);

err:	if (locked)
		WT_TRET(__wt_rwunlock(session, lsm_tree->rwlock));
	if (old != NULL)
		__wt_free(session, old);
	/*
	 * Discard this LSM tree structure.  The first operation on the renamed
	 * tree will create a new one.
	 */
	WT_TRET(__lsm_tree_discard(session, lsm_tree));
	return (ret);
}