/*
 * Append one birth record (max_offset, txg) to an indirect-births object.
 *
 * The record is written to the backing object at the byte offset reported
 * by vdev_indirect_births_size_impl() before the entry count is bumped,
 * and the in-core copy of the entry array is then regrown by allocating a
 * buffer of the new size, copying the old contents over, and freeing the
 * old buffer.
 *
 * Must be called from syncing context with a syncing tx (asserted).
 */
void
vdev_indirect_births_add_entry(vdev_indirect_births_t *vib,
    uint64_t max_offset, uint64_t txg, dmu_tx_t *tx)
{
	vdev_indirect_birth_entry_phys_t entry;
	vdev_indirect_birth_entry_phys_t *grown;
	uint64_t prev_bytes;
	uint64_t grown_bytes;

	ASSERT(dmu_tx_is_syncing(tx));
	ASSERT(dsl_pool_sync_context(dmu_tx_pool(tx)));
	ASSERT(vdev_indirect_births_verify(vib));

	/* The phys header (vib_count) is modified below. */
	dmu_buf_will_dirty(vib->vib_dbuf, tx);

	entry.vibe_offset = max_offset;
	entry.vibe_phys_birth_txg = txg;

	/* The current size doubles as the append offset of the new record. */
	prev_bytes = vdev_indirect_births_size_impl(vib);
	dmu_write(vib->vib_objset, vib->vib_object, prev_bytes,
	    sizeof (entry), &entry, tx);

	/* Only after the write is issued does the count include the entry. */
	vib->vib_phys->vib_count++;
	grown_bytes = vdev_indirect_births_size_impl(vib);

	/* Rebuild the in-core array with room for the extra entry. */
	grown = kmem_alloc(grown_bytes, KM_SLEEP);
	if (prev_bytes != 0) {
		bcopy(vib->vib_entries, grown, prev_bytes);
		kmem_free(vib->vib_entries, prev_bytes);
	}
	grown[vib->vib_phys->vib_count - 1] = entry;
	vib->vib_entries = grown;
}
/*
 * Look up the keystore node for dataset object `dsobj`.
 *
 * The keystore AVL tree is searched under sk_lock held as reader.  If no
 * node is found directly, the dataset is held and, when it turns out to
 * be a snapshot, the lookup is retried using the head dataset object of
 * its dsl_dir.
 *
 * sk_lock is dropped around the dataset hold.  dp_config_rwlock is taken
 * as reader for the hold unless we are in syncing context or the caller
 * indicates (config_rwlock_held) that it already holds it.
 *
 * Returns the matching node, or NULL if the keystore is empty, no node
 * matches, or the dataset could not be held.
 */
zcrypt_keystore_node_t *
zcrypt_keystore_find_node(spa_t *spa, uint64_t dsobj,
    boolean_t config_rwlock_held)
{
	zcrypt_keystore_node_t key;
	zcrypt_keystore_node_t *node;
	dsl_pool_t *dp;
	dsl_dataset_t *ds;
	boolean_t take_config_lock;
	int error;

	rw_enter(&spa->spa_keystore->sk_lock, RW_READER);

	if (avl_is_empty(&spa->spa_keystore->sk_dslkeys)) {
		rw_exit(&spa->spa_keystore->sk_lock);
		return (NULL);
	}

	key.skn_os = dsobj;
	node = avl_find(&spa->spa_keystore->sk_dslkeys, &key, NULL);
	if (node != NULL) {
		rw_exit(&spa->spa_keystore->sk_lock);
		return (node);
	}

	/*
	 * Direct lookup missed.  Drop the keystore lock while the dataset
	 * is held, then retake it for the second lookup.
	 */
	dp = spa_get_dsl(spa);
	rw_exit(&spa->spa_keystore->sk_lock);

	take_config_lock = !dsl_pool_sync_context(dp) && !config_rwlock_held;

	if (take_config_lock)
		rw_enter(&dp->dp_config_rwlock, RW_READER);
	error = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds);
	if (take_config_lock)
		rw_exit(&dp->dp_config_rwlock);

	rw_enter(&spa->spa_keystore->sk_lock, RW_READER);
	if (error == 0) {
		if (dsl_dataset_is_snapshot(ds)) {
			/* Retry with the head dataset of this snapshot's dir. */
			key.skn_os = ds->ds_dir->dd_phys->dd_head_dataset_obj;
			node = avl_find(&spa->spa_keystore->sk_dslkeys,
			    &key, NULL);
		}
		dsl_dataset_rele(ds, FTAG);
	}
	rw_exit(&spa->spa_keystore->sk_lock);

	return (node);
}
void spa_feature_create_zap_objects(spa_t *spa, dmu_tx_t *tx) { /* * We create feature flags ZAP objects in two instances: during pool * creation and during pool upgrade. */ ASSERT(dsl_pool_sync_context(spa_get_dsl(spa)) || (!spa->spa_sync_on && tx->tx_txg == TXG_INITIAL)); spa->spa_feat_for_read_obj = zap_create_link(spa->spa_meta_objset, DMU_OTN_ZAP_METADATA, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_FEATURES_FOR_READ, tx); spa->spa_feat_for_write_obj = zap_create_link(spa->spa_meta_objset, DMU_OTN_ZAP_METADATA, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_FEATURES_FOR_WRITE, tx); spa->spa_feat_desc_obj = zap_create_link(spa->spa_meta_objset, DMU_OTN_ZAP_METADATA, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_FEATURE_DESCRIPTIONS, tx); }
/*
 * Free the block described by `bpp` from syncing context.
 *
 * A free zio is created as a child of `pio`, inheriting pio's io_flags,
 * and is issued without waiting for it to complete.
 */
void
dsl_free_sync(zio_t *pio, dsl_pool_t *dp, uint64_t txg, const blkptr_t *bpp)
{
	zio_t *free_zio;

	ASSERT(dsl_pool_sync_context(dp));

	free_zio = zio_free_sync(pio, dp->dp_spa, txg, bpp, pio->io_flags);
	zio_nowait(free_zio);
}
/*
 * Open the dsl_dir_t for directory object `ddobj`.
 *
 * The bonus buffer of `ddobj` is held with `tag`.  If no dsl_dir_t is
 * attached to that buffer yet, one is instantiated: its parent (if any)
 * is opened recursively, its name is taken from `tail` when supplied or
 * otherwise looked up in the parent's child-dir ZAP, and for clones the
 * origin's creation txg is read directly from the origin's bonus buffer.
 * The new dsl_dir_t is then attached to the buffer; if another thread
 * attached one first, ours is discarded and the winner is used.
 *
 * The caller must hold dp_config_rwlock or be in syncing context
 * (asserted).
 *
 * Returns 0 and stores the dsl_dir_t in *ddp on success.  On failure an
 * error code is returned, the bonus hold is released and *ddp is not
 * written.
 */
int
dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj,
    const char *tail, void *tag, dsl_dir_t **ddp)
{
	dmu_buf_t *dbuf;
	dsl_dir_t *dd;
	int err;

	ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
	    dsl_pool_sync_context(dp));

	err = dmu_bonus_hold(dp->dp_meta_objset, ddobj, tag, &dbuf);
	if (err)
		return (err);
	dd = dmu_buf_get_user(dbuf);
#ifdef ZFS_DEBUG
	{
		dmu_object_info_t doi;
		dmu_object_info_from_db(dbuf, &doi);
		ASSERT3U(doi.doi_type, ==, DMU_OT_DSL_DIR);
		ASSERT3U(doi.doi_bonus_size, >=, sizeof (dsl_dir_phys_t));
	}
#endif
	if (dd == NULL) {
		dsl_dir_t *winner;

		dd = kmem_zalloc(sizeof (dsl_dir_t), KM_SLEEP);
		dd->dd_object = ddobj;
		dd->dd_dbuf = dbuf;
		dd->dd_pool = dp;
		dd->dd_phys = dbuf->db_data;
		mutex_init(&dd->dd_lock, NULL, MUTEX_DEFAULT, NULL);

		list_create(&dd->dd_prop_cbs, sizeof (dsl_prop_cb_record_t),
		    offsetof(dsl_prop_cb_record_t, cbr_node));

		dsl_dir_snap_cmtime_update(dd);

		if (dd->dd_phys->dd_parent_obj) {
			/* The parent is held with the child as its tag. */
			err = dsl_dir_open_obj(dp, dd->dd_phys->dd_parent_obj,
			    NULL, dd, &dd->dd_parent);
			if (err)
				goto errout;
			if (tail) {
#ifdef ZFS_DEBUG
				uint64_t foundobj;

				err = zap_lookup(dp->dp_meta_objset,
				    dd->dd_parent->dd_phys->dd_child_dir_zapobj,
				    tail, sizeof (foundobj), 1, &foundobj);
				ASSERT(err || foundobj == ddobj);
#endif
				/*
				 * NOTE(review): unbounded copy; presumably
				 * callers have already validated the length
				 * of `tail` -- confirm.
				 */
				(void) strcpy(dd->dd_myname, tail);
			} else {
				err = zap_value_search(dp->dp_meta_objset,
				    dd->dd_parent->dd_phys->dd_child_dir_zapobj,
				    ddobj, 0, dd->dd_myname);
			}
			if (err)
				goto errout;
		} else {
			/* No parent: this dir is named after the pool. */
			(void) strcpy(dd->dd_myname, spa_name(dp->dp_spa));
		}

		if (dsl_dir_is_clone(dd)) {
			dmu_buf_t *origin_bonus;
			dsl_dataset_phys_t *origin_phys;

			/*
			 * We can't open the origin dataset, because
			 * that would require opening this dsl_dir.
			 * Just look at its phys directly instead.
			 */
			err = dmu_bonus_hold(dp->dp_meta_objset,
			    dd->dd_phys->dd_origin_obj, FTAG, &origin_bonus);
			if (err)
				goto errout;
			origin_phys = origin_bonus->db_data;
			dd->dd_origin_txg =
			    origin_phys->ds_creation_txg;
			dmu_buf_rele(origin_bonus, FTAG);
		}

		winner = dmu_buf_set_user_ie(dbuf, dd, &dd->dd_phys,
		    dsl_dir_evict);
		if (winner) {
			/*
			 * Somebody else attached a dsl_dir_t first.  Ours
			 * was never published, so tear it down completely
			 * (including the list set up by list_create()
			 * above) and use theirs.
			 */
			if (dd->dd_parent)
				dsl_dir_close(dd->dd_parent, dd);
			list_destroy(&dd->dd_prop_cbs);
			mutex_destroy(&dd->dd_lock);
			kmem_free(dd, sizeof (dsl_dir_t));
			dd = winner;
		} else {
			spa_open_ref(dp->dp_spa, dd);
		}
	}

	/*
	 * The dsl_dir_t has both open-to-close and instantiate-to-evict
	 * holds on the spa.  We need the open-to-close holds because
	 * otherwise the spa_refcnt wouldn't change when we open a
	 * dir which the spa also has open, so we could incorrectly
	 * think it was OK to unload/export/destroy the pool.  We need
	 * the instantiate-to-evict hold because the dsl_dir_t has a
	 * pointer to the dd_pool, which has a pointer to the spa_t.
	 */
	spa_open_ref(dp->dp_spa, tag);
	ASSERT3P(dd->dd_pool, ==, dp);
	ASSERT3U(dd->dd_object, ==, ddobj);
	ASSERT3P(dd->dd_dbuf, ==, dbuf);
	*ddp = dd;
	return (0);

errout:
	/*
	 * Only reached for a dsl_dir_t allocated above, after its mutex
	 * and list were initialized; undo all of that before freeing it.
	 */
	if (dd->dd_parent)
		dsl_dir_close(dd->dd_parent, dd);
	list_destroy(&dd->dd_prop_cbs);
	mutex_destroy(&dd->dd_lock);
	kmem_free(dd, sizeof (dsl_dir_t));
	dmu_buf_rele(dbuf, tag);
	return (err);
}
/*
 * Open the dsl_dir_t named by `name`.
 *
 * The first component of `name` selects the pool.  If `spa` is NULL the
 * pool is opened by that name for the duration of the call; otherwise
 * the first component is ignored and `spa` is used instead.
 *
 * The remaining components are walked through the child-dir ZAPs under
 * dp_config_rwlock held as reader.  The walk stops at a component that
 * begins with '@' or that does not exist as a child dir.  If `tailp` is
 * non-NULL, *tailp is set to the unconsumed remainder of `name` (NULL
 * when the whole name was consumed).
 *
 * Returns 0 and stores the dir, held with `tag`, in *ddp on success.
 * Returns ENOENT if more than one component is left over, or if any
 * component is left over and tailp is NULL; other errors are passed
 * through from the lookups.  On any error no hold is kept and *ddp is
 * not written.
 */
int
dsl_dir_open_spa(spa_t *spa, const char *name, void *tag,
    dsl_dir_t **ddp, const char **tailp)
{
	char buf[MAXNAMELEN];
	const char *next, *nextnext = NULL;
	int err;
	dsl_dir_t *dd;
	dsl_pool_t *dp;
	uint64_t ddobj;
	int openedspa = FALSE;

	dprintf("%s\n", name);

	err = getcomponent(name, buf, &next);
	if (err)
		return (err);
	if (spa == NULL) {
		err = spa_open(buf, &spa, FTAG);
		if (err) {
			dprintf("spa_open(%s) failed\n", buf);
			return (err);
		}
		openedspa = TRUE;

		/* XXX this assertion belongs in spa_open */
		ASSERT(!dsl_pool_sync_context(spa_get_dsl(spa)));
	}

	dp = spa_get_dsl(spa);

	rw_enter(&dp->dp_config_rwlock, RW_READER);
	err = dsl_dir_open_obj(dp, dp->dp_root_dir_obj, NULL, tag, &dd);
	if (err) {
		rw_exit(&dp->dp_config_rwlock);
		if (openedspa)
			spa_close(spa, FTAG);
		return (err);
	}

	while (next != NULL) {
		dsl_dir_t *child_ds;

		err = getcomponent(next, buf, &nextnext);
		if (err)
			break;
		ASSERT(next[0] != '\0');
		/* A snapshot component ends the directory walk. */
		if (next[0] == '@')
			break;
		dprintf("looking up %s in obj%lld\n",
		    buf, dd->dd_phys->dd_child_dir_zapobj);

		err = zap_lookup(dp->dp_meta_objset,
		    dd->dd_phys->dd_child_dir_zapobj,
		    buf, sizeof (ddobj), 1, &ddobj);
		if (err) {
			/*
			 * A missing child is not an error here; the
			 * leftover component is reported via tailp.
			 */
			if (err == ENOENT)
				err = 0;
			break;
		}

		err = dsl_dir_open_obj(dp, ddobj, buf, tag, &child_ds);
		if (err)
			break;
		dsl_dir_close(dd, tag);
		dd = child_ds;
		next = nextnext;
	}
	rw_exit(&dp->dp_config_rwlock);

	if (err) {
		dsl_dir_close(dd, tag);
		if (openedspa)
			spa_close(spa, FTAG);
		return (err);
	}

	/*
	 * It's an error if there's more than one component left, or
	 * tailp==NULL and there's any component left.
	 */
	if (next != NULL &&
	    (tailp == NULL || (nextnext && nextnext[0] != '\0'))) {
		/* bad path name */
		dsl_dir_close(dd, tag);
		dprintf("next=%p (%s) tail=%p\n", next, next?next:"", tailp);
		err = ENOENT;
	}
	if (tailp)
		*tailp = next;
	if (openedspa)
		spa_close(spa, FTAG);
	/*
	 * Only publish the dir on success.  On the bad-path branch above
	 * its hold has already been dropped, so the pointer must not be
	 * handed back to the caller.
	 */
	if (err == 0)
		*ddp = dd;
	return (err);
}
/*
 * Visit every objset at or below `name` (the pool's own name when
 * `name` is NULL) and invoke `func` on each.
 *
 * Child dirs are visited recursively when DS_FIND_CHILDREN is set,
 * snapshots of this dataset when DS_FIND_SNAPSHOTS is set, and finally
 * the dataset itself.  Dirs whose name starts with '$' are skipped.
 * The walk stops at the first non-zero value returned by `func` or by a
 * failed hold, and that value is returned; otherwise the result of the
 * final `func` call on the dataset itself is returned.
 */
int
dmu_objset_find_spa(spa_t *spa, const char *name,
    int func(spa_t *, uint64_t, const char *, void *), void *arg, int flags)
{
	dsl_dir_t *dd;
	dsl_pool_t *dp;
	dsl_dataset_t *ds;
	zap_cursor_t zc;
	zap_attribute_t *attr;
	char *path;
	uint64_t thisobj;
	int err;

	if (name == NULL)
		name = spa_name(spa);

	err = dsl_dir_open_spa(spa, name, FTAG, &dd, NULL);
	if (err)
		return (err);

	/* Don't visit hidden ($MOS & $ORIGIN) objsets. */
	if (dd->dd_myname[0] == '$') {
		dsl_dir_close(dd, FTAG);
		return (0);
	}

	thisobj = dd->dd_phys->dd_head_dataset_obj;
	attr = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);
	dp = dd->dd_pool;

	/*
	 * Recurse into every child dir.
	 */
	if (flags & DS_FIND_CHILDREN) {
		zap_cursor_init(&zc, dp->dp_meta_objset,
		    dd->dd_phys->dd_child_dir_zapobj);
		while (zap_cursor_retrieve(&zc, attr) == 0) {
			ASSERT(attr->za_integer_length == sizeof (uint64_t));
			ASSERT(attr->za_num_integers == 1);

			/* Build "<name>/<child>" on the heap, not the stack. */
			path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
			(void) strcpy(path, name);
			(void) strcat(path, "/");
			(void) strcat(path, attr->za_name);
			err = dmu_objset_find_spa(spa, path, func, arg, flags);
			kmem_free(path, MAXPATHLEN);
			if (err)
				break;
			(void) zap_cursor_advance(&zc);
		}
		zap_cursor_fini(&zc);
	}

	/*
	 * Visit every snapshot, unless a child already failed.
	 */
	if (err == 0 && (flags & DS_FIND_SNAPSHOTS)) {
		boolean_t need_lock = !dsl_pool_sync_context(dp);

		if (need_lock)
			rw_enter(&dp->dp_config_rwlock, RW_READER);
		err = dsl_dataset_hold_obj(dp, thisobj, FTAG, &ds);
		if (need_lock)
			rw_exit(&dp->dp_config_rwlock);

		if (err == 0) {
			uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj;

			/* Only the ZAP object number is needed from here on. */
			dsl_dataset_rele(ds, FTAG);

			zap_cursor_init(&zc, dp->dp_meta_objset, snapobj);
			while (zap_cursor_retrieve(&zc, attr) == 0) {
				ASSERT(attr->za_integer_length ==
				    sizeof (uint64_t));
				ASSERT(attr->za_num_integers == 1);

				/* Build "<name>@<snapshot>". */
				path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
				(void) strcpy(path, name);
				(void) strcat(path, "@");
				(void) strcat(path, attr->za_name);
				err = func(spa, attr->za_first_integer,
				    path, arg);
				kmem_free(path, MAXPATHLEN);
				if (err)
					break;
				(void) zap_cursor_advance(&zc);
			}
			zap_cursor_fini(&zc);
		}
	}

	dsl_dir_close(dd, FTAG);
	kmem_free(attr, sizeof (zap_attribute_t));

	if (err)
		return (err);

	/*
	 * Finally apply func to the dataset itself.
	 */
	return (func(spa, thisobj, name, arg));
}
/*
 * Decide whether the indirect mapping of `vd` should be condensed now.
 *
 * Returns B_FALSE when condensing is disabled, another condense is in
 * progress, the pool is shutting down, `vd` is not (yet) an indirect
 * vdev, or it has no obsolete space map.  Otherwise returns B_TRUE if
 * either
 *   - the percentage of mapped bytes that are obsolete has reached
 *     zfs_indirect_condense_obsolete_pct and the mapping is larger than
 *     zfs_condense_min_mapping_bytes, or
 *   - the obsolete space map's length has reached
 *     zfs_condense_max_obsolete_bytes.
 *
 * Must be called from syncing context (asserted).
 */
boolean_t
vdev_indirect_should_condense(vdev_t *vd)
{
	vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping;
	spa_t *spa = vd->vdev_spa;

	ASSERT(dsl_pool_sync_context(spa->spa_dsl_pool));

	if (!zfs_condense_indirect_vdevs_enable)
		return (B_FALSE);

	/*
	 * Only one indirect vdev may be condensed at a time.
	 */
	if (spa->spa_condensing_indirect != NULL)
		return (B_FALSE);

	if (spa_shutting_down(spa))
		return (B_FALSE);

	/*
	 * The mapping object's size has to stay fixed while condensing,
	 * which rules out vdevs that are still being removed: only true
	 * indirect vdevs qualify.
	 */
	if (vd->vdev_ops != &vdev_indirect_ops)
		return (B_FALSE);

	/*
	 * Without an obsolete space map nothing new has been marked
	 * obsolete, so condensing would gain nothing.
	 */
	if (vd->vdev_obsolete_sm == NULL) {
		ASSERT0(vdev_obsolete_sm_object(vd));
		return (B_FALSE);
	}

	ASSERT(vd->vdev_obsolete_sm != NULL);

	ASSERT3U(vdev_obsolete_sm_object(vd), ==,
	    space_map_object(vd->vdev_obsolete_sm));

	uint64_t bytes_mapped = vdev_indirect_mapping_bytes_mapped(vim);
	uint64_t bytes_obsolete = space_map_allocated(vd->vdev_obsolete_sm);
	uint64_t mapping_size = vdev_indirect_mapping_size(vim);
	uint64_t obsolete_sm_size = space_map_length(vd->vdev_obsolete_sm);

	ASSERT3U(bytes_obsolete, <=, bytes_mapped);

	/*
	 * Condense when a large share of the mapped bytes is obsolete,
	 * provided the mapping is not already small; this is likely to
	 * shrink the memory consumed by the mapping.
	 */
	uint64_t obsolete_pct = bytes_obsolete * 100 / bytes_mapped;

	if (obsolete_pct >= zfs_indirect_condense_obsolete_pct &&
	    mapping_size > zfs_condense_min_mapping_bytes) {
		zfs_dbgmsg("should condense vdev %llu because obsolete "
		    "spacemap covers %d%% of %lluMB mapping",
		    (u_longlong_t)vd->vdev_id,
		    (int)obsolete_pct,
		    (u_longlong_t)bytes_mapped / 1024 / 1024);
		return (B_TRUE);
	}

	/*
	 * Otherwise condense only if the obsolete space map itself has
	 * grown too large on disk, in order to reclaim that space.
	 */
	if (obsolete_sm_size < zfs_condense_max_obsolete_bytes)
		return (B_FALSE);

	zfs_dbgmsg("should condense vdev %llu because obsolete sm "
	    "length %lluMB >= max size %lluMB",
	    (u_longlong_t)vd->vdev_id,
	    (u_longlong_t)obsolete_sm_size / 1024 / 1024,
	    (u_longlong_t)zfs_condense_max_obsolete_bytes /
	    1024 / 1024);
	return (B_TRUE);
}