/* * Called when an unmount is requested and certain sanity checks have * already passed. At this point no dentries or inodes have been reclaimed * from their respective caches. We drop the extra reference on the .zfs * control directory to allow everything to be reclaimed. All snapshots * must already have been unmounted to reach this point. */ void zfs_preumount(struct super_block *sb) { zfs_sb_t *zsb = sb->s_fs_info; /* zsb is NULL when zfs_domount fails during mount */ if (zsb) { zfsctl_destroy(sb->s_fs_info); /* * Wait for iput_async before entering evict_inodes in * generic_shutdown_super. The reason we must finish before * evict_inodes is when lazytime is on, or when zfs_purgedir * calls zfs_zget, iput would bump i_count from 0 to 1. This * would race with the i_count check in evict_inodes. This means * it could destroy the inode while we are still using it. * * We wait for two passes. xattr directories in the first pass * may add xattr entries in zfs_purgedir, so in the second pass * we wait for them. We don't use taskq_wait here because it is * a pool wide taskq. Other mounted filesystems can constantly * do iput_async and there's no guarantee when taskq will be * empty. */ taskq_wait_outstanding(dsl_pool_iput_taskq( dmu_objset_pool(zsb->z_os)), 0); taskq_wait_outstanding(dsl_pool_iput_taskq( dmu_objset_pool(zsb->z_os)), 0); } }
/* * Delete the entire contents of a directory. Return a count * of the number of entries that could not be deleted. If we encounter * an error, return a count of at least one so that the directory stays * in the unlinked set. * * NOTE: this function assumes that the directory is inactive, * so there is no need to lock its entries before deletion. * Also, it assumes the directory contents is *only* regular * files. */ static int zfs_purgedir(znode_t *dzp) { zap_cursor_t zc; zap_attribute_t zap; znode_t *xzp; dmu_tx_t *tx; zfsvfs_t *zfsvfs = dzp->z_zfsvfs; zfs_dirlock_t dl; int skipped = 0; int error; for (zap_cursor_init(&zc, zfsvfs->z_os, dzp->z_id); (error = zap_cursor_retrieve(&zc, &zap)) == 0; zap_cursor_advance(&zc)) { error = zfs_zget(zfsvfs, ZFS_DIRENT_OBJ(zap.za_first_integer), &xzp); if (error) { skipped += 1; continue; } /* ASSERT((ZTOV(xzp)->v_type == VREG) || (ZTOV(xzp)->v_type == VLNK)); */ tx = dmu_tx_create(zfsvfs->z_os); dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); dmu_tx_hold_zap(tx, dzp->z_id, FALSE, zap.za_name); dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE); dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); /* Is this really needed ? */ zfs_sa_upgrade_txholds(tx, xzp); error = dmu_tx_assign(tx, TXG_WAIT); if (error) { dmu_tx_abort(tx); //VN_RELE(ZTOV(xzp)); // async VN_RELE_ASYNC(ZTOV(xzp), dsl_pool_vnrele_taskq(dmu_objset_pool(zfsvfs->z_os))); skipped += 1; continue; } bzero(&dl, sizeof (dl)); dl.dl_dzp = dzp; dl.dl_name = zap.za_name; error = zfs_link_destroy(&dl, xzp, tx, 0, NULL); if (error) skipped += 1; dmu_tx_commit(tx); //VN_RELE(ZTOV(xzp)); // async VN_RELE_ASYNC(ZTOV(xzp), dsl_pool_vnrele_taskq(dmu_objset_pool(zfsvfs->z_os))); } zap_cursor_fini(&zc); if (error != ENOENT) skipped += 1; return (skipped); }
zilog_t * zil_alloc(objset_t *os, zil_header_t *zh_phys) { zilog_t *zilog; zilog = kmem_zalloc(sizeof (zilog_t), KM_SLEEP); zilog->zl_header = zh_phys; zilog->zl_os = os; zilog->zl_spa = dmu_objset_spa(os); zilog->zl_dmu_pool = dmu_objset_pool(os); zilog->zl_destroy_txg = TXG_INITIAL - 1; mutex_init(&zilog->zl_lock, NULL, MUTEX_DEFAULT, NULL); list_create(&zilog->zl_itx_list, sizeof (itx_t), offsetof(itx_t, itx_node)); list_create(&zilog->zl_lwb_list, sizeof (lwb_t), offsetof(lwb_t, lwb_node)); mutex_init(&zilog->zl_vdev_lock, NULL, MUTEX_DEFAULT, NULL); avl_create(&zilog->zl_vdev_tree, zil_vdev_compare, sizeof (zil_vdev_node_t), offsetof(zil_vdev_node_t, zv_node)); cv_init(&zilog->zl_cv_writer, NULL, CV_DEFAULT, NULL); cv_init(&zilog->zl_cv_suspend, NULL, CV_DEFAULT, NULL); return (zilog); }
/* * Ensure the zap is flushed then inform the VFS of the capacity change. */ static int zvol_update_volsize(uint64_t volsize, objset_t *os) { dmu_tx_t *tx; int error; uint64_t txg; ASSERT(MUTEX_HELD(&zvol_state_lock)); tx = dmu_tx_create(os); dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL); dmu_tx_mark_netfree(tx); error = dmu_tx_assign(tx, TXG_WAIT); if (error) { dmu_tx_abort(tx); return (SET_ERROR(error)); } txg = dmu_tx_get_txg(tx); error = zap_update(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize, tx); dmu_tx_commit(tx); txg_wait_synced(dmu_objset_pool(os), txg); if (error == 0) error = dmu_free_long_range(os, ZVOL_OBJ, volsize, DMU_OBJECT_END); return (error); }
/* * Return an empty bpobj, preferably the empty dummy one (dp_empty_bpobj). */ uint64_t bpobj_alloc_empty(objset_t *os, int blocksize, dmu_tx_t *tx) { zfeature_info_t *empty_bpobj_feat = &spa_feature_table[SPA_FEATURE_EMPTY_BPOBJ]; spa_t *spa = dmu_objset_spa(os); dsl_pool_t *dp = dmu_objset_pool(os); if (spa_feature_is_enabled(spa, empty_bpobj_feat)) { if (!spa_feature_is_active(spa, empty_bpobj_feat)) { ASSERT0(dp->dp_empty_bpobj); dp->dp_empty_bpobj = bpobj_alloc(os, SPA_MAXBLOCKSIZE, tx); VERIFY(zap_add(os, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_EMPTY_BPOBJ, sizeof (uint64_t), 1, &dp->dp_empty_bpobj, tx) == 0); } spa_feature_incr(spa, empty_bpobj_feat, tx); ASSERT(dp->dp_empty_bpobj != 0); return (dp->dp_empty_bpobj); } else { return (bpobj_alloc(os, blocksize, tx)); } }
void dmu_objset_rele(objset_t *os, void *tag) { dsl_pool_t *dp = dmu_objset_pool(os); dsl_dataset_rele(os->os_dsl_dataset, tag); dsl_pool_rele(dp, tag); }
/* ARGSUSED */ static int zfsctl_snapdir_readdir_cb(vnode_t *vp, void *dp, int *eofp, offset_t *offp, offset_t *nextp, void *data, int flags) { zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data; char snapname[ZFS_MAX_DATASET_NAME_LEN]; uint64_t id, cookie; boolean_t case_conflict; int error; ZFS_ENTER(zfsvfs); cookie = *offp; dsl_pool_config_enter(dmu_objset_pool(zfsvfs->z_os), FTAG); error = dmu_snapshot_list_next(zfsvfs->z_os, sizeof (snapname), snapname, &id, &cookie, &case_conflict); dsl_pool_config_exit(dmu_objset_pool(zfsvfs->z_os), FTAG); if (error) { ZFS_EXIT(zfsvfs); if (error == ENOENT) { *eofp = 1; return (0); } return (error); } if (flags & V_RDDIR_ENTFLAGS) { edirent_t *eodp = dp; (void) strcpy(eodp->ed_name, snapname); eodp->ed_ino = ZFSCTL_INO_SNAP(id); eodp->ed_eflags = case_conflict ? ED_CASE_CONFLICT : 0; } else { struct dirent64 *odp = dp; (void) strcpy(odp->d_name, snapname); odp->d_ino = ZFSCTL_INO_SNAP(id); } *nextp = cookie; ZFS_EXIT(zfsvfs); return (0); }
static int osd_object_sync(const struct lu_env *env, struct dt_object *dt) { struct osd_device *osd = osd_obj2dev(osd_dt_obj(dt)); ENTRY; /* XXX: no other option than syncing the whole filesystem until we * support ZIL */ txg_wait_synced(dmu_objset_pool(osd->od_objset.os), 0ULL); RETURN(0); }
static int zpl_snapdir_iterate(struct file *filp, struct dir_context *ctx) { zfs_sb_t *zsb = ITOZSB(filp->f_path.dentry->d_inode); fstrans_cookie_t cookie; char snapname[MAXNAMELEN]; boolean_t case_conflict; uint64_t id, pos; int error = 0; ZFS_ENTER(zsb); cookie = spl_fstrans_mark(); if (!dir_emit_dots(filp, ctx)) goto out; pos = ctx->pos; while (error == 0) { dsl_pool_config_enter(dmu_objset_pool(zsb->z_os), FTAG); error = -dmu_snapshot_list_next(zsb->z_os, MAXNAMELEN, snapname, &id, &pos, &case_conflict); dsl_pool_config_exit(dmu_objset_pool(zsb->z_os), FTAG); if (error) goto out; if (!dir_emit(ctx, snapname, strlen(snapname), ZFSCTL_INO_SHARES - id, DT_DIR)) goto out; ctx->pos = pos; } out: spl_fstrans_unmark(cookie); ZFS_EXIT(zsb); if (error == -ENOENT) return (0); return (error); }
/*ARGSUSED*/ int zfs_sync(struct super_block *sb, int wait, cred_t *cr) { zfs_sb_t *zsb = sb->s_fs_info; /* * Data integrity is job one. We don't want a compromised kernel * writing to the storage pool, so we never sync during panic. */ if (unlikely(oops_in_progress)) return (0); /* * Semantically, the only requirement is that the sync be initiated. * The DMU syncs out txgs frequently, so there's nothing to do. */ if (!wait) return (0); if (zsb != NULL) { /* * Sync a specific filesystem. */ dsl_pool_t *dp; ZFS_ENTER(zsb); dp = dmu_objset_pool(zsb->z_os); /* * If the system is shutting down, then skip any * filesystems which may exist on a suspended pool. */ if (spa_suspended(dp->dp_spa)) { ZFS_EXIT(zsb); return (0); } if (zsb->z_log != NULL) zil_commit(zsb->z_log, 0); ZFS_EXIT(zsb); } else { /* * Sync all ZFS filesystems. This is what happens when you * run sync(1M). Unlike other filesystems, ZFS honors the * request by waiting for all pools to commit all dirty data. */ spa_sync_allpools(); } return (0); }
static int zfs_vfs_sync(struct mount *mp, __unused int waitfor, __unused vfs_context_t context) { zfsvfs_t *zfsvfs = vfs_fsprivate(mp); ZFS_ENTER(zfsvfs); /* * Mac OS X needs a file system modify time * * We use the mtime of the "com.apple.system.mtime" * extended attribute, which is associated with the * file system root directory. * * Here we sync any mtime changes to this attribute. */ if (zfsvfs->z_mtime_vp != NULL) { timestruc_t mtime; znode_t *zp; top: zp = VTOZ(zfsvfs->z_mtime_vp); ZFS_TIME_DECODE(&mtime, zp->z_phys->zp_mtime); if (zfsvfs->z_last_mtime_synced < mtime.tv_sec) { dmu_tx_t *tx; int error; tx = dmu_tx_create(zfsvfs->z_os); dmu_tx_hold_bonus(tx, zp->z_id); error = dmu_tx_assign(tx, zfsvfs->z_assign); if (error) { if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) { dmu_tx_wait(tx); dmu_tx_abort(tx); goto top; } dmu_tx_abort(tx); } else { dmu_buf_will_dirty(zp->z_dbuf, tx); dmu_tx_commit(tx); zfsvfs->z_last_mtime_synced = mtime.tv_sec; } } } if (zfsvfs->z_log != NULL) zil_commit(zfsvfs->z_log, UINT64_MAX, 0); else txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); ZFS_EXIT(zfsvfs); return (0); }
void bpobj_decr_empty(objset_t *os, dmu_tx_t *tx) { dsl_pool_t *dp = dmu_objset_pool(os); spa_feature_decr(dmu_objset_spa(os), SPA_FEATURE_EMPTY_BPOBJ, tx); if (!spa_feature_is_active(dmu_objset_spa(os), SPA_FEATURE_EMPTY_BPOBJ)) { VERIFY3U(0, ==, zap_remove(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_EMPTY_BPOBJ, tx)); VERIFY3U(0, ==, dmu_object_free(os, dp->dp_empty_bpobj, tx)); dp->dp_empty_bpobj = 0; }
static int osd_object_sync(const struct lu_env *env, struct dt_object *dt, __u64 start, __u64 end) { struct osd_device *osd = osd_obj2dev(osd_dt_obj(dt)); ENTRY; /* XXX: no other option than syncing the whole filesystem until we * support ZIL. If the object tracked the txg that it was last * modified in, it could pass that txg here instead of "0". Maybe * the changes are already committed, so no wait is needed at all? */ txg_wait_synced(dmu_objset_pool(osd->od_os), 0ULL); RETURN(0); }
int zfs_vnop_ioctl_fullfsync(struct vnode *vp, vfs_context_t ct, zfsvfs_t *zfsvfs) { int error; error = zfs_fsync(vp, /*syncflag*/0, NULL, (caller_context_t *)ct); if (error) return (error); if (zfsvfs->z_log != NULL) zil_commit(zfsvfs->z_log, 0); else txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); return (0); }
int dmu_objset_userspace_upgrade(objset_t *os) { uint64_t obj; int err = 0; if (dmu_objset_userspace_present(os)) return (0); if (!dmu_objset_userused_enabled(os->os)) return (ENOTSUP); if (dmu_objset_is_snapshot(os)) return (EINVAL); /* * We simply need to mark every object dirty, so that it will be * synced out and now accounted. If this is called * concurrently, or if we already did some work before crashing, * that's fine, since we track each object's accounted state * independently. */ for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE, 0)) { dmu_tx_t *tx; dmu_buf_t *db; int objerr; if (issig(JUSTLOOKING) && issig(FORREAL)) return (EINTR); objerr = dmu_bonus_hold(os, obj, FTAG, &db); if (objerr) continue; tx = dmu_tx_create(os); dmu_tx_hold_bonus(tx, obj); objerr = dmu_tx_assign(tx, TXG_WAIT); if (objerr) { dmu_tx_abort(tx); continue; } dmu_buf_will_dirty(db, tx); dmu_buf_rele(db, FTAG); dmu_tx_commit(tx); } os->os->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE; txg_wait_synced(dmu_objset_pool(os), 0); return (0); }
/*ARGSUSED*/ int zfs_sync(vfs_t *vfsp, short flag, cred_t *cr) { /* * Data integrity is job one. We don't want a compromised kernel * writing to the storage pool, so we never sync during panic. */ if (panicstr) return (0); /* * SYNC_ATTR is used by fsflush() to force old filesystems like UFS * to sync metadata, which they would otherwise cache indefinitely. * Semantically, the only requirement is that the sync be initiated. * The DMU syncs out txgs frequently, so there's nothing to do. */ if (flag & SYNC_ATTR) return (0); if (vfsp != NULL) { /* * Sync a specific filesystem. */ zfsvfs_t *zfsvfs = vfsp->vfs_data; ZFS_ENTER(zfsvfs); if (zfsvfs->z_log != NULL) zil_commit(zfsvfs->z_log, UINT64_MAX, 0); else txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); ZFS_EXIT(zfsvfs); } else { /* * Sync all ZFS filesystems. This is what happens when you * run sync(1M). Unlike other filesystems, ZFS honors the * request by waiting for all pools to commit all dirty data. */ spa_sync_allpools(); } return (0); }
static void zvol_last_close(zvol_state_t *zv) { zil_close(zv->zv_zilog); zv->zv_zilog = NULL; dmu_buf_rele(zv->zv_dbuf, zvol_tag); zv->zv_dbuf = NULL; /* * Evict cached data */ if (dsl_dataset_is_dirty(dmu_objset_ds(zv->zv_objset)) && !(zv->zv_flags & ZVOL_RDONLY)) txg_wait_synced(dmu_objset_pool(zv->zv_objset), 0); (void) dmu_objset_evict_dbufs(zv->zv_objset); dmu_objset_disown(zv->zv_objset, zvol_tag); zv->zv_objset = NULL; }
/* * When we are called, os MUST refer to an objset associated with a dataset * that is owned by 'tag'; that is, is held and long held by 'tag' and ds_owner * == tag. We will then release and reacquire ownership of the dataset while * holding the pool config_rwlock to avoid intervening namespace or ownership * changes may occur. * * This exists solely to accommodate zfs_ioc_userspace_upgrade()'s desire to * release the hold on its dataset and acquire a new one on the dataset of the * same name so that it can be partially torn down and reconstructed. */ void dmu_objset_refresh_ownership(objset_t *os, void *tag) { dsl_pool_t *dp; dsl_dataset_t *ds, *newds; char name[MAXNAMELEN]; ds = os->os_dsl_dataset; VERIFY3P(ds, !=, NULL); VERIFY3P(ds->ds_owner, ==, tag); VERIFY(dsl_dataset_long_held(ds)); dsl_dataset_name(ds, name); dp = dmu_objset_pool(os); dsl_pool_config_enter(dp, FTAG); dmu_objset_disown(os, tag); VERIFY0(dsl_dataset_own(dp, name, tag, &newds)); VERIFY3P(newds, ==, os->os_dsl_dataset); dsl_pool_config_exit(dp, FTAG); }
/*ARGSUSED*/ static int zfs_sync(vfs_t *vfsp, int waitfor) { /* * Data integrity is job one. We don't want a compromised kernel * writing to the storage pool, so we never sync during panic. */ if (panicstr) return (0); if (vfsp != NULL) { /* * Sync a specific filesystem. */ zfsvfs_t *zfsvfs = vfsp->vfs_data; int error; error = vfs_stdsync(vfsp, waitfor); if (error != 0) return (error); ZFS_ENTER(zfsvfs); if (zfsvfs->z_log != NULL) zil_commit(zfsvfs->z_log, UINT64_MAX, 0); else txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); ZFS_EXIT(zfsvfs); } else { /* * Sync all ZFS filesystems. This is what happens when you * run sync(1M). Unlike other filesystems, ZFS honors the * request by waiting for all pools to commit all dirty data. */ spa_sync_allpools(); } return (0); }
/* * Teardown the zfs_sb_t. * * Note, if 'unmounting' if FALSE, we return with the 'z_teardown_lock' * and 'z_teardown_inactive_lock' held. */ int zfs_sb_teardown(zfs_sb_t *zsb, boolean_t unmounting) { znode_t *zp; rrw_enter(&zsb->z_teardown_lock, RW_WRITER, FTAG); if (!unmounting) { /* * We purge the parent filesystem's super block as the * parent filesystem and all of its snapshots have their * inode's super block set to the parent's filesystem's * super block. Note, 'z_parent' is self referential * for non-snapshots. */ shrink_dcache_sb(zsb->z_parent->z_sb); } /* * If someone has not already unmounted this file system, * drain the iput_taskq to ensure all active references to the * zfs_sb_t have been handled only then can it be safely destroyed. */ if (zsb->z_os) taskq_wait(dsl_pool_iput_taskq(dmu_objset_pool(zsb->z_os))); /* * Close the zil. NB: Can't close the zil while zfs_inactive * threads are blocked as zil_close can call zfs_inactive. */ if (zsb->z_log) { zil_close(zsb->z_log); zsb->z_log = NULL; } rw_enter(&zsb->z_teardown_inactive_lock, RW_WRITER); /* * If we are not unmounting (ie: online recv) and someone already * unmounted this file system while we were doing the switcheroo, * or a reopen of z_os failed then just bail out now. */ if (!unmounting && (zsb->z_unmounted || zsb->z_os == NULL)) { rw_exit(&zsb->z_teardown_inactive_lock); rrw_exit(&zsb->z_teardown_lock, FTAG); return (EIO); } /* * At this point there are no VFS ops active, and any new VFS ops * will fail with EIO since we have z_teardown_lock for writer (only * relevant for forced unmount). * * Release all holds on dbufs. */ mutex_enter(&zsb->z_znodes_lock); for (zp = list_head(&zsb->z_all_znodes); zp != NULL; zp = list_next(&zsb->z_all_znodes, zp)) { if (zp->z_sa_hdl) { ASSERT(atomic_read(&ZTOI(zp)->i_count) > 0); zfs_znode_dmu_fini(zp); } } mutex_exit(&zsb->z_znodes_lock); /* * If we are unmounting, set the unmounted flag and let new VFS ops * unblock. zfs_inactive will have the unmounted behavior, and all * other VFS ops will fail with EIO. */ if (unmounting) { zsb->z_unmounted = B_TRUE; rrw_exit(&zsb->z_teardown_lock, FTAG); rw_exit(&zsb->z_teardown_inactive_lock); } /* * z_os will be NULL if there was an error in attempting to reopen * zsb, so just return as the properties had already been * * unregistered and cached data had been evicted before. */ if (zsb->z_os == NULL) return (0); /* * Unregister properties. */ zfs_unregister_callbacks(zsb); /* * Evict cached data */ if (dsl_dataset_is_dirty(dmu_objset_ds(zsb->z_os)) && !zfs_is_readonly(zsb)) txg_wait_synced(dmu_objset_pool(zsb->z_os), 0); dmu_objset_evict_dbufs(zsb->z_os); return (0); }
/* * Teardown the zfsvfs::z_os. * * Note, if 'unmounting' if FALSE, we return with the 'z_teardown_lock' * and 'z_teardown_inactive_lock' held. */ static int zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting) { znode_t *zp; rrw_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG); if (!unmounting) { /* * We purge the parent filesystem's vfsp as the parent * filesystem and all of its snapshots have their vnode's * v_vfsp set to the parent's filesystem's vfsp. Note, * 'z_parent' is self referential for non-snapshots. */ (void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0); #ifdef FREEBSD_NAMECACHE cache_purgevfs(zfsvfs->z_parent->z_vfs); #endif } /* * Close the zil. NB: Can't close the zil while zfs_inactive * threads are blocked as zil_close can call zfs_inactive. */ if (zfsvfs->z_log) { zil_close(zfsvfs->z_log); zfsvfs->z_log = NULL; } rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_WRITER); /* * If we are not unmounting (ie: online recv) and someone already * unmounted this file system while we were doing the switcheroo, * or a reopen of z_os failed then just bail out now. */ if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) { rw_exit(&zfsvfs->z_teardown_inactive_lock); rrw_exit(&zfsvfs->z_teardown_lock, FTAG); return (EIO); } /* * At this point there are no vops active, and any new vops will * fail with EIO since we have z_teardown_lock for writer (only * relavent for forced unmount). * * Release all holds on dbufs. */ mutex_enter(&zfsvfs->z_znodes_lock); for (zp = list_head(&zfsvfs->z_all_znodes); zp != NULL; zp = list_next(&zfsvfs->z_all_znodes, zp)) if (zp->z_dbuf) { ASSERT(ZTOV(zp)->v_count >= 0); zfs_znode_dmu_fini(zp); } mutex_exit(&zfsvfs->z_znodes_lock); /* * If we are unmounting, set the unmounted flag and let new vops * unblock. zfs_inactive will have the unmounted behavior, and all * other vops will fail with EIO. */ if (unmounting) { zfsvfs->z_unmounted = B_TRUE; rrw_exit(&zfsvfs->z_teardown_lock, FTAG); rw_exit(&zfsvfs->z_teardown_inactive_lock); #ifdef __FreeBSD__ /* * Some znodes might not be fully reclaimed, wait for them. */ mutex_enter(&zfsvfs->z_znodes_lock); while (list_head(&zfsvfs->z_all_znodes) != NULL) { msleep(zfsvfs, &zfsvfs->z_znodes_lock, 0, "zteardown", 0); } mutex_exit(&zfsvfs->z_znodes_lock); #endif } /* * z_os will be NULL if there was an error in attempting to reopen * zfsvfs, so just return as the properties had already been * unregistered and cached data had been evicted before. */ if (zfsvfs->z_os == NULL) return (0); /* * Unregister properties. */ zfs_unregister_callbacks(zfsvfs); /* * Evict cached data */ if (dmu_objset_evict_dbufs(zfsvfs->z_os)) { txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); (void) dmu_objset_evict_dbufs(zfsvfs->z_os); } return (0); }
void zfs_rmnode(znode_t *zp) { zfs_sb_t *zsb = ZTOZSB(zp); objset_t *os = zsb->z_os; znode_t *xzp = NULL; dmu_tx_t *tx; uint64_t acl_obj; uint64_t xattr_obj; uint64_t count; int error; ASSERT(zp->z_links == 0); ASSERT(atomic_read(&ZTOI(zp)->i_count) == 0); /* * If this is an attribute directory, purge its contents. */ if (S_ISDIR(ZTOI(zp)->i_mode) && (zp->z_pflags & ZFS_XATTR)) { error = zap_count(os, zp->z_id, &count); if (error) { zfs_znode_dmu_fini(zp); return; } if (count > 0) { taskq_t *taskq; /* * There are still directory entries in this xattr * directory. Let zfs_unlinked_drain() deal with * them to avoid deadlocking this process in the * zfs_purgedir()->zfs_zget()->ilookup() callpath * on the xattr inode's I_FREEING bit. */ taskq = dsl_pool_iput_taskq(dmu_objset_pool(os)); taskq_dispatch(taskq, (task_func_t *) zfs_unlinked_drain, zsb, TQ_SLEEP); zfs_znode_dmu_fini(zp); return; } } /* * Free up all the data in the file. */ error = dmu_free_long_range(os, zp->z_id, 0, DMU_OBJECT_END); if (error) { /* * Not enough space. Leave the file in the unlinked set. */ zfs_znode_dmu_fini(zp); return; } /* * If the file has extended attributes, we're going to unlink * the xattr dir. */ error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zsb), &xattr_obj, sizeof (xattr_obj)); if (error == 0 && xattr_obj) { error = zfs_zget(zsb, xattr_obj, &xzp); ASSERT(error == 0); } acl_obj = zfs_external_acl(zp); /* * Set up the final transaction. */ tx = dmu_tx_create(os); dmu_tx_hold_free(tx, zp->z_id, 0, DMU_OBJECT_END); dmu_tx_hold_zap(tx, zsb->z_unlinkedobj, FALSE, NULL); if (xzp) { dmu_tx_hold_zap(tx, zsb->z_unlinkedobj, TRUE, NULL); dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE); } if (acl_obj) dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END); zfs_sa_upgrade_txholds(tx, zp); error = dmu_tx_assign(tx, TXG_WAIT); if (error) { /* * Not enough space to delete the file. Leave it in the * unlinked set, leaking it until the fs is remounted (at * which point we'll call zfs_unlinked_drain() to process it). */ dmu_tx_abort(tx); zfs_znode_dmu_fini(zp); goto out; } if (xzp) { ASSERT(error == 0); mutex_enter(&xzp->z_lock); xzp->z_unlinked = B_TRUE; /* mark xzp for deletion */ xzp->z_links = 0; /* no more links to it */ VERIFY(0 == sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zsb), &xzp->z_links, sizeof (xzp->z_links), tx)); mutex_exit(&xzp->z_lock); zfs_unlinked_add(xzp, tx); } /* Remove this znode from the unlinked set */ VERIFY3U(0, ==, zap_remove_int(zsb->z_os, zsb->z_unlinkedobj, zp->z_id, tx)); zfs_znode_delete(zp, tx); dmu_tx_commit(tx); out: if (xzp) iput(ZTOI(xzp)); }
/* * This function is either called directly from reclaim, or in a delayed * manner, so the value of zp->z_vnode may be NULL. */ void zfs_rmnode(znode_t *zp) { zfsvfs_t *zfsvfs = zp->z_zfsvfs; objset_t *os = zfsvfs->z_os; znode_t *xzp = NULL; dmu_tx_t *tx; uint64_t acl_obj; uint64_t xattr_obj; int error; ASSERT(zp->z_links == 0); /* * If this is an attribute directory, purge its contents. */ if (IFTOVT((mode_t)zp->z_mode) == VDIR && (zp->z_pflags & ZFS_XATTR)) { if (!ZTOV(zp) || zfs_purgedir(zp) != 0) { /* * Not enough space to delete some xattrs. * Leave it in the unlinked set. */ zfs_znode_dmu_fini(zp); return; } } /* * Free up all the data in the file. */ error = dmu_free_long_range(os, zp->z_id, 0, DMU_OBJECT_END); if (error) { /* * Not enough space. Leave the file in the unlinked set. */ zfs_znode_dmu_fini(zp); //zfs_znode_free(zp); return; } /* * If the file has extended attributes, we're going to unlink * the xattr dir. */ error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &xattr_obj, sizeof (xattr_obj)); if (error == 0 && xattr_obj) { error = zfs_zget(zfsvfs, xattr_obj, &xzp); ASSERT(error == 0); } acl_obj = zfs_external_acl(zp); /* * Set up the final transaction. */ tx = dmu_tx_create(os); dmu_tx_hold_free(tx, zp->z_id, 0, DMU_OBJECT_END); dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); if (xzp) { dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, TRUE, NULL); dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE); } if (acl_obj) dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END); zfs_sa_upgrade_txholds(tx, zp); error = dmu_tx_assign(tx, TXG_WAIT); if (error) { /* * Not enough space to delete the file. Leave it in the * unlinked set, leaking it until the fs is remounted (at * which point we'll call zfs_unlinked_drain() to process it). */ dmu_tx_abort(tx); zfs_znode_dmu_fini(zp); //zfs_znode_free(zp); goto out; } if (xzp) { ASSERT(error == 0); mutex_enter(&xzp->z_lock); xzp->z_unlinked = B_TRUE; /* mark xzp for deletion */ xzp->z_links = 0; /* no more links to it */ VERIFY(0 == sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs), &xzp->z_links, sizeof (xzp->z_links), tx)); mutex_exit(&xzp->z_lock); zfs_unlinked_add(xzp, tx); } /* Remove this znode from the unlinked set */ VERIFY3U(0, ==, zap_remove_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj, zp->z_id, tx)); zfs_znode_delete(zp, tx); dmu_tx_commit(tx); out: if (xzp) //VN_RELE(ZTOV(xzp)); // async VN_RELE_ASYNC(ZTOV(xzp), dsl_pool_vnrele_taskq(dmu_objset_pool(zfsvfs->z_os))); }
int zvol_strategy(buf_t *bp) { zvol_state_t *zv = ddi_get_soft_state(zvol_state, getminor(bp->b_edev)); uint64_t off, volsize; size_t size, resid; char *addr; objset_t *os; int error = 0; int sync; int reading; int txg_sync_needed = B_FALSE; if (zv == NULL) { bioerror(bp, ENXIO); biodone(bp); return (0); } if (getminor(bp->b_edev) == 0) { bioerror(bp, EINVAL); biodone(bp); return (0); } if (zv->zv_readonly && !(bp->b_flags & B_READ)) { bioerror(bp, EROFS); biodone(bp); return (0); } off = ldbtob(bp->b_blkno); volsize = zv->zv_volsize; os = zv->zv_objset; ASSERT(os != NULL); sync = !(bp->b_flags & B_ASYNC) && !(zil_disable); bp_mapin(bp); addr = bp->b_un.b_addr; resid = bp->b_bcount; /* * There must be no buffer changes when doing a dmu_sync() because * we can't change the data whilst calculating the checksum. * A better approach than a per zvol rwlock would be to lock ranges. */ reading = bp->b_flags & B_READ; if (reading || resid <= zvol_immediate_write_sz) rw_enter(&zv->zv_dslock, RW_READER); else rw_enter(&zv->zv_dslock, RW_WRITER); while (resid != 0 && off < volsize) { size = MIN(resid, 1UL << 20); /* cap at 1MB per tx */ if (size > volsize - off) /* don't write past the end */ size = volsize - off; if (reading) { error = dmu_read(os, ZVOL_OBJ, off, size, addr); } else { dmu_tx_t *tx = dmu_tx_create(os); dmu_tx_hold_write(tx, ZVOL_OBJ, off, size); error = dmu_tx_assign(tx, TXG_WAIT); if (error) { dmu_tx_abort(tx); } else { dmu_write(os, ZVOL_OBJ, off, size, addr, tx); if (sync) { /* use the ZIL to commit this write */ if (zvol_log_write(zv, tx, off, size, addr) != 0) { txg_sync_needed = B_TRUE; } } dmu_tx_commit(tx); } } if (error) break; off += size; addr += size; resid -= size; } rw_exit(&zv->zv_dslock); if ((bp->b_resid = resid) == bp->b_bcount) bioerror(bp, off > volsize ? EINVAL : error); biodone(bp); if (sync) { if (txg_sync_needed) txg_wait_synced(dmu_objset_pool(os), 0); else zil_commit(zv->zv_zilog, UINT64_MAX, 0); } return (0); }
int zfs_register_callbacks(zfs_sb_t *zsb) { struct dsl_dataset *ds = NULL; objset_t *os = zsb->z_os; zfs_mntopts_t *zmo = zsb->z_mntopts; int error = 0; ASSERT(zsb); ASSERT(zmo); /* * The act of registering our callbacks will destroy any mount * options we may have. In order to enable temporary overrides * of mount options, we stash away the current values and * restore them after we register the callbacks. */ if (zfs_is_readonly(zsb) || !spa_writeable(dmu_objset_spa(os))) { zmo->z_do_readonly = B_TRUE; zmo->z_readonly = B_TRUE; } /* * Register property callbacks. * * It would probably be fine to just check for i/o error from * the first prop_register(), but I guess I like to go * overboard... */ ds = dmu_objset_ds(os); dsl_pool_config_enter(dmu_objset_pool(os), FTAG); error = dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_ATIME), atime_changed_cb, zsb); error = error ? error : dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_RELATIME), relatime_changed_cb, zsb); error = error ? error : dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_XATTR), xattr_changed_cb, zsb); error = error ? error : dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_RECORDSIZE), blksz_changed_cb, zsb); error = error ? error : dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_READONLY), readonly_changed_cb, zsb); error = error ? error : dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_DEVICES), devices_changed_cb, zsb); error = error ? error : dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_SETUID), setuid_changed_cb, zsb); error = error ? error : dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_EXEC), exec_changed_cb, zsb); error = error ? error : dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_SNAPDIR), snapdir_changed_cb, zsb); error = error ? error : dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_ACLTYPE), acltype_changed_cb, zsb); error = error ? error : dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_ACLINHERIT), acl_inherit_changed_cb, zsb); error = error ? error : dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_VSCAN), vscan_changed_cb, zsb); error = error ? error : dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_NBMAND), nbmand_changed_cb, zsb); dsl_pool_config_exit(dmu_objset_pool(os), FTAG); if (error) goto unregister; /* * Invoke our callbacks to restore temporary mount options. */ if (zmo->z_do_readonly) readonly_changed_cb(zsb, zmo->z_readonly); if (zmo->z_do_setuid) setuid_changed_cb(zsb, zmo->z_setuid); if (zmo->z_do_exec) exec_changed_cb(zsb, zmo->z_exec); if (zmo->z_do_devices) devices_changed_cb(zsb, zmo->z_devices); if (zmo->z_do_xattr) xattr_changed_cb(zsb, zmo->z_xattr); if (zmo->z_do_atime) atime_changed_cb(zsb, zmo->z_atime); if (zmo->z_do_relatime) relatime_changed_cb(zsb, zmo->z_relatime); if (zmo->z_do_nbmand) nbmand_changed_cb(zsb, zmo->z_nbmand); return (0); unregister: /* * We may attempt to unregister some callbacks that are not * registered, but this is OK; it will simply return ENOMSG, * which we will ignore. */ (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_ATIME), atime_changed_cb, zsb); (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_RELATIME), relatime_changed_cb, zsb); (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_XATTR), xattr_changed_cb, zsb); (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_RECORDSIZE), blksz_changed_cb, zsb); (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_READONLY), readonly_changed_cb, zsb); (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_DEVICES), devices_changed_cb, zsb); (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_SETUID), setuid_changed_cb, zsb); (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_EXEC), exec_changed_cb, zsb); (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_SNAPDIR), snapdir_changed_cb, zsb); (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_ACLTYPE), acltype_changed_cb, zsb); (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_ACLINHERIT), acl_inherit_changed_cb, zsb); (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_VSCAN), vscan_changed_cb, zsb); (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_NBMAND), nbmand_changed_cb, zsb); return (error); }
/* * Teardown the zfs_sb_t. * * Note, if 'unmounting' if FALSE, we return with the 'z_teardown_lock' * and 'z_teardown_inactive_lock' held. */ int zfs_sb_teardown(zfs_sb_t *zsb, boolean_t unmounting) { znode_t *zp; /* * If someone has not already unmounted this file system, * drain the iput_taskq to ensure all active references to the * zfs_sb_t have been handled only then can it be safely destroyed. */ if (zsb->z_os) { /* * If we're unmounting we have to wait for the list to * drain completely. * * If we're not unmounting there's no guarantee the list * will drain completely, but iputs run from the taskq * may add the parents of dir-based xattrs to the taskq * so we want to wait for these. * * We can safely read z_nr_znodes without locking because the * VFS has already blocked operations which add to the * z_all_znodes list and thus increment z_nr_znodes. */ int round = 0; while (zsb->z_nr_znodes > 0) { taskq_wait_outstanding(dsl_pool_iput_taskq( dmu_objset_pool(zsb->z_os)), 0); if (++round > 1 && !unmounting) break; } } rrm_enter(&zsb->z_teardown_lock, RW_WRITER, FTAG); if (!unmounting) { /* * We purge the parent filesystem's super block as the * parent filesystem and all of its snapshots have their * inode's super block set to the parent's filesystem's * super block. Note, 'z_parent' is self referential * for non-snapshots. */ shrink_dcache_sb(zsb->z_parent->z_sb); } /* * Close the zil. NB: Can't close the zil while zfs_inactive * threads are blocked as zil_close can call zfs_inactive. */ if (zsb->z_log) { zil_close(zsb->z_log); zsb->z_log = NULL; } rw_enter(&zsb->z_teardown_inactive_lock, RW_WRITER); /* * If we are not unmounting (ie: online recv) and someone already * unmounted this file system while we were doing the switcheroo, * or a reopen of z_os failed then just bail out now. */ if (!unmounting && (zsb->z_unmounted || zsb->z_os == NULL)) { rw_exit(&zsb->z_teardown_inactive_lock); rrm_exit(&zsb->z_teardown_lock, FTAG); return (SET_ERROR(EIO)); } /* * At this point there are no VFS ops active, and any new VFS ops * will fail with EIO since we have z_teardown_lock for writer (only * relevant for forced unmount). * * Release all holds on dbufs. */ if (!unmounting) { mutex_enter(&zsb->z_znodes_lock); for (zp = list_head(&zsb->z_all_znodes); zp != NULL; zp = list_next(&zsb->z_all_znodes, zp)) { if (zp->z_sa_hdl) zfs_znode_dmu_fini(zp); } mutex_exit(&zsb->z_znodes_lock); } /* * If we are unmounting, set the unmounted flag and let new VFS ops * unblock. zfs_inactive will have the unmounted behavior, and all * other VFS ops will fail with EIO. */ if (unmounting) { zsb->z_unmounted = B_TRUE; rrm_exit(&zsb->z_teardown_lock, FTAG); rw_exit(&zsb->z_teardown_inactive_lock); } /* * z_os will be NULL if there was an error in attempting to reopen * zsb, so just return as the properties had already been * * unregistered and cached data had been evicted before. */ if (zsb->z_os == NULL) return (0); /* * Unregister properties. */ zfs_unregister_callbacks(zsb); /* * Evict cached data */ if (dsl_dataset_is_dirty(dmu_objset_ds(zsb->z_os)) && !zfs_is_readonly(zsb)) txg_wait_synced(dmu_objset_pool(zsb->z_os), 0); dmu_objset_evict_dbufs(zsb->z_os); return (0); }
static void zfs_objset_close(zfsvfs_t *zfsvfs) { zfs_delete_t *zd = &zfsvfs->z_delete_head; znode_t *zp, *nextzp; objset_t *os = zfsvfs->z_os; /* * Stop all delete threads. */ (void) zfs_delete_thread_target(zfsvfs, 0); /* * For forced unmount, at this point all vops except zfs_inactive * are erroring EIO. We need to now suspend zfs_inactive threads * while we are freeing dbufs before switching zfs_inactive * to use behaviour without a objset. */ rw_enter(&zfsvfs->z_um_lock, RW_WRITER); /* * Release all delete in progress znodes * They will be processed when the file system remounts. */ mutex_enter(&zd->z_mutex); while (zp = list_head(&zd->z_znodes)) { list_remove(&zd->z_znodes, zp); zp->z_dbuf_held = 0; dmu_buf_rele(zp->z_dbuf, NULL); } mutex_exit(&zd->z_mutex); /* * Release all holds on dbufs * Note, although we have stopped all other vop threads and * zfs_inactive(), the dmu can callback via znode_pageout_func() * which can zfs_znode_free() the znode. * So we lock z_all_znodes; search the list for a held * dbuf; drop the lock (we know zp can't disappear if we hold * a dbuf lock; then regrab the lock and restart. */ mutex_enter(&zfsvfs->z_znodes_lock); for (zp = list_head(&zfsvfs->z_all_znodes); zp; zp = nextzp) { nextzp = list_next(&zfsvfs->z_all_znodes, zp); if (zp->z_dbuf_held) { /* dbufs should only be held when force unmounting */ zp->z_dbuf_held = 0; mutex_exit(&zfsvfs->z_znodes_lock); dmu_buf_rele(zp->z_dbuf, NULL); /* Start again */ mutex_enter(&zfsvfs->z_znodes_lock); nextzp = list_head(&zfsvfs->z_all_znodes); } } mutex_exit(&zfsvfs->z_znodes_lock); /* * Unregister properties. */ if (!dmu_objset_is_snapshot(os)) zfs_unregister_callbacks(zfsvfs); /* * Switch zfs_inactive to behaviour without an objset. * It just tosses cached pages and frees the znode & vnode. * Then re-enable zfs_inactive threads in that new behaviour. */ zfsvfs->z_unmounted2 = B_TRUE; rw_exit(&zfsvfs->z_um_lock); /* re-enable any zfs_inactive threads */ /* * Close the zil. Can't close the zil while zfs_inactive * threads are blocked as zil_close can call zfs_inactive. */ if (zfsvfs->z_log) { zil_close(zfsvfs->z_log); zfsvfs->z_log = NULL; } /* * Evict all dbufs so that cached znodes will be freed */ if (dmu_objset_evict_dbufs(os, 1)) { txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); (void) dmu_objset_evict_dbufs(os, 0); } /* * Finally close the objset */ dmu_objset_close(os); /* * We can now safely destroy the '.zfs' directory node. */ if (zfsvfs->z_ctldir != NULL) zfsctl_destroy(zfsvfs); }
/*ARGSUSED*/ static int zfs_vfs_unmount(struct mount *mp, int mntflags, vfs_context_t context) { zfsvfs_t *zfsvfs = vfs_fsprivate(mp); objset_t *os = zfsvfs->z_os; znode_t *zp, *nextzp; int ret, i; int flags; /*XXX NOEL: delegation admin stuffs, add back if we use delg. admin */ #if 0 ret = 0; /* UNDEFINED: secpolicy_fs_unmount(cr, vfsp); */ if (ret) { ret = dsl_deleg_access((char *)refstr_value(vfsp->vfs_resource), ZFS_DELEG_PERM_MOUNT, cr); if (ret) return (ret); } /* * We purge the parent filesystem's vfsp as the parent filesystem * and all of its snapshots have their vnode's v_vfsp set to the * parent's filesystem's vfsp. Note, 'z_parent' is self * referential for non-snapshots. */ (void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0); #endif /* * Unmount any snapshots mounted under .zfs before unmounting the * dataset itself. */ #if 0 if (zfsvfs->z_ctldir != NULL && (ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0) { return (ret); #endif flags = SKIPSYSTEM; if (mntflags & MNT_FORCE) flags |= FORCECLOSE; ret = vflush(mp, NULLVP, flags); /* * Mac OS X needs a file system modify time * * We use the mtime of the "com.apple.system.mtime" * extended attribute, which is associated with the * file system root directory. * * Here we need to release the ref we took on z_mtime_vp during mount. */ if ((ret == 0) || (mntflags & MNT_FORCE)) { if (zfsvfs->z_mtime_vp != NULL) { struct vnode *mvp; mvp = zfsvfs->z_mtime_vp; zfsvfs->z_mtime_vp = NULL; if (vnode_get(mvp) == 0) { vnode_rele(mvp); vnode_recycle(mvp); vnode_put(mvp); } } } if (!(mntflags & MNT_FORCE)) { /* * Check the number of active vnodes in the file system. * Our count is maintained in the vfs structure, but the * number is off by 1 to indicate a hold on the vfs * structure itself. * * The '.zfs' directory maintains a reference of its * own, and any active references underneath are * reflected in the vnode count. */ if (ret) return (EBUSY); #if 0 if (zfsvfs->z_ctldir == NULL) { if (vfsp->vfs_count > 1) return (EBUSY); } else { if (vfsp->vfs_count > 2 || zfsvfs->z_ctldir->v_count > 1) { return (EBUSY); } } #endif } rw_enter(&zfsvfs->z_unmount_lock, RW_WRITER); rw_enter(&zfsvfs->z_unmount_inactive_lock, RW_WRITER); /* * At this point there are no vops active, and any new vops will * fail with EIO since we have z_unmount_lock for writer (only * relavent for forced unmount). * * Release all holds on dbufs. * Note, the dmu can still callback via znode_pageout_func() * which can zfs_znode_free() the znode. So we lock * z_all_znodes; search the list for a held dbuf; drop the lock * (we know zp can't disappear if we hold a dbuf lock) then * regrab the lock and restart. */ mutex_enter(&zfsvfs->z_znodes_lock); for (zp = list_head(&zfsvfs->z_all_znodes); zp; zp = nextzp) { nextzp = list_next(&zfsvfs->z_all_znodes, zp); if (zp->z_dbuf_held) { /* dbufs should only be held when force unmounting */ zp->z_dbuf_held = 0; mutex_exit(&zfsvfs->z_znodes_lock); dmu_buf_rele(zp->z_dbuf, NULL); /* Start again */ mutex_enter(&zfsvfs->z_znodes_lock); nextzp = list_head(&zfsvfs->z_all_znodes); } } mutex_exit(&zfsvfs->z_znodes_lock); /* * Set the unmounted flag and let new vops unblock. * zfs_inactive will have the unmounted behavior, and all other * vops will fail with EIO. */ zfsvfs->z_unmounted = B_TRUE; rw_exit(&zfsvfs->z_unmount_lock); rw_exit(&zfsvfs->z_unmount_inactive_lock); /* * Unregister properties. */ #ifndef __APPLE__ if (!dmu_objset_is_snapshot(os)) zfs_unregister_callbacks(zfsvfs); #endif /* * Close the zil. NB: Can't close the zil while zfs_inactive * threads are blocked as zil_close can call zfs_inactive. */ if (zfsvfs->z_log) { zil_close(zfsvfs->z_log); zfsvfs->z_log = NULL; } /* * Evict all dbufs so that cached znodes will be freed */ if (dmu_objset_evict_dbufs(os, B_TRUE)) { txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); (void) dmu_objset_evict_dbufs(os, B_FALSE); } /* * Finally close the objset */ dmu_objset_close(os); /* * We can now safely destroy the '.zfs' directory node. */ #if 0 if (zfsvfs->z_ctldir != NULL) zfsctl_destroy(zfsvfs); #endif /* * Note that this work is normally done in zfs_freevfs, but since * there is no VOP_FREEVFS in OSX, we free VFS items here */ OSDecrementAtomic((SInt32 *)&zfs_active_fs_count); for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) mutex_destroy(&zfsvfs->z_hold_mtx[i]); mutex_destroy(&zfsvfs->z_znodes_lock); list_destroy(&zfsvfs->z_all_znodes); rw_destroy(&zfsvfs->z_unmount_lock); rw_destroy(&zfsvfs->z_unmount_inactive_lock); return (0); } struct vnode* vnode_getparent(struct vnode *vp); /* sys/vnode_internal.h */ static int zfs_vget_internal(zfsvfs_t *zfsvfs, ino64_t ino, struct vnode **vpp) { struct vnode *vp; struct vnode *dvp = NULL; znode_t *zp; int error; *vpp = NULL; /* * On Mac OS X we always export the root directory id as 2 * and its parent as 1 */ if (ino == 2 || ino == 1) ino = zfsvfs->z_root; if ((error = zfs_zget(zfsvfs, ino, &zp))) goto out; /* Don't expose EA objects! */ if (zp->z_phys->zp_flags & ZFS_XATTR) { vnode_put(ZTOV(zp)); error = ENOENT; goto out; } *vpp = vp = ZTOV(zp); if (vnode_isvroot(vp)) goto out; /* * If this znode didn't just come from the cache then * it won't have a valid identity (parent and name). * * Manually fix its identity here (normally done by namei lookup). */ if ((dvp = vnode_getparent(vp)) == NULL) { if (zp->z_phys->zp_parent != 0 && zfs_vget_internal(zfsvfs, zp->z_phys->zp_parent, &dvp)) { goto out; } if ( vnode_isdir(dvp) ) { char objname[ZAP_MAXNAMELEN]; /* 256 bytes */ int flags = VNODE_UPDATE_PARENT; /* Look for znode's name in its parent's zap */ if ( zap_value_search(zfsvfs->z_os, zp->z_phys->zp_parent, zp->z_id, ZFS_DIRENT_OBJ(-1ULL), objname) == 0 ) { flags |= VNODE_UPDATE_NAME; } /* Update the znode's parent and name */ vnode_update_identity(vp, dvp, objname, 0, 0, flags); } } /* All done with znode's parent */ vnode_put(dvp); out: return (error); } /* * Get a vnode from a file id (ignoring the generation) * * Use by NFS Server (readdirplus) and VFS (build_path) */ static int zfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, __unused vfs_context_t context) { zfsvfs_t *zfsvfs = vfs_fsprivate(mp); int error; ZFS_ENTER(zfsvfs); /* * On Mac OS X we always export the root directory id as 2. * So we don't expect to see the real root directory id * from zfs_vfs_vget KPI (unless of course the real id was * already 2). */ if ((ino == zfsvfs->z_root) && (zfsvfs->z_root != 2)) { ZFS_EXIT(zfsvfs); return (ENOENT); } error = zfs_vget_internal(zfsvfs, ino, vpp); ZFS_EXIT(zfsvfs); return (error); }
/* * dsl_crypto_key_unload * * Remove the key from the in memory keystore. * * First we have to remove the minor node for a ZVOL or unmount * the filesystem. This is so that we flush all pending IO for it to disk * so we won't need to encrypt anything with this key. Anything in flight * should already have a lock on the keys it needs. * We can't assume that userland has already successfully unmounted the * dataset though in many cases it will have. * * If the key can't be removed return the failure back to our caller. */ int dsl_crypto_key_unload(const char *dsname) { dsl_dataset_t *ds; objset_t *os; int error; spa_t *spa; dsl_pool_t *dp; #ifdef _KERNEL dmu_objset_type_t os_type; //vfs_t *vfsp; struct vfsmount *vfsp; #endif /* _KERNEL */ error = dsl_pool_hold(dsname, FTAG, &dp); if (error != 0) return (error); /* XXX - should we use own_exclusive() here? */ if ((error = dsl_dataset_hold(dp, dsname, FTAG, &ds)) != 0) { dsl_pool_rele(dp, FTAG); return (error); } if ((error = dmu_objset_from_ds(ds, &os)) != 0) { dsl_dataset_rele(ds, FTAG); dsl_pool_rele(dp, FTAG); return (error); } #ifdef _KERNEL /* * Make sure that the device node has gone for ZVOLs * and that filesystems are umounted. */ #if 0 // FIXME os_type = dmu_objset_type(os); if (os_type == DMU_OST_ZVOL) { error = zvol_remove_minor(dsname); if (error == ENXIO) error = 0; } else if (os_type == DMU_OST_ZFS) { vfsp = zfs_get_vfs(dsname); if (vfsp != NULL) { error = vn_vfswlock(vfsp->vfs_vnodecovered); VFS_RELE(vfsp); if (error == 0) error = dounmount(vfsp, 0, CRED()); } } if (error != 0) { dsl_dataset_rele(ds, FTAG); return (error); } #endif #endif /* _KERNEL */ /* * Make sure all dbufs are synced. * * It is essential for encrypted datasets to ensure that * there is no further pending IO before removing the key. */ if (dmu_objset_is_dirty(os, 0)) // FIXME, 0? txg_wait_synced(dmu_objset_pool(os), 0); dmu_objset_evict_dbufs(os); spa = dsl_dataset_get_spa(ds); error = zcrypt_keystore_remove(spa, ds->ds_object); dsl_dataset_rele(ds, FTAG); dsl_pool_rele(dp, FTAG); return (error); }
int zfs_register_callbacks(zfs_sb_t *zsb) { struct dsl_dataset *ds = NULL; objset_t *os = zsb->z_os; boolean_t do_readonly = B_FALSE; int error = 0; if (zfs_is_readonly(zsb) || !spa_writeable(dmu_objset_spa(os))) do_readonly = B_TRUE; /* * Register property callbacks. * * It would probably be fine to just check for i/o error from * the first prop_register(), but I guess I like to go * overboard... */ ds = dmu_objset_ds(os); dsl_pool_config_enter(dmu_objset_pool(os), FTAG); error = dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_ATIME), atime_changed_cb, zsb); error = error ? error : dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_XATTR), xattr_changed_cb, zsb); error = error ? error : dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_RECORDSIZE), blksz_changed_cb, zsb); error = error ? error : dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_READONLY), readonly_changed_cb, zsb); error = error ? error : dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_DEVICES), devices_changed_cb, zsb); error = error ? error : dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_SETUID), setuid_changed_cb, zsb); error = error ? error : dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_EXEC), exec_changed_cb, zsb); error = error ? error : dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_SNAPDIR), snapdir_changed_cb, zsb); error = error ? error : dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_ACLINHERIT), acl_inherit_changed_cb, zsb); error = error ? error : dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_VSCAN), vscan_changed_cb, zsb); error = error ? error : dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_NBMAND), nbmand_changed_cb, zsb); dsl_pool_config_exit(dmu_objset_pool(os), FTAG); if (error) goto unregister; if (do_readonly) readonly_changed_cb(zsb, B_TRUE); return (0); unregister: /* * We may attempt to unregister some callbacks that are not * registered, but this is OK; it will simply return ENOMSG, * which we will ignore. */ (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_ATIME), atime_changed_cb, zsb); (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_XATTR), xattr_changed_cb, zsb); (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_RECORDSIZE), blksz_changed_cb, zsb); (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_READONLY), readonly_changed_cb, zsb); (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_DEVICES), devices_changed_cb, zsb); (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_SETUID), setuid_changed_cb, zsb); (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_EXEC), exec_changed_cb, zsb); (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_SNAPDIR), snapdir_changed_cb, zsb); (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_ACLINHERIT), acl_inherit_changed_cb, zsb); (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_VSCAN), vscan_changed_cb, zsb); (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_NBMAND), nbmand_changed_cb, zsb); return (error); }