void zfs_unregister_callbacks(zfs_sb_t *zsb) { objset_t *os = zsb->z_os; if (!dmu_objset_is_snapshot(os)) dsl_prop_unregister_all(dmu_objset_ds(os), zsb); }
/* * Safely mark an inode dirty. Inodes which are part of a read-only * file system or snapshot may not be dirtied. */ void zfs_mark_inode_dirty(struct inode *ip) { zfs_sb_t *zsb = ITOZSB(ip); if (zfs_is_readonly(zsb) || dmu_objset_is_snapshot(zsb->z_os)) return; mark_inode_dirty(ip); }
void zfs_unregister_callbacks(zfs_sb_t *zsb) { objset_t *os = zsb->z_os; struct dsl_dataset *ds; /* * Unregister properties. */ if (!dmu_objset_is_snapshot(os)) { ds = dmu_objset_ds(os); VERIFY(dsl_prop_unregister(ds, "atime", atime_changed_cb, zsb) == 0); VERIFY(dsl_prop_unregister(ds, "relatime", relatime_changed_cb, zsb) == 0); VERIFY(dsl_prop_unregister(ds, "xattr", xattr_changed_cb, zsb) == 0); VERIFY(dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, zsb) == 0); VERIFY(dsl_prop_unregister(ds, "readonly", readonly_changed_cb, zsb) == 0); VERIFY(dsl_prop_unregister(ds, "devices", devices_changed_cb, zsb) == 0); VERIFY(dsl_prop_unregister(ds, "setuid", setuid_changed_cb, zsb) == 0); VERIFY(dsl_prop_unregister(ds, "exec", exec_changed_cb, zsb) == 0); VERIFY(dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, zsb) == 0); VERIFY(dsl_prop_unregister(ds, "acltype", acltype_changed_cb, zsb) == 0); VERIFY(dsl_prop_unregister(ds, "aclinherit", acl_inherit_changed_cb, zsb) == 0); VERIFY(dsl_prop_unregister(ds, "vscan", vscan_changed_cb, zsb) == 0); VERIFY(dsl_prop_unregister(ds, "nbmand", nbmand_changed_cb, zsb) == 0); } }
int dmu_objset_userspace_upgrade(objset_t *os) { uint64_t obj; int err = 0; if (dmu_objset_userspace_present(os)) return (0); if (!dmu_objset_userused_enabled(os->os)) return (ENOTSUP); if (dmu_objset_is_snapshot(os)) return (EINVAL); /* * We simply need to mark every object dirty, so that it will be * synced out and now accounted. If this is called * concurrently, or if we already did some work before crashing, * that's fine, since we track each object's accounted state * independently. */ for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE, 0)) { dmu_tx_t *tx; dmu_buf_t *db; int objerr; if (issig(JUSTLOOKING) && issig(FORREAL)) return (EINTR); objerr = dmu_bonus_hold(os, obj, FTAG, &db); if (objerr) continue; tx = dmu_tx_create(os); dmu_tx_hold_bonus(tx, obj); objerr = dmu_tx_assign(tx, TXG_WAIT); if (objerr) { dmu_tx_abort(tx); continue; } dmu_buf_will_dirty(db, tx); dmu_buf_rele(db, FTAG); dmu_tx_commit(tx); } os->os->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE; txg_wait_synced(dmu_objset_pool(os), 0); return (0); }
static int zvol_first_open(zvol_state_t *zv) { objset_t *os; uint64_t volsize; int error; uint64_t ro; /* lie and say we're read-only */ error = dmu_objset_own(zv->zv_name, DMU_OST_ZVOL, 1, zvol_tag, &os); if (error) return (SET_ERROR(-error)); zv->zv_objset = os; error = dsl_prop_get_integer(zv->zv_name, "readonly", &ro, NULL); if (error) goto out_owned; error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize); if (error) goto out_owned; error = dmu_bonus_hold(os, ZVOL_OBJ, zvol_tag, &zv->zv_dbuf); if (error) goto out_owned; set_capacity(zv->zv_disk, volsize >> 9); zv->zv_volsize = volsize; zv->zv_zilog = zil_open(os, zvol_get_data); if (ro || dmu_objset_is_snapshot(os) || !spa_writeable(dmu_objset_spa(os))) { set_disk_ro(zv->zv_disk, 1); zv->zv_flags |= ZVOL_RDONLY; } else { set_disk_ro(zv->zv_disk, 0); zv->zv_flags &= ~ZVOL_RDONLY; } out_owned: if (error) { dmu_objset_disown(os, zvol_tag); zv->zv_objset = NULL; } return (SET_ERROR(-error)); }
static int zfs_refresh_properties(vfs_t *vfsp) { zfsvfs_t *zfsvfs = vfsp->vfs_data; /* * Remount operations default to "rw" unless "ro" is explicitly * specified. */ if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) { readonly_changed_cb(zfsvfs, B_TRUE); } else { if (!dmu_objset_is_snapshot(zfsvfs->z_os)) readonly_changed_cb(zfsvfs, B_FALSE); else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) return (EROFS); } if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) { devices_changed_cb(zfsvfs, B_FALSE); setuid_changed_cb(zfsvfs, B_FALSE); } else { if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) devices_changed_cb(zfsvfs, B_FALSE); else if (vfs_optionisset(vfsp, MNTOPT_DEVICES, NULL)) devices_changed_cb(zfsvfs, B_TRUE); if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) setuid_changed_cb(zfsvfs, B_FALSE); else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) setuid_changed_cb(zfsvfs, B_TRUE); } if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) exec_changed_cb(zfsvfs, B_FALSE); else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) exec_changed_cb(zfsvfs, B_TRUE); if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) atime_changed_cb(zfsvfs, B_TRUE); else if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) atime_changed_cb(zfsvfs, B_FALSE); return (0); }
void zfs_unregister_callbacks(zfsvfs_t *zfsvfs) { objset_t *os = zfsvfs->z_os; struct dsl_dataset *ds; /* * Unregister properties. */ if (!dmu_objset_is_snapshot(os)) { ds = dmu_objset_ds(os); VERIFY(dsl_prop_unregister(ds, "atime", atime_changed_cb, zfsvfs) == 0); VERIFY(dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, zfsvfs) == 0); VERIFY(dsl_prop_unregister(ds, "readonly", readonly_changed_cb, zfsvfs) == 0); VERIFY(dsl_prop_unregister(ds, "devices", devices_changed_cb, zfsvfs) == 0); VERIFY(dsl_prop_unregister(ds, "setuid", setuid_changed_cb, zfsvfs) == 0); VERIFY(dsl_prop_unregister(ds, "exec", exec_changed_cb, zfsvfs) == 0); VERIFY(dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, zfsvfs) == 0); VERIFY(dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, zfsvfs) == 0); VERIFY(dsl_prop_unregister(ds, "aclinherit", acl_inherit_changed_cb, zfsvfs) == 0); } }
/* * Create a block device minor node and setup the linkage between it * and the specified volume. Once this function returns the block * device is live and ready for use. */ static int zvol_create_minor_impl(const char *name) { zvol_state_t *zv; objset_t *os; dmu_object_info_t *doi; uint64_t volsize; uint64_t len; unsigned minor = 0; int error = 0; mutex_enter(&zvol_state_lock); zv = zvol_find_by_name(name); if (zv) { error = SET_ERROR(EEXIST); goto out; } doi = kmem_alloc(sizeof (dmu_object_info_t), KM_SLEEP); error = dmu_objset_own(name, DMU_OST_ZVOL, B_TRUE, zvol_tag, &os); if (error) goto out_doi; error = dmu_object_info(os, ZVOL_OBJ, doi); if (error) goto out_dmu_objset_disown; error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize); if (error) goto out_dmu_objset_disown; error = zvol_find_minor(&minor); if (error) goto out_dmu_objset_disown; zv = zvol_alloc(MKDEV(zvol_major, minor), name); if (zv == NULL) { error = SET_ERROR(EAGAIN); goto out_dmu_objset_disown; } if (dmu_objset_is_snapshot(os)) zv->zv_flags |= ZVOL_RDONLY; zv->zv_volblocksize = doi->doi_data_block_size; zv->zv_volsize = volsize; zv->zv_objset = os; set_capacity(zv->zv_disk, zv->zv_volsize >> 9); blk_queue_max_hw_sectors(zv->zv_queue, (DMU_MAX_ACCESS / 4) >> 9); blk_queue_max_segments(zv->zv_queue, UINT16_MAX); blk_queue_max_segment_size(zv->zv_queue, UINT_MAX); blk_queue_physical_block_size(zv->zv_queue, zv->zv_volblocksize); blk_queue_io_opt(zv->zv_queue, zv->zv_volblocksize); blk_queue_max_discard_sectors(zv->zv_queue, (zvol_max_discard_blocks * zv->zv_volblocksize) >> 9); blk_queue_discard_granularity(zv->zv_queue, zv->zv_volblocksize); queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, zv->zv_queue); #ifdef QUEUE_FLAG_NONROT queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zv->zv_queue); #endif #ifdef QUEUE_FLAG_ADD_RANDOM queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, zv->zv_queue); #endif if (spa_writeable(dmu_objset_spa(os))) { if (zil_replay_disable) zil_destroy(dmu_objset_zil(os), B_FALSE); else zil_replay(os, zv, zvol_replay_vector); } /* * When udev detects the addition of the device it will immediately * invoke blkid(8) to determine the type of content on the device. * Prefetching the blocks commonly scanned by blkid(8) will speed * up this process. */ len = MIN(MAX(zvol_prefetch_bytes, 0), SPA_MAXBLOCKSIZE); if (len > 0) { dmu_prefetch(os, ZVOL_OBJ, 0, 0, len, ZIO_PRIORITY_SYNC_READ); dmu_prefetch(os, ZVOL_OBJ, 0, volsize - len, len, ZIO_PRIORITY_SYNC_READ); } zv->zv_objset = NULL; out_dmu_objset_disown: dmu_objset_disown(os, zvol_tag); out_doi: kmem_free(doi, sizeof (dmu_object_info_t)); out: if (error == 0) { zvol_insert(zv); /* * Drop the lock to prevent deadlock with sys_open() -> * zvol_open(), which first takes bd_disk->bd_mutex and then * takes zvol_state_lock, whereas this code path first takes * zvol_state_lock, and then takes bd_disk->bd_mutex. */ mutex_exit(&zvol_state_lock); add_disk(zv->zv_disk); } else { mutex_exit(&zvol_state_lock); } return (SET_ERROR(error)); }
static int __zvol_create_minor(const char *name) { zvol_state_t *zv; objset_t *os; dmu_object_info_t *doi; uint64_t volsize; unsigned minor = 0; int error = 0; ASSERT(MUTEX_HELD(&zvol_state_lock)); zv = zvol_find_by_name(name); if (zv) { error = EEXIST; goto out; } doi = kmem_alloc(sizeof(dmu_object_info_t), KM_SLEEP); error = dmu_objset_own(name, DMU_OST_ZVOL, B_TRUE, zvol_tag, &os); if (error) goto out_doi; /* Make sure we have the key loaded if we need one. */ error = dsl_crypto_key_inherit(name); if (error != 0 && error != EEXIST) goto out_dmu_objset_disown; error = dmu_object_info(os, ZVOL_OBJ, doi); if (error) goto out_dmu_objset_disown; error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize); if (error) goto out_dmu_objset_disown; error = zvol_find_minor(&minor); if (error) goto out_dmu_objset_disown; zv = zvol_alloc(MKDEV(zvol_major, minor), name); if (zv == NULL) { error = EAGAIN; goto out_dmu_objset_disown; } if (dmu_objset_is_snapshot(os)) zv->zv_flags |= ZVOL_RDONLY; zv->zv_volblocksize = doi->doi_data_block_size; zv->zv_volsize = volsize; zv->zv_objset = os; set_capacity(zv->zv_disk, zv->zv_volsize >> 9); blk_queue_max_hw_sectors(zv->zv_queue, UINT_MAX); blk_queue_max_segments(zv->zv_queue, UINT16_MAX); blk_queue_max_segment_size(zv->zv_queue, UINT_MAX); blk_queue_physical_block_size(zv->zv_queue, zv->zv_volblocksize); blk_queue_io_opt(zv->zv_queue, zv->zv_volblocksize); #ifdef HAVE_BLK_QUEUE_DISCARD blk_queue_max_discard_sectors(zv->zv_queue, (zvol_max_discard_blocks * zv->zv_volblocksize) >> 9); blk_queue_discard_granularity(zv->zv_queue, zv->zv_volblocksize); queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, zv->zv_queue); #endif #ifdef HAVE_BLK_QUEUE_NONROT queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zv->zv_queue); #endif if (zil_replay_disable) zil_destroy(dmu_objset_zil(os), B_FALSE); else zil_replay(os, zv, zvol_replay_vector); out_dmu_objset_disown: dmu_objset_disown(os, zvol_tag); zv->zv_objset = NULL; out_doi: kmem_free(doi, sizeof(dmu_object_info_t)); out: if (error == 0) { zvol_insert(zv); add_disk(zv->zv_disk); } return (error); }
static int zfs_domount(vfs_t *vfsp, char *osname) { uint64_t recordsize, readonly; int error = 0; int mode; zfsvfs_t *zfsvfs; znode_t *zp = NULL; ASSERT(vfsp); ASSERT(osname); /* * Initialize the zfs-specific filesystem structure. * Should probably make this a kmem cache, shuffle fields, * and just bzero up to z_hold_mtx[]. */ zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); zfsvfs->z_vfs = vfsp; zfsvfs->z_parent = zfsvfs; zfsvfs->z_assign = TXG_NOWAIT; zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE; zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE; mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&zfsvfs->z_online_recv_lock, NULL, MUTEX_DEFAULT, NULL); list_create(&zfsvfs->z_all_znodes, sizeof (znode_t), offsetof(znode_t, z_link_node)); rrw_init(&zfsvfs->z_teardown_lock); rw_init(&zfsvfs->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL); rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL); if (error = dsl_prop_get_integer(osname, "recordsize", &recordsize, NULL)) goto out; zfsvfs->z_vfs->vfs_bsize = recordsize; vfsp->vfs_data = zfsvfs; vfsp->mnt_flag |= MNT_LOCAL; vfsp->mnt_kern_flag |= MNTK_MPSAFE; vfsp->mnt_kern_flag |= MNTK_LOOKUP_SHARED; vfsp->mnt_kern_flag |= MNTK_SHARED_WRITES; if (error = dsl_prop_get_integer(osname, "readonly", &readonly, NULL)) goto out; mode = DS_MODE_OWNER; if (readonly) mode |= DS_MODE_READONLY; error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os); if (error == EROFS) { mode = DS_MODE_OWNER | DS_MODE_READONLY; error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os); } if (error) goto out; if (error = zfs_init_fs(zfsvfs, &zp)) goto out; /* * Set features for file system. */ zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os); if (zfsvfs->z_use_fuids) { vfs_set_feature(vfsp, VFSFT_XVATTR); vfs_set_feature(vfsp, VFSFT_SYSATTR_VIEWS); vfs_set_feature(vfsp, VFSFT_ACEMASKONACCESS); vfs_set_feature(vfsp, VFSFT_ACLONCREATE); } if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) { vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS); vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE); vfs_set_feature(vfsp, VFSFT_NOCASESENSITIVE); } else if (zfsvfs->z_case == ZFS_CASE_MIXED) { vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS); vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE); } if (dmu_objset_is_snapshot(zfsvfs->z_os)) { uint64_t pval; ASSERT(mode & DS_MODE_READONLY); atime_changed_cb(zfsvfs, B_FALSE); readonly_changed_cb(zfsvfs, B_TRUE); if (error = dsl_prop_get_integer(osname, "xattr", &pval, NULL)) goto out; xattr_changed_cb(zfsvfs, pval); zfsvfs->z_issnap = B_TRUE; } else { error = zfsvfs_setup(zfsvfs, B_TRUE); } vfs_mountedfrom(vfsp, osname); if (!zfsvfs->z_issnap) zfsctl_create(zfsvfs); out: if (error) { if (zfsvfs->z_os) dmu_objset_close(zfsvfs->z_os); zfs_freezfsvfs(zfsvfs); } else { atomic_add_32(&zfs_active_fs_count, 1); } return (error); }
int zfs_domount(struct super_block *sb, zfs_mntopts_t *zmo, int silent) { const char *osname = zmo->z_osname; zfs_sb_t *zsb; struct inode *root_inode; uint64_t recordsize; int error; error = zfs_sb_create(osname, zmo, &zsb); if (error) return (error); if ((error = dsl_prop_get_integer(osname, "recordsize", &recordsize, NULL))) goto out; zsb->z_sb = sb; sb->s_fs_info = zsb; sb->s_magic = ZFS_SUPER_MAGIC; sb->s_maxbytes = MAX_LFS_FILESIZE; sb->s_time_gran = 1; sb->s_blocksize = recordsize; sb->s_blocksize_bits = ilog2(recordsize); zsb->z_bdi.ra_pages = 0; sb->s_bdi = &zsb->z_bdi; error = -zpl_bdi_setup_and_register(&zsb->z_bdi, "zfs"); if (error) goto out; /* Set callback operations for the file system. */ sb->s_op = &zpl_super_operations; sb->s_xattr = zpl_xattr_handlers; sb->s_export_op = &zpl_export_operations; #ifdef HAVE_S_D_OP sb->s_d_op = &zpl_dentry_operations; #endif /* HAVE_S_D_OP */ /* Set features for file system. */ zfs_set_fuid_feature(zsb); if (dmu_objset_is_snapshot(zsb->z_os)) { uint64_t pval; atime_changed_cb(zsb, B_FALSE); readonly_changed_cb(zsb, B_TRUE); if ((error = dsl_prop_get_integer(osname, "xattr", &pval, NULL))) goto out; xattr_changed_cb(zsb, pval); if ((error = dsl_prop_get_integer(osname, "acltype", &pval, NULL))) goto out; acltype_changed_cb(zsb, pval); zsb->z_issnap = B_TRUE; zsb->z_os->os_sync = ZFS_SYNC_DISABLED; zsb->z_snap_defer_time = jiffies; mutex_enter(&zsb->z_os->os_user_ptr_lock); dmu_objset_set_user(zsb->z_os, zsb); mutex_exit(&zsb->z_os->os_user_ptr_lock); } else { error = zfs_sb_setup(zsb, B_TRUE); } /* Allocate a root inode for the filesystem. */ error = zfs_root(zsb, &root_inode); if (error) { (void) zfs_umount(sb); goto out; } /* Allocate a root dentry for the filesystem */ sb->s_root = d_make_root(root_inode); if (sb->s_root == NULL) { (void) zfs_umount(sb); error = SET_ERROR(ENOMEM); goto out; } if (!zsb->z_issnap) zfsctl_create(zsb); zsb->z_arc_prune = arc_add_prune_callback(zpl_prune_sb, sb); out: if (error) { dmu_objset_disown(zsb->z_os, zsb); zfs_sb_free(zsb); } return (error); }
/*ARGSUSED*/ static int zfs_vfs_unmount(struct mount *mp, int mntflags, vfs_context_t context) { zfsvfs_t *zfsvfs = vfs_fsprivate(mp); objset_t *os = zfsvfs->z_os; znode_t *zp, *nextzp; int ret, i; int flags; /*XXX NOEL: delegation admin stuffs, add back if we use delg. admin */ #if 0 ret = 0; /* UNDEFINED: secpolicy_fs_unmount(cr, vfsp); */ if (ret) { ret = dsl_deleg_access((char *)refstr_value(vfsp->vfs_resource), ZFS_DELEG_PERM_MOUNT, cr); if (ret) return (ret); } /* * We purge the parent filesystem's vfsp as the parent filesystem * and all of its snapshots have their vnode's v_vfsp set to the * parent's filesystem's vfsp. Note, 'z_parent' is self * referential for non-snapshots. */ (void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0); #endif /* * Unmount any snapshots mounted under .zfs before unmounting the * dataset itself. */ #if 0 if (zfsvfs->z_ctldir != NULL && (ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0) { return (ret); #endif flags = SKIPSYSTEM; if (mntflags & MNT_FORCE) flags |= FORCECLOSE; ret = vflush(mp, NULLVP, flags); /* * Mac OS X needs a file system modify time * * We use the mtime of the "com.apple.system.mtime" * extended attribute, which is associated with the * file system root directory. * * Here we need to release the ref we took on z_mtime_vp during mount. */ if ((ret == 0) || (mntflags & MNT_FORCE)) { if (zfsvfs->z_mtime_vp != NULL) { struct vnode *mvp; mvp = zfsvfs->z_mtime_vp; zfsvfs->z_mtime_vp = NULL; if (vnode_get(mvp) == 0) { vnode_rele(mvp); vnode_recycle(mvp); vnode_put(mvp); } } } if (!(mntflags & MNT_FORCE)) { /* * Check the number of active vnodes in the file system. * Our count is maintained in the vfs structure, but the * number is off by 1 to indicate a hold on the vfs * structure itself. * * The '.zfs' directory maintains a reference of its * own, and any active references underneath are * reflected in the vnode count. */ if (ret) return (EBUSY); #if 0 if (zfsvfs->z_ctldir == NULL) { if (vfsp->vfs_count > 1) return (EBUSY); } else { if (vfsp->vfs_count > 2 || zfsvfs->z_ctldir->v_count > 1) { return (EBUSY); } } #endif } rw_enter(&zfsvfs->z_unmount_lock, RW_WRITER); rw_enter(&zfsvfs->z_unmount_inactive_lock, RW_WRITER); /* * At this point there are no vops active, and any new vops will * fail with EIO since we have z_unmount_lock for writer (only * relavent for forced unmount). * * Release all holds on dbufs. * Note, the dmu can still callback via znode_pageout_func() * which can zfs_znode_free() the znode. So we lock * z_all_znodes; search the list for a held dbuf; drop the lock * (we know zp can't disappear if we hold a dbuf lock) then * regrab the lock and restart. */ mutex_enter(&zfsvfs->z_znodes_lock); for (zp = list_head(&zfsvfs->z_all_znodes); zp; zp = nextzp) { nextzp = list_next(&zfsvfs->z_all_znodes, zp); if (zp->z_dbuf_held) { /* dbufs should only be held when force unmounting */ zp->z_dbuf_held = 0; mutex_exit(&zfsvfs->z_znodes_lock); dmu_buf_rele(zp->z_dbuf, NULL); /* Start again */ mutex_enter(&zfsvfs->z_znodes_lock); nextzp = list_head(&zfsvfs->z_all_znodes); } } mutex_exit(&zfsvfs->z_znodes_lock); /* * Set the unmounted flag and let new vops unblock. * zfs_inactive will have the unmounted behavior, and all other * vops will fail with EIO. */ zfsvfs->z_unmounted = B_TRUE; rw_exit(&zfsvfs->z_unmount_lock); rw_exit(&zfsvfs->z_unmount_inactive_lock); /* * Unregister properties. */ #ifndef __APPLE__ if (!dmu_objset_is_snapshot(os)) zfs_unregister_callbacks(zfsvfs); #endif /* * Close the zil. NB: Can't close the zil while zfs_inactive * threads are blocked as zil_close can call zfs_inactive. */ if (zfsvfs->z_log) { zil_close(zfsvfs->z_log); zfsvfs->z_log = NULL; } /* * Evict all dbufs so that cached znodes will be freed */ if (dmu_objset_evict_dbufs(os, B_TRUE)) { txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); (void) dmu_objset_evict_dbufs(os, B_FALSE); } /* * Finally close the objset */ dmu_objset_close(os); /* * We can now safely destroy the '.zfs' directory node. */ #if 0 if (zfsvfs->z_ctldir != NULL) zfsctl_destroy(zfsvfs); #endif /* * Note that this work is normally done in zfs_freevfs, but since * there is no VOP_FREEVFS in OSX, we free VFS items here */ OSDecrementAtomic((SInt32 *)&zfs_active_fs_count); for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) mutex_destroy(&zfsvfs->z_hold_mtx[i]); mutex_destroy(&zfsvfs->z_znodes_lock); list_destroy(&zfsvfs->z_all_znodes); rw_destroy(&zfsvfs->z_unmount_lock); rw_destroy(&zfsvfs->z_unmount_inactive_lock); return (0); } struct vnode* vnode_getparent(struct vnode *vp); /* sys/vnode_internal.h */ static int zfs_vget_internal(zfsvfs_t *zfsvfs, ino64_t ino, struct vnode **vpp) { struct vnode *vp; struct vnode *dvp = NULL; znode_t *zp; int error; *vpp = NULL; /* * On Mac OS X we always export the root directory id as 2 * and its parent as 1 */ if (ino == 2 || ino == 1) ino = zfsvfs->z_root; if ((error = zfs_zget(zfsvfs, ino, &zp))) goto out; /* Don't expose EA objects! */ if (zp->z_phys->zp_flags & ZFS_XATTR) { vnode_put(ZTOV(zp)); error = ENOENT; goto out; } *vpp = vp = ZTOV(zp); if (vnode_isvroot(vp)) goto out; /* * If this znode didn't just come from the cache then * it won't have a valid identity (parent and name). * * Manually fix its identity here (normally done by namei lookup). */ if ((dvp = vnode_getparent(vp)) == NULL) { if (zp->z_phys->zp_parent != 0 && zfs_vget_internal(zfsvfs, zp->z_phys->zp_parent, &dvp)) { goto out; } if ( vnode_isdir(dvp) ) { char objname[ZAP_MAXNAMELEN]; /* 256 bytes */ int flags = VNODE_UPDATE_PARENT; /* Look for znode's name in its parent's zap */ if ( zap_value_search(zfsvfs->z_os, zp->z_phys->zp_parent, zp->z_id, ZFS_DIRENT_OBJ(-1ULL), objname) == 0 ) { flags |= VNODE_UPDATE_NAME; } /* Update the znode's parent and name */ vnode_update_identity(vp, dvp, objname, 0, 0, flags); } } /* All done with znode's parent */ vnode_put(dvp); out: return (error); } /* * Get a vnode from a file id (ignoring the generation) * * Use by NFS Server (readdirplus) and VFS (build_path) */ static int zfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, __unused vfs_context_t context) { zfsvfs_t *zfsvfs = vfs_fsprivate(mp); int error; ZFS_ENTER(zfsvfs); /* * On Mac OS X we always export the root directory id as 2. * So we don't expect to see the real root directory id * from zfs_vfs_vget KPI (unless of course the real id was * already 2). */ if ((ino == zfsvfs->z_root) && (zfsvfs->z_root != 2)) { ZFS_EXIT(zfsvfs); return (ENOENT); } error = zfs_vget_internal(zfsvfs, ino, vpp); ZFS_EXIT(zfsvfs); return (error); }
static int zfs_domount(struct mount *mp, dev_t mount_dev, char *osname, vfs_context_t ctx) { uint64_t readonly; int error = 0; int mode; zfsvfs_t *zfsvfs; znode_t *zp = NULL; struct timeval tv; ASSERT(mp); ASSERT(osname); /* * Initialize the zfs-specific filesystem structure. * Should probably make this a kmem cache, shuffle fields, * and just bzero up to z_hold_mtx[]. */ zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); zfsvfs->z_vfs = mp; zfsvfs->z_parent = zfsvfs; zfsvfs->z_assign = TXG_NOWAIT; zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE; zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE; mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); list_create(&zfsvfs->z_all_znodes, sizeof (znode_t), offsetof(znode_t, z_link_node)); rw_init(&zfsvfs->z_unmount_lock, NULL, RW_DEFAULT, NULL); rw_init(&zfsvfs->z_unmount_inactive_lock, NULL, RW_DEFAULT, NULL); #ifndef __APPLE__ /* Initialize the generic filesystem structure. */ vfsp->vfs_bcount = 0; vfsp->vfs_data = NULL; if (zfs_create_unique_device(&mount_dev) == -1) { error = ENODEV; goto out; } ASSERT(vfs_devismounted(mount_dev) == 0); #endif vfs_setfsprivate(mp, zfsvfs); if (error = dsl_prop_get_integer(osname, "readonly", &readonly, NULL)) goto out; if (readonly) { mode = DS_MODE_PRIMARY | DS_MODE_READONLY; vfs_setflags(mp, (u_int64_t)((unsigned int)MNT_RDONLY)); } else { mode = DS_MODE_PRIMARY; } error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os); if (error == EROFS) { mode = DS_MODE_PRIMARY | DS_MODE_READONLY; error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os); } if (error) goto out; if (error = zfs_init_fs(zfsvfs, &zp, (cred_t *) vfs_context_ucred(ctx))) goto out; /* The call to zfs_init_fs leaves the vnode held, release it here. */ vnode_put(ZTOV(zp)); if (dmu_objset_is_snapshot(zfsvfs->z_os)) { uint64_t xattr; ASSERT(mode & DS_MODE_READONLY); #if 0 atime_changed_cb(zfsvfs, B_FALSE); readonly_changed_cb(zfsvfs, B_TRUE); if (error = dsl_prop_get_integer(osname, "xattr", &xattr, NULL)) goto out; xattr_changed_cb(zfsvfs, xattr); #endif zfsvfs->z_issnap = B_TRUE; } else { if (!vfs_isrdonly(mp)) zfs_unlinked_drain(zfsvfs); #ifndef __APPLE__ /* * Parse and replay the intent log. * * Because of ziltest, this must be done after * zfs_unlinked_drain(). (Further note: ziltest doesn't * use readonly mounts, where zfs_unlinked_drain() isn't * called.) This is because ziltest causes spa_sync() * to think it's committed, but actually it is not, so * the intent log contains many txg's worth of changes. * * In particular, if object N is in the unlinked set in * the last txg to actually sync, then it could be * actually freed in a later txg and then reallocated in * a yet later txg. This would write a "create object * N" record to the intent log. Normally, this would be * fine because the spa_sync() would have written out * the fact that object N is free, before we could write * the "create object N" intent log record. * * But when we are in ziltest mode, we advance the "open * txg" without actually spa_sync()-ing the changes to * disk. So we would see that object N is still * allocated and in the unlinked set, and there is an * intent log record saying to allocate it. */ zil_replay(zfsvfs->z_os, zfsvfs, &zfsvfs->z_assign, zfs_replay_vector); if (!zil_disable) zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data); #endif } #if 0 if (!zfsvfs->z_issnap) zfsctl_create(zfsvfs); #endif /* * Record the mount time (for Spotlight) */ microtime(&tv); zfsvfs->z_mount_time = tv.tv_sec; out: if (error) { if (zfsvfs->z_os) dmu_objset_close(zfsvfs->z_os); mutex_destroy(&zfsvfs->z_znodes_lock); list_destroy(&zfsvfs->z_all_znodes); rw_destroy(&zfsvfs->z_unmount_lock); rw_destroy(&zfsvfs->z_unmount_inactive_lock); kmem_free(zfsvfs, sizeof (zfsvfs_t)); } else { OSIncrementAtomic(&zfs_active_fs_count); (void) copystr(osname, vfs_statfs(mp)->f_mntfromname, MNAMELEN - 1, 0); vfs_getnewfsid(mp); } return (error); }
int zfs_domount(struct super_block *sb, void *data, int silent) { zpl_mount_data_t *zmd = data; const char *osname = zmd->z_osname; zfs_sb_t *zsb; struct inode *root_inode; uint64_t recordsize; int error; error = zfs_sb_create(osname, &zsb); if (error) return (error); if ((error = dsl_prop_get_integer(osname, "recordsize", &recordsize, NULL))) goto out; zsb->z_sb = sb; sb->s_fs_info = zsb; sb->s_magic = ZFS_SUPER_MAGIC; sb->s_maxbytes = MAX_LFS_FILESIZE; sb->s_time_gran = 1; sb->s_blocksize = recordsize; sb->s_blocksize_bits = ilog2(recordsize); #ifdef HAVE_BDI /* * 2.6.32 API change, * Added backing_device_info (BDI) per super block interfaces. A BDI * must be configured when using a non-device backed filesystem for * proper writeback. This is not required for older pdflush kernels. * * NOTE: Linux read-ahead is disabled in favor of zfs read-ahead. */ zsb->z_bdi.ra_pages = 0; sb->s_bdi = &zsb->z_bdi; error = -bdi_setup_and_register(&zsb->z_bdi, "zfs", BDI_CAP_MAP_COPY); if (error) goto out; #endif /* HAVE_BDI */ /* Set callback operations for the file system. */ sb->s_op = &zpl_super_operations; sb->s_xattr = zpl_xattr_handlers; sb->s_export_op = &zpl_export_operations; #ifdef HAVE_S_D_OP sb->s_d_op = &zpl_dentry_operations; #endif /* HAVE_S_D_OP */ /* Set features for file system. */ zfs_set_fuid_feature(zsb); if (dmu_objset_is_snapshot(zsb->z_os)) { uint64_t pval; atime_changed_cb(zsb, B_FALSE); readonly_changed_cb(zsb, B_TRUE); if ((error = dsl_prop_get_integer(osname,"xattr",&pval,NULL))) goto out; xattr_changed_cb(zsb, pval); zsb->z_issnap = B_TRUE; zsb->z_os->os_sync = ZFS_SYNC_DISABLED; mutex_enter(&zsb->z_os->os_user_ptr_lock); dmu_objset_set_user(zsb->z_os, zsb); mutex_exit(&zsb->z_os->os_user_ptr_lock); } else { error = zfs_sb_setup(zsb, B_TRUE); } /* Allocate a root inode for the filesystem. */ error = zfs_root(zsb, &root_inode); if (error) { (void) zfs_umount(sb); goto out; } /* Allocate a root dentry for the filesystem */ sb->s_root = d_make_root(root_inode); if (sb->s_root == NULL) { (void) zfs_umount(sb); error = ENOMEM; goto out; } if (!zsb->z_issnap) zfsctl_create(zsb); out: if (error) { dmu_objset_disown(zsb->z_os, zsb); zfs_sb_free(zsb); } return (error); }
static int zvol_first_open(zvol_state_t *zv) { objset_t *os; uint64_t volsize; int locked = 0; int error; uint64_t ro; /* * In all other cases the spa_namespace_lock is taken before the * bdev->bd_mutex lock. But in this case the Linux __blkdev_get() * function calls fops->open() with the bdev->bd_mutex lock held. * * To avoid a potential lock inversion deadlock we preemptively * try to take the spa_namespace_lock(). Normally it will not * be contended and this is safe because spa_open_common() handles * the case where the caller already holds the spa_namespace_lock. * * When it is contended we risk a lock inversion if we were to * block waiting for the lock. Luckily, the __blkdev_get() * function allows us to return -ERESTARTSYS which will result in * bdev->bd_mutex being dropped, reacquired, and fops->open() being * called again. This process can be repeated safely until both * locks are acquired. */ if (!mutex_owned(&spa_namespace_lock)) { locked = mutex_tryenter(&spa_namespace_lock); if (!locked) return (-ERESTARTSYS); } /* lie and say we're read-only */ error = dmu_objset_own(zv->zv_name, DMU_OST_ZVOL, 1, zvol_tag, &os); if (error) goto out_mutex; error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize); if (error) { dmu_objset_disown(os, zvol_tag); goto out_mutex; } zv->zv_objset = os; error = dmu_bonus_hold(os, ZVOL_OBJ, zvol_tag, &zv->zv_dbuf); if (error) { dmu_objset_disown(os, zvol_tag); goto out_mutex; } set_capacity(zv->zv_disk, volsize >> 9); zv->zv_volsize = volsize; zv->zv_zilog = zil_open(os, zvol_get_data); VERIFY(dsl_prop_get_integer(zv->zv_name, "readonly", &ro, NULL) == 0); if (ro || dmu_objset_is_snapshot(os) || !spa_writeable(dmu_objset_spa(os))) { set_disk_ro(zv->zv_disk, 1); zv->zv_flags |= ZVOL_RDONLY; } else { set_disk_ro(zv->zv_disk, 0); zv->zv_flags &= ~ZVOL_RDONLY; } out_mutex: if (locked) mutex_exit(&spa_namespace_lock); return (-error); }
static int zfs_register_callbacks(vfs_t *vfsp) { struct dsl_dataset *ds = NULL; objset_t *os = NULL; zfsvfs_t *zfsvfs = NULL; uint64_t nbmand; int readonly, do_readonly = FALSE; int setuid, do_setuid = FALSE; int exec, do_exec = FALSE; int xattr, do_xattr = FALSE; int atime, do_atime = FALSE; int error = 0; ASSERT(vfsp); zfsvfs = vfsp->vfs_data; ASSERT(zfsvfs); os = zfsvfs->z_os; /* * This function can be called for a snapshot when we update snapshot's * mount point, which isn't really supported. */ if (dmu_objset_is_snapshot(os)) return (EOPNOTSUPP); /* * The act of registering our callbacks will destroy any mount * options we may have. In order to enable temporary overrides * of mount options, we stash away the current values and * restore them after we register the callbacks. */ if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) { readonly = B_TRUE; do_readonly = B_TRUE; } else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) { readonly = B_FALSE; do_readonly = B_TRUE; } if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) { setuid = B_FALSE; do_setuid = B_TRUE; } else { if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) { setuid = B_FALSE; do_setuid = B_TRUE; } else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) { setuid = B_TRUE; do_setuid = B_TRUE; } } if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) { exec = B_FALSE; do_exec = B_TRUE; } else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) { exec = B_TRUE; do_exec = B_TRUE; } if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) { xattr = B_FALSE; do_xattr = B_TRUE; } else if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) { xattr = B_TRUE; do_xattr = B_TRUE; } if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) { atime = B_FALSE; do_atime = B_TRUE; } else if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) { atime = B_TRUE; do_atime = B_TRUE; } /* * nbmand is a special property. It can only be changed at * mount time. * * This is weird, but it is documented to only be changeable * at mount time. */ if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) { nbmand = B_FALSE; } else if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL)) { nbmand = B_TRUE; } else { char osname[MAXNAMELEN]; dmu_objset_name(os, osname); if (error = dsl_prop_get_integer(osname, "nbmand", &nbmand, NULL)) { return (error); } } /* * Register property callbacks. * * It would probably be fine to just check for i/o error from * the first prop_register(), but I guess I like to go * overboard... */ ds = dmu_objset_ds(os); error = dsl_prop_register(ds, "atime", atime_changed_cb, zfsvfs); error = error ? error : dsl_prop_register(ds, "xattr", xattr_changed_cb, zfsvfs); error = error ? error : dsl_prop_register(ds, "recordsize", blksz_changed_cb, zfsvfs); error = error ? error : dsl_prop_register(ds, "readonly", readonly_changed_cb, zfsvfs); error = error ? error : dsl_prop_register(ds, "setuid", setuid_changed_cb, zfsvfs); error = error ? error : dsl_prop_register(ds, "exec", exec_changed_cb, zfsvfs); error = error ? error : dsl_prop_register(ds, "snapdir", snapdir_changed_cb, zfsvfs); error = error ? error : dsl_prop_register(ds, "aclmode", acl_mode_changed_cb, zfsvfs); error = error ? error : dsl_prop_register(ds, "aclinherit", acl_inherit_changed_cb, zfsvfs); error = error ? error : dsl_prop_register(ds, "vscan", vscan_changed_cb, zfsvfs); if (error) goto unregister; /* * Invoke our callbacks to restore temporary mount options. */ if (do_readonly) readonly_changed_cb(zfsvfs, readonly); if (do_setuid) setuid_changed_cb(zfsvfs, setuid); if (do_exec) exec_changed_cb(zfsvfs, exec); if (do_xattr) xattr_changed_cb(zfsvfs, xattr); if (do_atime) atime_changed_cb(zfsvfs, atime); nbmand_changed_cb(zfsvfs, nbmand); return (0); unregister: /* * We may attempt to unregister some callbacks that are not * registered, but this is OK; it will simply return ENOMSG, * which we will ignore. */ (void) dsl_prop_unregister(ds, "atime", atime_changed_cb, zfsvfs); (void) dsl_prop_unregister(ds, "xattr", xattr_changed_cb, zfsvfs); (void) dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, zfsvfs); (void) dsl_prop_unregister(ds, "readonly", readonly_changed_cb, zfsvfs); (void) dsl_prop_unregister(ds, "setuid", setuid_changed_cb, zfsvfs); (void) dsl_prop_unregister(ds, "exec", exec_changed_cb, zfsvfs); (void) dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, zfsvfs); (void) dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, zfsvfs); (void) dsl_prop_unregister(ds, "aclinherit", acl_inherit_changed_cb, zfsvfs); (void) dsl_prop_unregister(ds, "vscan", vscan_changed_cb, zfsvfs); return (error); }
static int zfs_domount(vfs_t *vfsp, char *osname, cred_t *cr) { dev_t mount_dev; uint64_t recordsize, readonly; int error = 0; int mode; zfsvfs_t *zfsvfs; znode_t *zp = NULL; ASSERT(vfsp); ASSERT(osname); /* * Initialize the zfs-specific filesystem structure. * Should probably make this a kmem cache, shuffle fields, * and just bzero up to z_hold_mtx[]. */ zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); zfsvfs->z_vfs = vfsp; zfsvfs->z_parent = zfsvfs; zfsvfs->z_assign = TXG_NOWAIT; zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE; zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE; mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); list_create(&zfsvfs->z_all_znodes, sizeof (znode_t), offsetof(znode_t, z_link_node)); rw_init(&zfsvfs->z_um_lock, NULL, RW_DEFAULT, NULL); /* Initialize the generic filesystem structure. */ vfsp->vfs_bcount = 0; vfsp->vfs_data = NULL; if (zfs_create_unique_device(&mount_dev) == -1) { error = ENODEV; goto out; } ASSERT(vfs_devismounted(mount_dev) == 0); if (error = dsl_prop_get_integer(osname, "recordsize", &recordsize, NULL)) goto out; vfsp->vfs_dev = mount_dev; vfsp->vfs_fstype = zfsfstype; vfsp->vfs_bsize = recordsize; vfsp->vfs_flag |= VFS_NOTRUNC; vfsp->vfs_data = zfsvfs; if (error = dsl_prop_get_integer(osname, "readonly", &readonly, NULL)) goto out; if (readonly) mode = DS_MODE_PRIMARY | DS_MODE_READONLY; else mode = DS_MODE_PRIMARY; error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os); if (error == EROFS) { mode = DS_MODE_PRIMARY | DS_MODE_READONLY; error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os); } if (error) goto out; if (error = zfs_init_fs(zfsvfs, &zp, cr)) goto out; /* The call to zfs_init_fs leaves the vnode held, release it here. */ VN_RELE(ZTOV(zp)); if (dmu_objset_is_snapshot(zfsvfs->z_os)) { ASSERT(mode & DS_MODE_READONLY); atime_changed_cb(zfsvfs, B_FALSE); readonly_changed_cb(zfsvfs, B_TRUE); zfsvfs->z_issnap = B_TRUE; } else { error = zfs_register_callbacks(vfsp); if (error) goto out; /* * Start a delete thread running. */ (void) zfs_delete_thread_target(zfsvfs, 1); /* * Parse and replay the intent log. */ zil_replay(zfsvfs->z_os, zfsvfs, &zfsvfs->z_assign, zfs_replay_vector, (void (*)(void *))zfs_delete_wait_empty); if (!zil_disable) zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data); } if (!zfsvfs->z_issnap) zfsctl_create(zfsvfs); out: if (error) { if (zfsvfs->z_os) dmu_objset_close(zfsvfs->z_os); kmem_free(zfsvfs, sizeof (zfsvfs_t)); } else { atomic_add_32(&zfs_active_fs_count, 1); } return (error); }
static void zfs_objset_close(zfsvfs_t *zfsvfs) { zfs_delete_t *zd = &zfsvfs->z_delete_head; znode_t *zp, *nextzp; objset_t *os = zfsvfs->z_os; /* * Stop all delete threads. */ (void) zfs_delete_thread_target(zfsvfs, 0); /* * For forced unmount, at this point all vops except zfs_inactive * are erroring EIO. We need to now suspend zfs_inactive threads * while we are freeing dbufs before switching zfs_inactive * to use behaviour without a objset. */ rw_enter(&zfsvfs->z_um_lock, RW_WRITER); /* * Release all delete in progress znodes * They will be processed when the file system remounts. */ mutex_enter(&zd->z_mutex); while (zp = list_head(&zd->z_znodes)) { list_remove(&zd->z_znodes, zp); zp->z_dbuf_held = 0; dmu_buf_rele(zp->z_dbuf, NULL); } mutex_exit(&zd->z_mutex); /* * Release all holds on dbufs * Note, although we have stopped all other vop threads and * zfs_inactive(), the dmu can callback via znode_pageout_func() * which can zfs_znode_free() the znode. * So we lock z_all_znodes; search the list for a held * dbuf; drop the lock (we know zp can't disappear if we hold * a dbuf lock; then regrab the lock and restart. */ mutex_enter(&zfsvfs->z_znodes_lock); for (zp = list_head(&zfsvfs->z_all_znodes); zp; zp = nextzp) { nextzp = list_next(&zfsvfs->z_all_znodes, zp); if (zp->z_dbuf_held) { /* dbufs should only be held when force unmounting */ zp->z_dbuf_held = 0; mutex_exit(&zfsvfs->z_znodes_lock); dmu_buf_rele(zp->z_dbuf, NULL); /* Start again */ mutex_enter(&zfsvfs->z_znodes_lock); nextzp = list_head(&zfsvfs->z_all_znodes); } } mutex_exit(&zfsvfs->z_znodes_lock); /* * Unregister properties. */ if (!dmu_objset_is_snapshot(os)) zfs_unregister_callbacks(zfsvfs); /* * Switch zfs_inactive to behaviour without an objset. * It just tosses cached pages and frees the znode & vnode. * Then re-enable zfs_inactive threads in that new behaviour. */ zfsvfs->z_unmounted2 = B_TRUE; rw_exit(&zfsvfs->z_um_lock); /* re-enable any zfs_inactive threads */ /* * Close the zil. Can't close the zil while zfs_inactive * threads are blocked as zil_close can call zfs_inactive. */ if (zfsvfs->z_log) { zil_close(zfsvfs->z_log); zfsvfs->z_log = NULL; } /* * Evict all dbufs so that cached znodes will be freed */ if (dmu_objset_evict_dbufs(os, 1)) { txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); (void) dmu_objset_evict_dbufs(os, 0); } /* * Finally close the objset */ dmu_objset_close(os); /* * We can now safely destroy the '.zfs' directory node. */ if (zfsvfs->z_ctldir != NULL) zfsctl_destroy(zfsvfs); }
static int __zvol_create_minor(const char *name) { zvol_state_t *zv; objset_t *os; dmu_object_info_t *doi; uint64_t volsize; unsigned minor = 0; int error = 0; ASSERT(MUTEX_HELD(&zvol_state_lock)); zv = zvol_find_by_name(name); if (zv) { error = EEXIST; goto out; } doi = kmem_alloc(sizeof(dmu_object_info_t), KM_SLEEP); error = dmu_objset_own(name, DMU_OST_ZVOL, B_TRUE, zvol_tag, &os); if (error) goto out_doi; error = dmu_object_info(os, ZVOL_OBJ, doi); if (error) goto out_dmu_objset_disown; error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize); if (error) goto out_dmu_objset_disown; error = zvol_find_minor(&minor); if (error) goto out_dmu_objset_disown; zv = zvol_alloc(MKDEV(zvol_major, minor), name); if (zv == NULL) { error = EAGAIN; goto out_dmu_objset_disown; } if (dmu_objset_is_snapshot(os)) zv->zv_flags |= ZVOL_RDONLY; zv->zv_volblocksize = doi->doi_data_block_size; zv->zv_volsize = volsize; zv->zv_objset = os; set_capacity(zv->zv_disk, zv->zv_volsize >> 9); if (zil_replay_disable) zil_destroy(dmu_objset_zil(os), B_FALSE); else zil_replay(os, zv, zvol_replay_vector); out_dmu_objset_disown: dmu_objset_disown(os, zvol_tag); zv->zv_objset = NULL; out_doi: kmem_free(doi, sizeof(dmu_object_info_t)); out: if (error == 0) { zvol_insert(zv); add_disk(zv->zv_disk); } return (error); }
void dataset_kstats_create(dataset_kstats_t *dk, objset_t *objset) { /* * There should not be anything wrong with having kstats for * snapshots. Since we are not sure how useful they would be * though nor how much their memory overhead would matter in * a filesystem with many snapshots, we skip them for now. */ if (dmu_objset_is_snapshot(objset)) return; /* * At the time of this writing, KSTAT_STRLEN is 255 in Linux, * and the spa_name can theoretically be up to 256 characters. * In reality though the spa_name can be 240 characters max * [see origin directory name check in pool_namecheck()]. Thus, * the naming scheme for the module name below should not cause * any truncations. In the event that a truncation does happen * though, due to some future change, we silently skip creating * the kstat and log the event. */ char kstat_module_name[KSTAT_STRLEN]; int n = snprintf(kstat_module_name, sizeof (kstat_module_name), "zfs/%s", spa_name(dmu_objset_spa(objset))); if (n < 0) { zfs_dbgmsg("failed to create dataset kstat for objset %lld: " " snprintf() for kstat module name returned %d", (unsigned long long)dmu_objset_id(objset), n); return; } else if (n >= KSTAT_STRLEN) { zfs_dbgmsg("failed to create dataset kstat for objset %lld: " "kstat module name length (%d) exceeds limit (%d)", (unsigned long long)dmu_objset_id(objset), n, KSTAT_STRLEN); return; } char kstat_name[KSTAT_STRLEN]; n = snprintf(kstat_name, sizeof (kstat_name), "objset-0x%llx", (unsigned long long)dmu_objset_id(objset)); if (n < 0) { zfs_dbgmsg("failed to create dataset kstat for objset %lld: " " snprintf() for kstat name returned %d", (unsigned long long)dmu_objset_id(objset), n); return; } ASSERT3U(n, <, KSTAT_STRLEN); kstat_t *kstat = kstat_create(kstat_module_name, 0, kstat_name, "dataset", KSTAT_TYPE_NAMED, sizeof (empty_dataset_kstats) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL); if (kstat == NULL) return; dataset_kstat_values_t *dk_kstats = kmem_alloc(sizeof (empty_dataset_kstats), KM_SLEEP); bcopy(&empty_dataset_kstats, dk_kstats, sizeof (empty_dataset_kstats)); char *ds_name = kmem_zalloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); dsl_dataset_name(objset->os_dsl_dataset, ds_name); KSTAT_NAMED_STR_PTR(&dk_kstats->dkv_ds_name) = ds_name; KSTAT_NAMED_STR_BUFLEN(&dk_kstats->dkv_ds_name) = ZFS_MAX_DATASET_NAME_LEN; kstat->ks_data = dk_kstats; kstat->ks_update = dataset_kstats_update; kstat->ks_private = dk; kstat_install(kstat); dk->dk_kstats = kstat; aggsum_init(&dk->dk_aggsums.das_writes, 0); aggsum_init(&dk->dk_aggsums.das_nwritten, 0); aggsum_init(&dk->dk_aggsums.das_reads, 0); aggsum_init(&dk->dk_aggsums.das_nread, 0); aggsum_init(&dk->dk_aggsums.das_nunlinks, 0); aggsum_init(&dk->dk_aggsums.das_nunlinked, 0); }