/* * Create minors for the specified dataset, including children and snapshots. * Pay attention to the 'snapdev' property and iterate over the snapshots * only if they are 'visible'. This approach allows one to assure that the * snapshot metadata is read from disk only if it is needed. * * The name can represent a dataset to be recursively scanned for zvols and * their snapshots, or a single zvol snapshot. If the name represents a * dataset, the scan is performed in two nested stages: * - scan the dataset for zvols, and * - for each zvol, create a minor node, then check if the zvol's snapshots * are 'visible', and only then iterate over the snapshots if needed * * If the name represents a snapshot, a check is perfromed if the snapshot is * 'visible' (which also verifies that the parent is a zvol), and if so, * a minor node for that snapshot is created. */ static int zvol_create_minors_impl(const char *name) { int error = 0; fstrans_cookie_t cookie; char *atp, *parent; if (zvol_inhibit_dev) return (0); parent = kmem_alloc(MAXPATHLEN, KM_SLEEP); (void) strlcpy(parent, name, MAXPATHLEN); if ((atp = strrchr(parent, '@')) != NULL) { uint64_t snapdev; *atp = '\0'; error = dsl_prop_get_integer(parent, "snapdev", &snapdev, NULL); if (error == 0 && snapdev == ZFS_SNAPDEV_VISIBLE) error = zvol_create_minor_impl(name); } else { cookie = spl_fstrans_mark(); error = dmu_objset_find(parent, zvol_create_minors_cb, NULL, DS_FIND_CHILDREN); spl_fstrans_unmark(cookie); } kmem_free(parent, MAXPATHLEN); return (SET_ERROR(error)); }
/* * Set ZFS_PROP_VOLSIZE set entry point. */ int zvol_set_volsize(const char *name, uint64_t volsize) { zvol_state_t *zv = NULL; objset_t *os = NULL; int error; dmu_object_info_t *doi; uint64_t readonly; boolean_t owned = B_FALSE; error = dsl_prop_get_integer(name, zfs_prop_to_name(ZFS_PROP_READONLY), &readonly, NULL); if (error != 0) return (SET_ERROR(error)); if (readonly) return (SET_ERROR(EROFS)); mutex_enter(&zvol_state_lock); zv = zvol_find_by_name(name); if (zv == NULL || zv->zv_objset == NULL) { if ((error = dmu_objset_own(name, DMU_OST_ZVOL, B_FALSE, FTAG, &os)) != 0) { mutex_exit(&zvol_state_lock); return (SET_ERROR(error)); } owned = B_TRUE; if (zv != NULL) zv->zv_objset = os; } else { os = zv->zv_objset; } doi = kmem_alloc(sizeof (dmu_object_info_t), KM_SLEEP); if ((error = dmu_object_info(os, ZVOL_OBJ, doi)) || (error = zvol_check_volsize(volsize, doi->doi_data_block_size))) goto out; error = zvol_update_volsize(volsize, os); kmem_free(doi, sizeof (dmu_object_info_t)); if (error == 0 && zv != NULL) error = zvol_update_live_volsize(zv, volsize); out: if (owned) { dmu_objset_disown(os, FTAG); if (zv != NULL) zv->zv_objset = NULL; } mutex_exit(&zvol_state_lock); return (error); }
/* * Set ZFS_PROP_VOLSIZE set entry point. */ int zvol_set_volsize(const char *name, uint64_t volsize) { zvol_state_t *zv; dmu_object_info_t *doi; objset_t *os = NULL; uint64_t readonly; int error; mutex_enter(&zvol_state_lock); zv = zvol_find_by_name(name); if (zv == NULL) { error = ENXIO; goto out; } doi = kmem_alloc(sizeof(dmu_object_info_t), KM_SLEEP); error = dmu_objset_hold(name, FTAG, &os); if (error) goto out_doi; if ((error = dmu_object_info(os, ZVOL_OBJ, doi)) != 0 || (error = zvol_check_volsize(volsize,doi->doi_data_block_size)) != 0) goto out_doi; VERIFY(dsl_prop_get_integer(name, "readonly", &readonly, NULL) == 0); if (readonly) { error = EROFS; goto out_doi; } if (get_disk_ro(zv->zv_disk) || (zv->zv_flags & ZVOL_RDONLY)) { error = EROFS; goto out_doi; } error = zvol_update_volsize(zv, volsize, os); out_doi: kmem_free(doi, sizeof(dmu_object_info_t)); out: if (os) dmu_objset_rele(os, FTAG); mutex_exit(&zvol_state_lock); return (error); }
static int zvol_first_open(zvol_state_t *zv) { objset_t *os; uint64_t volsize; int error; uint64_t ro; /* lie and say we're read-only */ error = dmu_objset_own(zv->zv_name, DMU_OST_ZVOL, 1, zvol_tag, &os); if (error) return (SET_ERROR(-error)); zv->zv_objset = os; error = dsl_prop_get_integer(zv->zv_name, "readonly", &ro, NULL); if (error) goto out_owned; error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize); if (error) goto out_owned; error = dmu_bonus_hold(os, ZVOL_OBJ, zvol_tag, &zv->zv_dbuf); if (error) goto out_owned; set_capacity(zv->zv_disk, volsize >> 9); zv->zv_volsize = volsize; zv->zv_zilog = zil_open(os, zvol_get_data); if (ro || dmu_objset_is_snapshot(os) || !spa_writeable(dmu_objset_spa(os))) { set_disk_ro(zv->zv_disk, 1); zv->zv_flags |= ZVOL_RDONLY; } else { set_disk_ro(zv->zv_disk, 0); zv->zv_flags &= ~ZVOL_RDONLY; } out_owned: if (error) { dmu_objset_disown(os, zvol_tag); zv->zv_objset = NULL; } return (SET_ERROR(-error)); }
/* * Check that the hex label string is appropriate for the dataset being * mounted into the global_zone proper. * * Return an error if the hex label string is not default or * admin_low/admin_high. For admin_low labels, the corresponding * dataset must be readonly. */ int zfs_check_global_label(const char *dsname, const char *hexsl) { if (strcasecmp(hexsl, ZFS_MLSLABEL_DEFAULT) == 0) return (0); if (strcasecmp(hexsl, ADMIN_HIGH) == 0) return (0); if (strcasecmp(hexsl, ADMIN_LOW) == 0) { /* must be readonly */ uint64_t rdonly; if (dsl_prop_get_integer(dsname, zfs_prop_to_name(ZFS_PROP_READONLY), &rdonly, NULL)) return (SET_ERROR(EACCES)); return (rdonly ? 0 : EACCES); } return (SET_ERROR(EACCES)); }
static int __zvol_snapdev_hidden(const char *name) { uint64_t snapdev; char *parent; char *atp; int error = 0; parent = kmem_alloc(MAXPATHLEN, KM_SLEEP); (void) strlcpy(parent, name, MAXPATHLEN); if ((atp = strrchr(parent, '@')) != NULL) { *atp = '\0'; error = dsl_prop_get_integer(parent, "snapdev", &snapdev, NULL); if ((error == 0) && (snapdev == ZFS_SNAPDEV_HIDDEN)) error = ENODEV; } kmem_free(parent, MAXPATHLEN); return (error); }
/* * Mask errors to continue dmu_objset_find() traversal */ static int zvol_create_minors_cb(const char *dsname, void *arg) { uint64_t snapdev; int error; ASSERT0(MUTEX_HELD(&spa_namespace_lock)); error = dsl_prop_get_integer(dsname, "snapdev", &snapdev, NULL); if (error) return (0); /* * Given the name and the 'snapdev' property, create device minor nodes * with the linkages to zvols/snapshots as needed. * If the name represents a zvol, create a minor node for the zvol, then * check if its snapshots are 'visible', and if so, iterate over the * snapshots and create device minor nodes for those. */ if (strchr(dsname, '@') == 0) { /* create minor for the 'dsname' explicitly */ error = zvol_create_minor_impl(dsname); if ((error == 0 || error == EEXIST) && (snapdev == ZFS_SNAPDEV_VISIBLE)) { fstrans_cookie_t cookie = spl_fstrans_mark(); /* * traverse snapshots only, do not traverse children, * and skip the 'dsname' */ error = dmu_objset_find((char *)dsname, zvol_create_snap_minor_cb, (void *)dsname, DS_FIND_SNAPSHOTS); spl_fstrans_unmark(cookie); } } else { dprintf("zvol_create_minors_cb(): %s is not a zvol name\n", dsname); } return (0); }
int zfs_domount(struct super_block *sb, zfs_mntopts_t *zmo, int silent) { const char *osname = zmo->z_osname; zfs_sb_t *zsb; struct inode *root_inode; uint64_t recordsize; int error; error = zfs_sb_create(osname, zmo, &zsb); if (error) return (error); if ((error = dsl_prop_get_integer(osname, "recordsize", &recordsize, NULL))) goto out; zsb->z_sb = sb; sb->s_fs_info = zsb; sb->s_magic = ZFS_SUPER_MAGIC; sb->s_maxbytes = MAX_LFS_FILESIZE; sb->s_time_gran = 1; sb->s_blocksize = recordsize; sb->s_blocksize_bits = ilog2(recordsize); zsb->z_bdi.ra_pages = 0; sb->s_bdi = &zsb->z_bdi; error = -zpl_bdi_setup_and_register(&zsb->z_bdi, "zfs"); if (error) goto out; /* Set callback operations for the file system. */ sb->s_op = &zpl_super_operations; sb->s_xattr = zpl_xattr_handlers; sb->s_export_op = &zpl_export_operations; #ifdef HAVE_S_D_OP sb->s_d_op = &zpl_dentry_operations; #endif /* HAVE_S_D_OP */ /* Set features for file system. */ zfs_set_fuid_feature(zsb); if (dmu_objset_is_snapshot(zsb->z_os)) { uint64_t pval; atime_changed_cb(zsb, B_FALSE); readonly_changed_cb(zsb, B_TRUE); if ((error = dsl_prop_get_integer(osname, "xattr", &pval, NULL))) goto out; xattr_changed_cb(zsb, pval); if ((error = dsl_prop_get_integer(osname, "acltype", &pval, NULL))) goto out; acltype_changed_cb(zsb, pval); zsb->z_issnap = B_TRUE; zsb->z_os->os_sync = ZFS_SYNC_DISABLED; zsb->z_snap_defer_time = jiffies; mutex_enter(&zsb->z_os->os_user_ptr_lock); dmu_objset_set_user(zsb->z_os, zsb); mutex_exit(&zsb->z_os->os_user_ptr_lock); } else { error = zfs_sb_setup(zsb, B_TRUE); } /* Allocate a root inode for the filesystem. */ error = zfs_root(zsb, &root_inode); if (error) { (void) zfs_umount(sb); goto out; } /* Allocate a root dentry for the filesystem */ sb->s_root = d_make_root(root_inode); if (sb->s_root == NULL) { (void) zfs_umount(sb); error = SET_ERROR(ENOMEM); goto out; } if (!zsb->z_issnap) zfsctl_create(zsb); zsb->z_arc_prune = arc_add_prune_callback(zpl_prune_sb, sb); out: if (error) { dmu_objset_disown(zsb->z_os, zsb); zfs_sb_free(zsb); } return (error); }
static int zfs_domount(struct mount *mp, dev_t mount_dev, char *osname, vfs_context_t ctx) { uint64_t readonly; int error = 0; int mode; zfsvfs_t *zfsvfs; znode_t *zp = NULL; struct timeval tv; ASSERT(mp); ASSERT(osname); /* * Initialize the zfs-specific filesystem structure. * Should probably make this a kmem cache, shuffle fields, * and just bzero up to z_hold_mtx[]. */ zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); zfsvfs->z_vfs = mp; zfsvfs->z_parent = zfsvfs; zfsvfs->z_assign = TXG_NOWAIT; zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE; zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE; mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); list_create(&zfsvfs->z_all_znodes, sizeof (znode_t), offsetof(znode_t, z_link_node)); rw_init(&zfsvfs->z_unmount_lock, NULL, RW_DEFAULT, NULL); rw_init(&zfsvfs->z_unmount_inactive_lock, NULL, RW_DEFAULT, NULL); #ifndef __APPLE__ /* Initialize the generic filesystem structure. */ vfsp->vfs_bcount = 0; vfsp->vfs_data = NULL; if (zfs_create_unique_device(&mount_dev) == -1) { error = ENODEV; goto out; } ASSERT(vfs_devismounted(mount_dev) == 0); #endif vfs_setfsprivate(mp, zfsvfs); if (error = dsl_prop_get_integer(osname, "readonly", &readonly, NULL)) goto out; if (readonly) { mode = DS_MODE_PRIMARY | DS_MODE_READONLY; vfs_setflags(mp, (u_int64_t)((unsigned int)MNT_RDONLY)); } else { mode = DS_MODE_PRIMARY; } error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os); if (error == EROFS) { mode = DS_MODE_PRIMARY | DS_MODE_READONLY; error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os); } if (error) goto out; if (error = zfs_init_fs(zfsvfs, &zp, (cred_t *) vfs_context_ucred(ctx))) goto out; /* The call to zfs_init_fs leaves the vnode held, release it here. */ vnode_put(ZTOV(zp)); if (dmu_objset_is_snapshot(zfsvfs->z_os)) { uint64_t xattr; ASSERT(mode & DS_MODE_READONLY); #if 0 atime_changed_cb(zfsvfs, B_FALSE); readonly_changed_cb(zfsvfs, B_TRUE); if (error = dsl_prop_get_integer(osname, "xattr", &xattr, NULL)) goto out; xattr_changed_cb(zfsvfs, xattr); #endif zfsvfs->z_issnap = B_TRUE; } else { if (!vfs_isrdonly(mp)) zfs_unlinked_drain(zfsvfs); #ifndef __APPLE__ /* * Parse and replay the intent log. * * Because of ziltest, this must be done after * zfs_unlinked_drain(). (Further note: ziltest doesn't * use readonly mounts, where zfs_unlinked_drain() isn't * called.) This is because ziltest causes spa_sync() * to think it's committed, but actually it is not, so * the intent log contains many txg's worth of changes. * * In particular, if object N is in the unlinked set in * the last txg to actually sync, then it could be * actually freed in a later txg and then reallocated in * a yet later txg. This would write a "create object * N" record to the intent log. Normally, this would be * fine because the spa_sync() would have written out * the fact that object N is free, before we could write * the "create object N" intent log record. * * But when we are in ziltest mode, we advance the "open * txg" without actually spa_sync()-ing the changes to * disk. So we would see that object N is still * allocated and in the unlinked set, and there is an * intent log record saying to allocate it. */ zil_replay(zfsvfs->z_os, zfsvfs, &zfsvfs->z_assign, zfs_replay_vector); if (!zil_disable) zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data); #endif } #if 0 if (!zfsvfs->z_issnap) zfsctl_create(zfsvfs); #endif /* * Record the mount time (for Spotlight) */ microtime(&tv); zfsvfs->z_mount_time = tv.tv_sec; out: if (error) { if (zfsvfs->z_os) dmu_objset_close(zfsvfs->z_os); mutex_destroy(&zfsvfs->z_znodes_lock); list_destroy(&zfsvfs->z_all_znodes); rw_destroy(&zfsvfs->z_unmount_lock); rw_destroy(&zfsvfs->z_unmount_inactive_lock); kmem_free(zfsvfs, sizeof (zfsvfs_t)); } else { OSIncrementAtomic(&zfs_active_fs_count); (void) copystr(osname, vfs_statfs(mp)->f_mntfromname, MNAMELEN - 1, 0); vfs_getnewfsid(mp); } return (error); }
int zfs_domount(struct super_block *sb, void *data, int silent) { zpl_mount_data_t *zmd = data; const char *osname = zmd->z_osname; zfs_sb_t *zsb; struct inode *root_inode; uint64_t recordsize; int error; error = zfs_sb_create(osname, &zsb); if (error) return (error); if ((error = dsl_prop_get_integer(osname, "recordsize", &recordsize, NULL))) goto out; zsb->z_sb = sb; sb->s_fs_info = zsb; sb->s_magic = ZFS_SUPER_MAGIC; sb->s_maxbytes = MAX_LFS_FILESIZE; sb->s_time_gran = 1; sb->s_blocksize = recordsize; sb->s_blocksize_bits = ilog2(recordsize); #ifdef HAVE_BDI /* * 2.6.32 API change, * Added backing_device_info (BDI) per super block interfaces. A BDI * must be configured when using a non-device backed filesystem for * proper writeback. This is not required for older pdflush kernels. * * NOTE: Linux read-ahead is disabled in favor of zfs read-ahead. */ zsb->z_bdi.ra_pages = 0; sb->s_bdi = &zsb->z_bdi; error = -bdi_setup_and_register(&zsb->z_bdi, "zfs", BDI_CAP_MAP_COPY); if (error) goto out; #endif /* HAVE_BDI */ /* Set callback operations for the file system. */ sb->s_op = &zpl_super_operations; sb->s_xattr = zpl_xattr_handlers; sb->s_export_op = &zpl_export_operations; #ifdef HAVE_S_D_OP sb->s_d_op = &zpl_dentry_operations; #endif /* HAVE_S_D_OP */ /* Set features for file system. */ zfs_set_fuid_feature(zsb); if (dmu_objset_is_snapshot(zsb->z_os)) { uint64_t pval; atime_changed_cb(zsb, B_FALSE); readonly_changed_cb(zsb, B_TRUE); if ((error = dsl_prop_get_integer(osname,"xattr",&pval,NULL))) goto out; xattr_changed_cb(zsb, pval); zsb->z_issnap = B_TRUE; zsb->z_os->os_sync = ZFS_SYNC_DISABLED; mutex_enter(&zsb->z_os->os_user_ptr_lock); dmu_objset_set_user(zsb->z_os, zsb); mutex_exit(&zsb->z_os->os_user_ptr_lock); } else { error = zfs_sb_setup(zsb, B_TRUE); } /* Allocate a root inode for the filesystem. */ error = zfs_root(zsb, &root_inode); if (error) { (void) zfs_umount(sb); goto out; } /* Allocate a root dentry for the filesystem */ sb->s_root = d_make_root(root_inode); if (sb->s_root == NULL) { (void) zfs_umount(sb); error = ENOMEM; goto out; } if (!zsb->z_issnap) zfsctl_create(zsb); out: if (error) { dmu_objset_disown(zsb->z_os, zsb); zfs_sb_free(zsb); } return (error); }
static int zvol_first_open(zvol_state_t *zv) { objset_t *os; uint64_t volsize; int locked = 0; int error; uint64_t ro; /* * In all other cases the spa_namespace_lock is taken before the * bdev->bd_mutex lock. But in this case the Linux __blkdev_get() * function calls fops->open() with the bdev->bd_mutex lock held. * * To avoid a potential lock inversion deadlock we preemptively * try to take the spa_namespace_lock(). Normally it will not * be contended and this is safe because spa_open_common() handles * the case where the caller already holds the spa_namespace_lock. * * When it is contended we risk a lock inversion if we were to * block waiting for the lock. Luckily, the __blkdev_get() * function allows us to return -ERESTARTSYS which will result in * bdev->bd_mutex being dropped, reacquired, and fops->open() being * called again. This process can be repeated safely until both * locks are acquired. */ if (!mutex_owned(&spa_namespace_lock)) { locked = mutex_tryenter(&spa_namespace_lock); if (!locked) return (-ERESTARTSYS); } /* lie and say we're read-only */ error = dmu_objset_own(zv->zv_name, DMU_OST_ZVOL, 1, zvol_tag, &os); if (error) goto out_mutex; error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize); if (error) { dmu_objset_disown(os, zvol_tag); goto out_mutex; } zv->zv_objset = os; error = dmu_bonus_hold(os, ZVOL_OBJ, zvol_tag, &zv->zv_dbuf); if (error) { dmu_objset_disown(os, zvol_tag); goto out_mutex; } set_capacity(zv->zv_disk, volsize >> 9); zv->zv_volsize = volsize; zv->zv_zilog = zil_open(os, zvol_get_data); VERIFY(dsl_prop_get_integer(zv->zv_name, "readonly", &ro, NULL) == 0); if (ro || dmu_objset_is_snapshot(os) || !spa_writeable(dmu_objset_spa(os))) { set_disk_ro(zv->zv_disk, 1); zv->zv_flags |= ZVOL_RDONLY; } else { set_disk_ro(zv->zv_disk, 0); zv->zv_flags &= ~ZVOL_RDONLY; } out_mutex: if (locked) mutex_exit(&spa_namespace_lock); return (-error); }
static int zfs_domount(vfs_t *vfsp, char *osname, cred_t *cr) { dev_t mount_dev; uint64_t recordsize, readonly; int error = 0; int mode; zfsvfs_t *zfsvfs; znode_t *zp = NULL; ASSERT(vfsp); ASSERT(osname); /* * Initialize the zfs-specific filesystem structure. * Should probably make this a kmem cache, shuffle fields, * and just bzero up to z_hold_mtx[]. */ zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); zfsvfs->z_vfs = vfsp; zfsvfs->z_parent = zfsvfs; zfsvfs->z_assign = TXG_NOWAIT; zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE; zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE; mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); list_create(&zfsvfs->z_all_znodes, sizeof (znode_t), offsetof(znode_t, z_link_node)); rw_init(&zfsvfs->z_um_lock, NULL, RW_DEFAULT, NULL); /* Initialize the generic filesystem structure. */ vfsp->vfs_bcount = 0; vfsp->vfs_data = NULL; if (zfs_create_unique_device(&mount_dev) == -1) { error = ENODEV; goto out; } ASSERT(vfs_devismounted(mount_dev) == 0); if (error = dsl_prop_get_integer(osname, "recordsize", &recordsize, NULL)) goto out; vfsp->vfs_dev = mount_dev; vfsp->vfs_fstype = zfsfstype; vfsp->vfs_bsize = recordsize; vfsp->vfs_flag |= VFS_NOTRUNC; vfsp->vfs_data = zfsvfs; if (error = dsl_prop_get_integer(osname, "readonly", &readonly, NULL)) goto out; if (readonly) mode = DS_MODE_PRIMARY | DS_MODE_READONLY; else mode = DS_MODE_PRIMARY; error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os); if (error == EROFS) { mode = DS_MODE_PRIMARY | DS_MODE_READONLY; error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os); } if (error) goto out; if (error = zfs_init_fs(zfsvfs, &zp, cr)) goto out; /* The call to zfs_init_fs leaves the vnode held, release it here. */ VN_RELE(ZTOV(zp)); if (dmu_objset_is_snapshot(zfsvfs->z_os)) { ASSERT(mode & DS_MODE_READONLY); atime_changed_cb(zfsvfs, B_FALSE); readonly_changed_cb(zfsvfs, B_TRUE); zfsvfs->z_issnap = B_TRUE; } else { error = zfs_register_callbacks(vfsp); if (error) goto out; /* * Start a delete thread running. */ (void) zfs_delete_thread_target(zfsvfs, 1); /* * Parse and replay the intent log. */ zil_replay(zfsvfs->z_os, zfsvfs, &zfsvfs->z_assign, zfs_replay_vector, (void (*)(void *))zfs_delete_wait_empty); if (!zil_disable) zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data); } if (!zfsvfs->z_issnap) zfsctl_create(zfsvfs); out: if (error) { if (zfsvfs->z_os) dmu_objset_close(zfsvfs->z_os); kmem_free(zfsvfs, sizeof (zfsvfs_t)); } else { atomic_add_32(&zfs_active_fs_count, 1); } return (error); }
static int zfs_domount(vfs_t *vfsp, char *osname) { uint64_t recordsize, readonly; int error = 0; int mode; zfsvfs_t *zfsvfs; znode_t *zp = NULL; ASSERT(vfsp); ASSERT(osname); /* * Initialize the zfs-specific filesystem structure. * Should probably make this a kmem cache, shuffle fields, * and just bzero up to z_hold_mtx[]. */ zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); zfsvfs->z_vfs = vfsp; zfsvfs->z_parent = zfsvfs; zfsvfs->z_assign = TXG_NOWAIT; zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE; zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE; mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&zfsvfs->z_online_recv_lock, NULL, MUTEX_DEFAULT, NULL); list_create(&zfsvfs->z_all_znodes, sizeof (znode_t), offsetof(znode_t, z_link_node)); rrw_init(&zfsvfs->z_teardown_lock); rw_init(&zfsvfs->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL); rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL); if (error = dsl_prop_get_integer(osname, "recordsize", &recordsize, NULL)) goto out; zfsvfs->z_vfs->vfs_bsize = recordsize; vfsp->vfs_data = zfsvfs; vfsp->mnt_flag |= MNT_LOCAL; vfsp->mnt_kern_flag |= MNTK_MPSAFE; vfsp->mnt_kern_flag |= MNTK_LOOKUP_SHARED; vfsp->mnt_kern_flag |= MNTK_SHARED_WRITES; if (error = dsl_prop_get_integer(osname, "readonly", &readonly, NULL)) goto out; mode = DS_MODE_OWNER; if (readonly) mode |= DS_MODE_READONLY; error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os); if (error == EROFS) { mode = DS_MODE_OWNER | DS_MODE_READONLY; error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os); } if (error) goto out; if (error = zfs_init_fs(zfsvfs, &zp)) goto out; /* * Set features for file system. */ zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os); if (zfsvfs->z_use_fuids) { vfs_set_feature(vfsp, VFSFT_XVATTR); vfs_set_feature(vfsp, VFSFT_SYSATTR_VIEWS); vfs_set_feature(vfsp, VFSFT_ACEMASKONACCESS); vfs_set_feature(vfsp, VFSFT_ACLONCREATE); } if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) { vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS); vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE); vfs_set_feature(vfsp, VFSFT_NOCASESENSITIVE); } else if (zfsvfs->z_case == ZFS_CASE_MIXED) { vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS); vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE); } if (dmu_objset_is_snapshot(zfsvfs->z_os)) { uint64_t pval; ASSERT(mode & DS_MODE_READONLY); atime_changed_cb(zfsvfs, B_FALSE); readonly_changed_cb(zfsvfs, B_TRUE); if (error = dsl_prop_get_integer(osname, "xattr", &pval, NULL)) goto out; xattr_changed_cb(zfsvfs, pval); zfsvfs->z_issnap = B_TRUE; } else { error = zfsvfs_setup(zfsvfs, B_TRUE); } vfs_mountedfrom(vfsp, osname); if (!zfsvfs->z_issnap) zfsctl_create(zfsvfs); out: if (error) { if (zfsvfs->z_os) dmu_objset_close(zfsvfs->z_os); zfs_freezfsvfs(zfsvfs); } else { atomic_add_32(&zfs_active_fs_count, 1); } return (error); }
static int zfs_register_callbacks(vfs_t *vfsp) { struct dsl_dataset *ds = NULL; objset_t *os = NULL; zfsvfs_t *zfsvfs = NULL; uint64_t nbmand; int readonly, do_readonly = FALSE; int setuid, do_setuid = FALSE; int exec, do_exec = FALSE; int xattr, do_xattr = FALSE; int atime, do_atime = FALSE; int error = 0; ASSERT(vfsp); zfsvfs = vfsp->vfs_data; ASSERT(zfsvfs); os = zfsvfs->z_os; /* * This function can be called for a snapshot when we update snapshot's * mount point, which isn't really supported. */ if (dmu_objset_is_snapshot(os)) return (EOPNOTSUPP); /* * The act of registering our callbacks will destroy any mount * options we may have. In order to enable temporary overrides * of mount options, we stash away the current values and * restore them after we register the callbacks. */ if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) { readonly = B_TRUE; do_readonly = B_TRUE; } else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) { readonly = B_FALSE; do_readonly = B_TRUE; } if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) { setuid = B_FALSE; do_setuid = B_TRUE; } else { if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) { setuid = B_FALSE; do_setuid = B_TRUE; } else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) { setuid = B_TRUE; do_setuid = B_TRUE; } } if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) { exec = B_FALSE; do_exec = B_TRUE; } else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) { exec = B_TRUE; do_exec = B_TRUE; } if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) { xattr = B_FALSE; do_xattr = B_TRUE; } else if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) { xattr = B_TRUE; do_xattr = B_TRUE; } if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) { atime = B_FALSE; do_atime = B_TRUE; } else if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) { atime = B_TRUE; do_atime = B_TRUE; } /* * nbmand is a special property. It can only be changed at * mount time. * * This is weird, but it is documented to only be changeable * at mount time. */ if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) { nbmand = B_FALSE; } else if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL)) { nbmand = B_TRUE; } else { char osname[MAXNAMELEN]; dmu_objset_name(os, osname); if (error = dsl_prop_get_integer(osname, "nbmand", &nbmand, NULL)) { return (error); } } /* * Register property callbacks. * * It would probably be fine to just check for i/o error from * the first prop_register(), but I guess I like to go * overboard... */ ds = dmu_objset_ds(os); error = dsl_prop_register(ds, "atime", atime_changed_cb, zfsvfs); error = error ? error : dsl_prop_register(ds, "xattr", xattr_changed_cb, zfsvfs); error = error ? error : dsl_prop_register(ds, "recordsize", blksz_changed_cb, zfsvfs); error = error ? error : dsl_prop_register(ds, "readonly", readonly_changed_cb, zfsvfs); error = error ? error : dsl_prop_register(ds, "setuid", setuid_changed_cb, zfsvfs); error = error ? error : dsl_prop_register(ds, "exec", exec_changed_cb, zfsvfs); error = error ? error : dsl_prop_register(ds, "snapdir", snapdir_changed_cb, zfsvfs); error = error ? error : dsl_prop_register(ds, "aclmode", acl_mode_changed_cb, zfsvfs); error = error ? error : dsl_prop_register(ds, "aclinherit", acl_inherit_changed_cb, zfsvfs); error = error ? error : dsl_prop_register(ds, "vscan", vscan_changed_cb, zfsvfs); if (error) goto unregister; /* * Invoke our callbacks to restore temporary mount options. */ if (do_readonly) readonly_changed_cb(zfsvfs, readonly); if (do_setuid) setuid_changed_cb(zfsvfs, setuid); if (do_exec) exec_changed_cb(zfsvfs, exec); if (do_xattr) xattr_changed_cb(zfsvfs, xattr); if (do_atime) atime_changed_cb(zfsvfs, atime); nbmand_changed_cb(zfsvfs, nbmand); return (0); unregister: /* * We may attempt to unregister some callbacks that are not * registered, but this is OK; it will simply return ENOMSG, * which we will ignore. */ (void) dsl_prop_unregister(ds, "atime", atime_changed_cb, zfsvfs); (void) dsl_prop_unregister(ds, "xattr", xattr_changed_cb, zfsvfs); (void) dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, zfsvfs); (void) dsl_prop_unregister(ds, "readonly", readonly_changed_cb, zfsvfs); (void) dsl_prop_unregister(ds, "setuid", setuid_changed_cb, zfsvfs); (void) dsl_prop_unregister(ds, "exec", exec_changed_cb, zfsvfs); (void) dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, zfsvfs); (void) dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, zfsvfs); (void) dsl_prop_unregister(ds, "aclinherit", acl_inherit_changed_cb, zfsvfs); (void) dsl_prop_unregister(ds, "vscan", vscan_changed_cb, zfsvfs); return (error); }