static int
zfs_sa_setup(objset_t *osp, sa_attr_type_t **sa_table)
{
	uint64_t sa_obj = 0;
	int error;

	error = zap_lookup(osp, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, &sa_obj);
	if (error != 0 && error != ENOENT)
		return (error);

	error = sa_setup(osp, sa_obj, zfs_attr_table, ZPL_END, sa_table);
	return (error);
}
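/*
 * Usage sketch (hypothetical caller, not part of this file): any consumer
 * holding an objset can resolve the SA attribute table before decoding
 * SA-backed dnodes.  ENOENT from the ZFS_SA_ATTRS lookup is tolerated
 * above because pre-SA pools have no registration object; sa_setup() is
 * then called with sa_obj == 0 and builds the table without an on-disk
 * registration object.
 *
 *	sa_attr_type_t *sa_table;
 *	int err = zfs_sa_setup(os, &sa_table);
 *	if (err != 0)
 *		return (err);
 */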
int
zfs_sb_create(const char *osname, zfs_mntopts_t *zmo, zfs_sb_t **zsbp)
{
	objset_t *os;
	zfs_sb_t *zsb;
	uint64_t zval;
	int i, error;
	uint64_t sa_obj;

	zsb = kmem_zalloc(sizeof (zfs_sb_t), KM_SLEEP);

	/*
	 * Allocate z_hold_mtx before any failure path so that both
	 * error paths below can free it unconditionally.
	 */
	zsb->z_hold_mtx = vmem_zalloc(sizeof (kmutex_t) * ZFS_OBJ_MTX_SZ,
	    KM_SLEEP);

	/*
	 * We claim to always be readonly so we can open snapshots;
	 * other ZPL code will prevent us from writing to snapshots.
	 */
	error = dmu_objset_own(osname, DMU_OST_ZFS, B_TRUE, zsb, &os);
	if (error) {
		vmem_free(zsb->z_hold_mtx, sizeof (kmutex_t) * ZFS_OBJ_MTX_SZ);
		kmem_free(zsb, sizeof (zfs_sb_t));
		return (error);
	}

	/*
	 * Optional temporary mount options, free'd in zfs_sb_free().
	 */
	zsb->z_mntopts = (zmo ? zmo : zfs_mntopts_alloc());

	/*
	 * Initialize the zfs-specific filesystem structure.
	 * Should probably make this a kmem cache, shuffle fields,
	 * and just bzero up to z_hold_mtx[].
	 */
	zsb->z_sb = NULL;
	zsb->z_parent = zsb;
	zsb->z_max_blksz = SPA_OLD_MAXBLOCKSIZE;
	zsb->z_show_ctldir = ZFS_SNAPDIR_VISIBLE;
	zsb->z_os = os;

	error = zfs_get_zplprop(os, ZFS_PROP_VERSION, &zsb->z_version);
	if (error) {
		goto out;
	} else if (zsb->z_version > ZPL_VERSION) {
		error = SET_ERROR(ENOTSUP);
		goto out;
	}
	if ((error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &zval)) != 0)
		goto out;
	zsb->z_norm = (int)zval;

	if ((error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &zval)) != 0)
		goto out;
	zsb->z_utf8 = (zval != 0);

	if ((error = zfs_get_zplprop(os, ZFS_PROP_CASE, &zval)) != 0)
		goto out;
	zsb->z_case = (uint_t)zval;

	if ((error = zfs_get_zplprop(os, ZFS_PROP_ACLTYPE, &zval)) != 0)
		goto out;
	zsb->z_acl_type = (uint_t)zval;

	/*
	 * Fold case on file systems that are always or sometimes case
	 * insensitive.
	 */
	if (zsb->z_case == ZFS_CASE_INSENSITIVE ||
	    zsb->z_case == ZFS_CASE_MIXED)
		zsb->z_norm |= U8_TEXTPREP_TOUPPER;

	zsb->z_use_fuids = USE_FUIDS(zsb->z_version, zsb->z_os);
	zsb->z_use_sa = USE_SA(zsb->z_version, zsb->z_os);

	if (zsb->z_use_sa) {
		/* should either have both of these objects or none */
		error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1,
		    &sa_obj);
		if (error)
			goto out;

		error = zfs_get_zplprop(os, ZFS_PROP_XATTR, &zval);
		if ((error == 0) && (zval == ZFS_XATTR_SA))
			zsb->z_xattr_sa = B_TRUE;
	} else {
		/*
		 * Pre-SA version file systems should never touch either
		 * the attribute registration or layout objects.
		 */
		sa_obj = 0;
	}

	error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END,
	    &zsb->z_attr_table);
	if (error)
		goto out;

	if (zsb->z_version >= ZPL_VERSION_SA)
		sa_register_update_callback(os, zfs_sa_upgrade);

	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1,
	    &zsb->z_root);
	if (error)
		goto out;
	ASSERT(zsb->z_root != 0);

	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1,
	    &zsb->z_unlinkedobj);
	if (error)
		goto out;

	error = zap_lookup(os, MASTER_NODE_OBJ,
	    zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA],
	    8, 1, &zsb->z_userquota_obj);
	if (error && error != ENOENT)
		goto out;

	error = zap_lookup(os, MASTER_NODE_OBJ,
	    zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA],
	    8, 1, &zsb->z_groupquota_obj);
	if (error && error != ENOENT)
		goto out;

	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES, 8, 1,
	    &zsb->z_fuid_obj);
	if (error && error != ENOENT)
		goto out;

	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SHARES_DIR, 8, 1,
	    &zsb->z_shares_dir);
	if (error && error != ENOENT)
		goto out;

	mutex_init(&zsb->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&zsb->z_lock, NULL, MUTEX_DEFAULT, NULL);
	list_create(&zsb->z_all_znodes, sizeof (znode_t),
	    offsetof(znode_t, z_link_node));
	rrm_init(&zsb->z_teardown_lock, B_FALSE);
	rw_init(&zsb->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL);
	rw_init(&zsb->z_fuid_lock, NULL, RW_DEFAULT, NULL);

	for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
		mutex_init(&zsb->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL);

	*zsbp = zsb;
	return (0);

out:
	dmu_objset_disown(os, zsb);
	*zsbp = NULL;

	vmem_free(zsb->z_hold_mtx, sizeof (kmutex_t) * ZFS_OBJ_MTX_SZ);
	kmem_free(zsb, sizeof (zfs_sb_t));
	return (error);
}
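/*
 * Usage sketch (simplified, mirroring a zfs_domount()-style caller; the
 * surrounding error handling is illustrative):
 *
 *	zfs_sb_t *zsb;
 *	int err = zfs_sb_create(osname, zmo, &zsb);
 *	if (err != 0)
 *		return (err);
 *	... attach zsb->z_sb, then zfs_sb_setup(zsb, B_TRUE) ...
 *
 * On failure zfs_sb_create() disowns the objset and frees zsb itself,
 * so the caller must not touch *zsbp when an error is returned.
 */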
/*
 * Reopen zfs_sb_t and release VFS ops.
 */
int
zfs_resume_fs(zfs_sb_t *zsb, const char *osname)
{
	int err, err2;
	znode_t *zp;
	uint64_t sa_obj = 0;

	ASSERT(RRM_WRITE_HELD(&zsb->z_teardown_lock));
	ASSERT(RW_WRITE_HELD(&zsb->z_teardown_inactive_lock));

	/*
	 * We already own this, so just hold and rele it to update the
	 * objset_t, as the one we had before may have been evicted.
	 */
	VERIFY0(dmu_objset_hold(osname, zsb, &zsb->z_os));
	VERIFY3P(zsb->z_os->os_dsl_dataset->ds_owner, ==, zsb);
	VERIFY(dsl_dataset_long_held(zsb->z_os->os_dsl_dataset));
	dmu_objset_rele(zsb->z_os, zsb);

	/*
	 * Make sure version hasn't changed
	 */
	err = zfs_get_zplprop(zsb->z_os, ZFS_PROP_VERSION,
	    &zsb->z_version);
	if (err)
		goto bail;

	err = zap_lookup(zsb->z_os, MASTER_NODE_OBJ,
	    ZFS_SA_ATTRS, 8, 1, &sa_obj);
	if (err && zsb->z_version >= ZPL_VERSION_SA)
		goto bail;

	if ((err = sa_setup(zsb->z_os, sa_obj,
	    zfs_attr_table, ZPL_END, &zsb->z_attr_table)) != 0)
		goto bail;

	if (zsb->z_version >= ZPL_VERSION_SA)
		sa_register_update_callback(zsb->z_os, zfs_sa_upgrade);

	VERIFY(zfs_sb_setup(zsb, B_FALSE) == 0);

	zfs_set_fuid_feature(zsb);
	zsb->z_rollback_time = jiffies;

	/*
	 * Attempt to re-establish all the active inodes with their
	 * dbufs.  If a zfs_rezget() fails, then we unhash the inode
	 * and mark it stale.  This prevents a collision if a new
	 * inode/object is created which must use the same inode
	 * number.  The stale inode will be released when the
	 * VFS prunes the dentry holding the remaining references
	 * on the stale inode.
	 */
	mutex_enter(&zsb->z_znodes_lock);
	for (zp = list_head(&zsb->z_all_znodes); zp;
	    zp = list_next(&zsb->z_all_znodes, zp)) {
		err2 = zfs_rezget(zp);
		if (err2) {
			remove_inode_hash(ZTOI(zp));
			zp->z_is_stale = B_TRUE;
		}
	}
	mutex_exit(&zsb->z_znodes_lock);

bail:
	/* release the VFS ops */
	rw_exit(&zsb->z_teardown_inactive_lock);
	rrm_exit(&zsb->z_teardown_lock, FTAG);

	if (err) {
		/*
		 * Since we couldn't setup the sa framework, try to force
		 * unmount this file system.
		 */
		if (zsb->z_os)
			(void) zfs_umount(zsb->z_sb);
	}
	return (err);
}
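/*
 * zfs_resume_fs() is the second half of a suspend/resume pair used around
 * operations that replace the underlying objset (e.g. rollback and
 * receive).  A sketch of the expected calling pattern (the rollback call
 * shown is illustrative):
 *
 *	error = zfs_suspend_fs(zsb);		// takes the teardown locks
 *	error = dsl_dataset_rollback(...);	// mutate the dataset
 *	error = zfs_resume_fs(zsb, osname);	// rebuild SA table, rezget
 */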
void
zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx)
{
	struct super_block *sb;
	zfs_sb_t *zsb;
	uint64_t moid, obj, sa_obj, version;
	uint64_t sense = ZFS_CASE_SENSITIVE;
	uint64_t norm = 0;
	nvpair_t *elem;
	int error;
	int i;
	znode_t *rootzp = NULL;
	vattr_t vattr;
	znode_t *zp;
	zfs_acl_ids_t acl_ids;

	/*
	 * First attempt to create master node.
	 */
	/*
	 * In an empty objset, there are no blocks to read and thus
	 * there can be no i/o errors (which we assert below).
	 */
	moid = MASTER_NODE_OBJ;
	error = zap_create_claim(os, moid, DMU_OT_MASTER_NODE, DMU_OT_NONE,
	    0, tx);
	ASSERT(error == 0);

	/*
	 * Set starting attributes.
	 */
	version = zfs_zpl_version_map(spa_version(dmu_objset_spa(os)));
	elem = NULL;
	while ((elem = nvlist_next_nvpair(zplprops, elem)) != NULL) {
		/* For the moment we expect all zpl props to be uint64_ts */
		uint64_t val;
		char *name;

		ASSERT(nvpair_type(elem) == DATA_TYPE_UINT64);
		VERIFY(nvpair_value_uint64(elem, &val) == 0);
		name = nvpair_name(elem);
		if (strcmp(name, zfs_prop_to_name(ZFS_PROP_VERSION)) == 0) {
			if (val < version)
				version = val;
		} else {
			error = zap_update(os, moid, name, 8, 1, &val, tx);
		}
		ASSERT(error == 0);
		if (strcmp(name, zfs_prop_to_name(ZFS_PROP_NORMALIZE)) == 0)
			norm = val;
		else if (strcmp(name, zfs_prop_to_name(ZFS_PROP_CASE)) == 0)
			sense = val;
	}
	ASSERT(version != 0);
	error = zap_update(os, moid, ZPL_VERSION_STR, 8, 1, &version, tx);

	/*
	 * Create zap object used for SA attribute registration
	 */
	if (version >= ZPL_VERSION_SA) {
		sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE,
		    DMU_OT_NONE, 0, tx);
		error = zap_add(os, moid, ZFS_SA_ATTRS, 8, 1, &sa_obj, tx);
		ASSERT(error == 0);
	} else {
		sa_obj = 0;
	}
	/*
	 * Create a delete queue.
	 */
	obj = zap_create(os, DMU_OT_UNLINKED_SET, DMU_OT_NONE, 0, tx);

	error = zap_add(os, moid, ZFS_UNLINKED_SET, 8, 1, &obj, tx);
	ASSERT(error == 0);

	/*
	 * Create root znode.  Create minimal znode/inode/zsb/sb
	 * to allow zfs_mknode to work.
	 */
	vattr.va_mask = ATTR_MODE|ATTR_UID|ATTR_GID;
	vattr.va_mode = S_IFDIR|0755;
	vattr.va_uid = crgetuid(cr);
	vattr.va_gid = crgetgid(cr);

	rootzp = kmem_cache_alloc(znode_cache, KM_PUSHPAGE);
	rootzp->z_moved = 0;
	rootzp->z_unlinked = 0;
	rootzp->z_atime_dirty = 0;
	rootzp->z_is_sa = USE_SA(version, os);

	zsb = kmem_zalloc(sizeof (zfs_sb_t), KM_PUSHPAGE | KM_NODEBUG);
	zsb->z_os = os;
	zsb->z_parent = zsb;
	zsb->z_version = version;
	zsb->z_use_fuids = USE_FUIDS(version, os);
	zsb->z_use_sa = USE_SA(version, os);
	zsb->z_norm = norm;

	sb = kmem_zalloc(sizeof (struct super_block), KM_PUSHPAGE);
	sb->s_fs_info = zsb;

	ZTOI(rootzp)->i_sb = sb;

	error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END,
	    &zsb->z_attr_table);

	ASSERT(error == 0);

	/*
	 * Fold case on file systems that are always or sometimes case
	 * insensitive.
	 */
	if (sense == ZFS_CASE_INSENSITIVE || sense == ZFS_CASE_MIXED)
		zsb->z_norm |= U8_TEXTPREP_TOUPPER;

	mutex_init(&zsb->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
	list_create(&zsb->z_all_znodes, sizeof (znode_t),
	    offsetof(znode_t, z_link_node));

	/*
	 * z_hold_mtx is heap-allocated (see zfs_sb_create()), so this
	 * temporary zsb needs its own allocation before the mutexes
	 * can be initialized.
	 */
	zsb->z_hold_mtx = vmem_zalloc(sizeof (kmutex_t) * ZFS_OBJ_MTX_SZ,
	    KM_SLEEP);
	for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
		mutex_init(&zsb->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL);

	VERIFY(0 == zfs_acl_ids_create(rootzp, IS_ROOT_NODE, &vattr,
	    cr, NULL, &acl_ids));
	zfs_mknode(rootzp, &vattr, tx, cr, IS_ROOT_NODE, &zp, &acl_ids);
	ASSERT3P(zp, ==, rootzp);
	error = zap_add(os, moid, ZFS_ROOT_OBJ, 8, 1, &rootzp->z_id, tx);
	ASSERT(error == 0);
	zfs_acl_ids_free(&acl_ids);

	atomic_set(&ZTOI(rootzp)->i_count, 0);
	sa_handle_destroy(rootzp->z_sa_hdl);
	kmem_cache_free(znode_cache, rootzp);

	/*
	 * Create shares directory
	 */
	error = zfs_create_share_dir(zsb, tx);
	ASSERT(error == 0);

	for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
		mutex_destroy(&zsb->z_hold_mtx[i]);

	vmem_free(zsb->z_hold_mtx, sizeof (kmutex_t) * ZFS_OBJ_MTX_SZ);
	kmem_free(sb, sizeof (struct super_block));
	kmem_free(zsb, sizeof (zfs_sb_t));
}
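/*
 * zfs_create_fs() runs inside the dataset-creation transaction.  A sketch
 * of how a creation callback might invoke it (a zfs_create_cb()-style
 * callback registered with dmu_objset_create(); the zct_* field name is
 * illustrative):
 *
 *	static void
 *	zfs_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
 *	{
 *		zfs_creat_t *zct = arg;
 *
 *		zfs_create_fs(os, cr, zct->zct_zplprops, tx);
 *	}
 */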