int dmu_objset_destroy(const char *name) { objset_t *os; int error; /* * If it looks like we'll be able to destroy it, and there's * an unplayed replay log sitting around, destroy the log. * It would be nicer to do this in dsl_dataset_destroy_sync(), * but the replay log objset is modified in open context. */ error = dmu_objset_open(name, DMU_OST_ANY, DS_MODE_OWNER|DS_MODE_READONLY|DS_MODE_INCONSISTENT, &os); if (error == 0) { dsl_dataset_t *ds = os->os->os_dsl_dataset; zil_destroy(dmu_objset_zil(os), B_FALSE); error = dsl_dataset_destroy(ds, os); /* * dsl_dataset_destroy() closes the ds. */ kmem_free(os, sizeof (objset_t)); } return (error); }
/* ARGSUSED */ int zil_clear_log_chain(char *osname, void *txarg) { zilog_t *zilog; zil_header_t *zh; objset_t *os; dmu_tx_t *tx; int error; error = dmu_objset_open(osname, DMU_OST_ANY, DS_MODE_USER, &os); if (error) { cmn_err(CE_WARN, "can't open objset for %s", osname); return (0); } zilog = dmu_objset_zil(os); tx = dmu_tx_create(zilog->zl_os); (void) dmu_tx_assign(tx, TXG_WAIT); zh = zil_header_in_syncing_context(zilog); BP_ZERO(&zh->zh_log); dsl_dataset_dirty(dmu_objset_ds(os), tx); dmu_tx_commit(tx); dmu_objset_close(os); return (0); }
int zil_claim(char *osname, void *txarg) { dmu_tx_t *tx = txarg; uint64_t first_txg = dmu_tx_get_txg(tx); zilog_t *zilog; zil_header_t *zh; objset_t *os; int error; error = dmu_objset_open(osname, DMU_OST_ANY, DS_MODE_USER, &os); if (error) { cmn_err(CE_WARN, "can't open objset for %s", osname); return (0); } zilog = dmu_objset_zil(os); zh = zil_header_in_syncing_context(zilog); /* * Record here whether the zil has any records to replay. * If the header block pointer is null or the block points * to the stubby then we know there are no valid log records. * We use the header to store this state as the the zilog gets * freed later in dmu_objset_close(). * The flags (and the rest of the header fields) are cleared in * zil_sync() as a result of a zil_destroy(), after replaying the log. * * Note, the intent log can be empty but still need the * stubby to be claimed. */ if (!zil_empty(zilog)) zh->zh_flags |= ZIL_REPLAY_NEEDED; /* * Claim all log blocks if we haven't already done so, and remember * the highest claimed sequence number. This ensures that if we can * read only part of the log now (e.g. due to a missing device), * but we can read the entire log later, we will not try to replay * or destroy beyond the last block we successfully claimed. */ ASSERT3U(zh->zh_claim_txg, <=, first_txg); if (zh->zh_claim_txg == 0 && !BP_IS_HOLE(&zh->zh_log)) { zh->zh_claim_txg = first_txg; zh->zh_claim_seq = zil_parse(zilog, zil_claim_log_block, zil_claim_log_record, tx, first_txg); dsl_dataset_dirty(dmu_objset_ds(os), tx); } ASSERT3U(first_txg, ==, (spa_last_synced_txg(zilog->zl_spa) + 1)); dmu_objset_close(os); return (0); }
int zfs_set_version(const char *name, uint64_t newvers) { int error; objset_t *os; dmu_tx_t *tx; uint64_t curvers; /* * XXX for now, require that the filesystem be unmounted. Would * be nice to find the zfsvfs_t and just update that if * possible. */ if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION) return (EINVAL); error = dmu_objset_open(name, DMU_OST_ZFS, DS_MODE_PRIMARY, &os); if (error) return (error); error = zap_lookup(os, MASTER_NODE_OBJ, ZPL_VERSION_STR, 8, 1, &curvers); if (error) goto out; if (newvers < curvers) { error = EINVAL; goto out; } tx = dmu_tx_create(os); dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, 0, ZPL_VERSION_STR); error = dmu_tx_assign(tx, TXG_WAIT); if (error) { dmu_tx_abort(tx); goto out; } error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR, 8, 1, &newvers, tx); spa_history_internal_log(LOG_DS_UPGRADE, dmu_objset_spa(os), tx, CRED(), "oldver=%llu newver=%llu dataset = %llu", curvers, newvers, dmu_objset_id(os)); dmu_tx_commit(tx); out: dmu_objset_close(os); return (error); }
static int dmu_objset_snapshot_one(char *name, void *arg) { struct snaparg *sn = arg; objset_t *os; int err; (void) strcpy(sn->failed, name); /* * Check permissions only when requested. This only applies when * doing a recursive snapshot. The permission checks for the starting * dataset have already been performed in zfs_secpolicy_snapshot() */ if (sn->checkperms == B_TRUE && (err = zfs_secpolicy_snapshot_perms(name, CRED()))) return (err); err = dmu_objset_open(name, DMU_OST_ANY, DS_MODE_USER, &os); if (err != 0) return (err); /* If the objset is in an inconsistent state, return busy */ if (os->os->os_dsl_dataset->ds_phys->ds_flags & DS_FLAG_INCONSISTENT) { dmu_objset_close(os); return (EBUSY); } /* * NB: we need to wait for all in-flight changes to get to disk, * so that we snapshot those changes. zil_suspend does this as * a side effect. */ err = zil_suspend(dmu_objset_zil(os)); if (err == 0) { struct osnode *osn; dsl_sync_task_create(sn->dstg, dsl_dataset_snapshot_check, dsl_dataset_snapshot_sync, os->os->os_dsl_dataset, sn->snapname, 3); osn = kmem_alloc(sizeof (struct osnode), KM_SLEEP); osn->os = os; list_insert_tail(&sn->objsets, osn); } else { dmu_objset_close(os); } return (err); }
/* * Reopen zfsvfs_t::z_os and release VOPs. */ int zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname, int mode) { int err; ASSERT(RRW_WRITE_HELD(&zfsvfs->z_teardown_lock)); ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock)); err = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os); if (err) { zfsvfs->z_os = NULL; } else { znode_t *zp; VERIFY(zfsvfs_setup(zfsvfs, B_FALSE) == 0); /* * Attempt to re-establish all the active znodes with * their dbufs. If a zfs_rezget() fails, then we'll let * any potential callers discover that via ZFS_ENTER_VERIFY_VP * when they try to use their znode. */ mutex_enter(&zfsvfs->z_znodes_lock); for (zp = list_head(&zfsvfs->z_all_znodes); zp; zp = list_next(&zfsvfs->z_all_znodes, zp)) { (void) zfs_rezget(zp); } mutex_exit(&zfsvfs->z_znodes_lock); } /* release the VOPs */ rw_exit(&zfsvfs->z_teardown_inactive_lock); rrw_exit(&zfsvfs->z_teardown_lock, FTAG); if (err) { /* * Since we couldn't reopen zfsvfs::z_os, force * unmount this file system. */ if (vn_vfswlock(zfsvfs->z_vfs->vfs_vnodecovered) == 0) (void) dounmount(zfsvfs->z_vfs, MS_FORCE, curthread); } return (err); }
/* ARGSUSED */ int zil_vdev_offline(char *osname, void *arg) { objset_t *os; zilog_t *zilog; int error; error = dmu_objset_open(osname, DMU_OST_ANY, DS_MODE_USER, &os); if (error) return (error); zilog = dmu_objset_zil(os); if (zil_suspend(zilog) != 0) error = EEXIST; else zil_resume(zilog); dmu_objset_close(os); return (error); }
/* ARGSUSED */ int zil_check_log_chain(char *osname, void *txarg) { zilog_t *zilog; zil_header_t *zh; blkptr_t blk; arc_buf_t *abuf; objset_t *os; char *lrbuf; zil_trailer_t *ztp; int error; error = dmu_objset_open(osname, DMU_OST_ANY, DS_MODE_USER, &os); if (error) { cmn_err(CE_WARN, "can't open objset for %s", osname); return (0); } zilog = dmu_objset_zil(os); zh = zil_header_in_syncing_context(zilog); blk = zh->zh_log; if (BP_IS_HOLE(&blk)) { dmu_objset_close(os); return (0); /* no chain */ } for (;;) { error = zil_read_log_block(zilog, &blk, &abuf); if (error) break; lrbuf = abuf->b_data; ztp = (zil_trailer_t *)(lrbuf + BP_GET_LSIZE(&blk)) - 1; blk = ztp->zit_next_blk; VERIFY(arc_buf_remove_ref(abuf, &abuf) == 1); } dmu_objset_close(os); if (error == ECKSUM) return (0); /* normal end of chain */ return (error); }
static int zfs_domount(struct mount *mp, dev_t mount_dev, char *osname, vfs_context_t ctx) { uint64_t readonly; int error = 0; int mode; zfsvfs_t *zfsvfs; znode_t *zp = NULL; struct timeval tv; ASSERT(mp); ASSERT(osname); /* * Initialize the zfs-specific filesystem structure. * Should probably make this a kmem cache, shuffle fields, * and just bzero up to z_hold_mtx[]. */ zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); zfsvfs->z_vfs = mp; zfsvfs->z_parent = zfsvfs; zfsvfs->z_assign = TXG_NOWAIT; zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE; zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE; mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); list_create(&zfsvfs->z_all_znodes, sizeof (znode_t), offsetof(znode_t, z_link_node)); rw_init(&zfsvfs->z_unmount_lock, NULL, RW_DEFAULT, NULL); rw_init(&zfsvfs->z_unmount_inactive_lock, NULL, RW_DEFAULT, NULL); #ifndef __APPLE__ /* Initialize the generic filesystem structure. */ vfsp->vfs_bcount = 0; vfsp->vfs_data = NULL; if (zfs_create_unique_device(&mount_dev) == -1) { error = ENODEV; goto out; } ASSERT(vfs_devismounted(mount_dev) == 0); #endif vfs_setfsprivate(mp, zfsvfs); if (error = dsl_prop_get_integer(osname, "readonly", &readonly, NULL)) goto out; if (readonly) { mode = DS_MODE_PRIMARY | DS_MODE_READONLY; vfs_setflags(mp, (u_int64_t)((unsigned int)MNT_RDONLY)); } else { mode = DS_MODE_PRIMARY; } error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os); if (error == EROFS) { mode = DS_MODE_PRIMARY | DS_MODE_READONLY; error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os); } if (error) goto out; if (error = zfs_init_fs(zfsvfs, &zp, (cred_t *) vfs_context_ucred(ctx))) goto out; /* The call to zfs_init_fs leaves the vnode held, release it here. */ vnode_put(ZTOV(zp)); if (dmu_objset_is_snapshot(zfsvfs->z_os)) { uint64_t xattr; ASSERT(mode & DS_MODE_READONLY); #if 0 atime_changed_cb(zfsvfs, B_FALSE); readonly_changed_cb(zfsvfs, B_TRUE); if (error = dsl_prop_get_integer(osname, "xattr", &xattr, NULL)) goto out; xattr_changed_cb(zfsvfs, xattr); #endif zfsvfs->z_issnap = B_TRUE; } else { if (!vfs_isrdonly(mp)) zfs_unlinked_drain(zfsvfs); #ifndef __APPLE__ /* * Parse and replay the intent log. * * Because of ziltest, this must be done after * zfs_unlinked_drain(). (Further note: ziltest doesn't * use readonly mounts, where zfs_unlinked_drain() isn't * called.) This is because ziltest causes spa_sync() * to think it's committed, but actually it is not, so * the intent log contains many txg's worth of changes. * * In particular, if object N is in the unlinked set in * the last txg to actually sync, then it could be * actually freed in a later txg and then reallocated in * a yet later txg. This would write a "create object * N" record to the intent log. Normally, this would be * fine because the spa_sync() would have written out * the fact that object N is free, before we could write * the "create object N" intent log record. * * But when we are in ziltest mode, we advance the "open * txg" without actually spa_sync()-ing the changes to * disk. So we would see that object N is still * allocated and in the unlinked set, and there is an * intent log record saying to allocate it. */ zil_replay(zfsvfs->z_os, zfsvfs, &zfsvfs->z_assign, zfs_replay_vector); if (!zil_disable) zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data); #endif } #if 0 if (!zfsvfs->z_issnap) zfsctl_create(zfsvfs); #endif /* * Record the mount time (for Spotlight) */ microtime(&tv); zfsvfs->z_mount_time = tv.tv_sec; out: if (error) { if (zfsvfs->z_os) dmu_objset_close(zfsvfs->z_os); mutex_destroy(&zfsvfs->z_znodes_lock); list_destroy(&zfsvfs->z_all_znodes); rw_destroy(&zfsvfs->z_unmount_lock); rw_destroy(&zfsvfs->z_unmount_inactive_lock); kmem_free(zfsvfs, sizeof (zfsvfs_t)); } else { OSIncrementAtomic(&zfs_active_fs_count); (void) copystr(osname, vfs_statfs(mp)->f_mntfromname, MNAMELEN - 1, 0); vfs_getnewfsid(mp); } return (error); }
static int zfs_domount(vfs_t *vfsp, char *osname, cred_t *cr) { dev_t mount_dev; uint64_t recordsize, readonly; int error = 0; int mode; zfsvfs_t *zfsvfs; znode_t *zp = NULL; ASSERT(vfsp); ASSERT(osname); /* * Initialize the zfs-specific filesystem structure. * Should probably make this a kmem cache, shuffle fields, * and just bzero up to z_hold_mtx[]. */ zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); zfsvfs->z_vfs = vfsp; zfsvfs->z_parent = zfsvfs; zfsvfs->z_assign = TXG_NOWAIT; zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE; zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE; mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); list_create(&zfsvfs->z_all_znodes, sizeof (znode_t), offsetof(znode_t, z_link_node)); rw_init(&zfsvfs->z_um_lock, NULL, RW_DEFAULT, NULL); /* Initialize the generic filesystem structure. */ vfsp->vfs_bcount = 0; vfsp->vfs_data = NULL; if (zfs_create_unique_device(&mount_dev) == -1) { error = ENODEV; goto out; } ASSERT(vfs_devismounted(mount_dev) == 0); if (error = dsl_prop_get_integer(osname, "recordsize", &recordsize, NULL)) goto out; vfsp->vfs_dev = mount_dev; vfsp->vfs_fstype = zfsfstype; vfsp->vfs_bsize = recordsize; vfsp->vfs_flag |= VFS_NOTRUNC; vfsp->vfs_data = zfsvfs; if (error = dsl_prop_get_integer(osname, "readonly", &readonly, NULL)) goto out; if (readonly) mode = DS_MODE_PRIMARY | DS_MODE_READONLY; else mode = DS_MODE_PRIMARY; error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os); if (error == EROFS) { mode = DS_MODE_PRIMARY | DS_MODE_READONLY; error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os); } if (error) goto out; if (error = zfs_init_fs(zfsvfs, &zp, cr)) goto out; /* The call to zfs_init_fs leaves the vnode held, release it here. */ VN_RELE(ZTOV(zp)); if (dmu_objset_is_snapshot(zfsvfs->z_os)) { ASSERT(mode & DS_MODE_READONLY); atime_changed_cb(zfsvfs, B_FALSE); readonly_changed_cb(zfsvfs, B_TRUE); zfsvfs->z_issnap = B_TRUE; } else { error = zfs_register_callbacks(vfsp); if (error) goto out; /* * Start a delete thread running. */ (void) zfs_delete_thread_target(zfsvfs, 1); /* * Parse and replay the intent log. */ zil_replay(zfsvfs->z_os, zfsvfs, &zfsvfs->z_assign, zfs_replay_vector, (void (*)(void *))zfs_delete_wait_empty); if (!zil_disable) zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data); } if (!zfsvfs->z_issnap) zfsctl_create(zfsvfs); out: if (error) { if (zfsvfs->z_os) dmu_objset_close(zfsvfs->z_os); kmem_free(zfsvfs, sizeof (zfsvfs_t)); } else { atomic_add_32(&zfs_active_fs_count, 1); } return (error); }
/* * Create a minor node for the specified volume. */ int zvol_create_minor(zfs_cmd_t *zc) { char *name = zc->zc_name; dev_t dev = zc->zc_dev; zvol_state_t *zv; objset_t *os; uint64_t volsize; minor_t minor = 0; struct pathname linkpath; int ds_mode = DS_MODE_PRIMARY; vnode_t *vp = NULL; char *devpath; size_t devpathlen = strlen(ZVOL_FULL_DEV_DIR) + 1 + strlen(name) + 1; char chrbuf[30], blkbuf[30]; int error; mutex_enter(&zvol_state_lock); if ((zv = zvol_minor_lookup(name)) != NULL) { mutex_exit(&zvol_state_lock); return (EEXIST); } if (strchr(name, '@') != 0) ds_mode |= DS_MODE_READONLY; error = dmu_objset_open(name, DMU_OST_ZVOL, ds_mode, &os); if (error) { mutex_exit(&zvol_state_lock); return (error); } error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize); if (error) { dmu_objset_close(os); mutex_exit(&zvol_state_lock); return (error); } /* * If there's an existing /dev/zvol symlink, try to use the * same minor number we used last time. */ devpath = kmem_alloc(devpathlen, KM_SLEEP); (void) sprintf(devpath, "%s/%s", ZVOL_FULL_DEV_DIR, name); error = lookupname(devpath, UIO_SYSSPACE, NO_FOLLOW, NULL, &vp); kmem_free(devpath, devpathlen); if (error == 0 && vp->v_type != VLNK) error = EINVAL; if (error == 0) { pn_alloc(&linkpath); error = pn_getsymlink(vp, &linkpath, kcred); if (error == 0) { char *ms = strstr(linkpath.pn_path, ZVOL_PSEUDO_DEV); if (ms != NULL) { ms += strlen(ZVOL_PSEUDO_DEV); minor = stoi(&ms); } } pn_free(&linkpath); } if (vp != NULL) VN_RELE(vp); /* * If we found a minor but it's already in use, we must pick a new one. */ if (minor != 0 && ddi_get_soft_state(zvol_state, minor) != NULL) minor = 0; if (minor == 0) minor = zvol_minor_alloc(); if (minor == 0) { dmu_objset_close(os); mutex_exit(&zvol_state_lock); return (ENXIO); } if (ddi_soft_state_zalloc(zvol_state, minor) != DDI_SUCCESS) { dmu_objset_close(os); mutex_exit(&zvol_state_lock); return (EAGAIN); } (void) ddi_prop_update_string(minor, zfs_dip, ZVOL_PROP_NAME, name); (void) sprintf(chrbuf, "%uc,raw", minor); if (ddi_create_minor_node(zfs_dip, chrbuf, S_IFCHR, minor, DDI_PSEUDO, 0) == DDI_FAILURE) { ddi_soft_state_free(zvol_state, minor); dmu_objset_close(os); mutex_exit(&zvol_state_lock); return (EAGAIN); } (void) sprintf(blkbuf, "%uc", minor); if (ddi_create_minor_node(zfs_dip, blkbuf, S_IFBLK, minor, DDI_PSEUDO, 0) == DDI_FAILURE) { ddi_remove_minor_node(zfs_dip, chrbuf); ddi_soft_state_free(zvol_state, minor); dmu_objset_close(os); mutex_exit(&zvol_state_lock); return (EAGAIN); } zv = ddi_get_soft_state(zvol_state, minor); (void) strcpy(zv->zv_name, name); zv->zv_min_bs = DEV_BSHIFT; zv->zv_minor = minor; zv->zv_volsize = volsize; zv->zv_objset = os; zv->zv_mode = ds_mode; zv->zv_zilog = zil_open(os, NULL); rw_init(&zv->zv_dslock, NULL, RW_DEFAULT, NULL); zil_replay(os, zv, &zv->zv_txg_assign, zvol_replay_vector, NULL); zvol_size_changed(zv, dev); /* XXX this should handle the possible i/o error */ VERIFY(dsl_prop_register(dmu_objset_ds(zv->zv_objset), "readonly", zvol_readonly_changed_cb, zv) == 0); zvol_minors++; mutex_exit(&zvol_state_lock); return (0); }
/* ARGSUSED */ static int zfsctl_snapdir_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp, int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct, int *direntflags, pathname_t *realpnp) { zfsctl_snapdir_t *sdp = dvp->v_data; objset_t *snap; char snapname[MAXNAMELEN]; char real[MAXNAMELEN]; char *mountpoint; zfs_snapentry_t *sep, search; struct mounta margs; vfs_t *vfsp; size_t mountpoint_len; avl_index_t where; zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data; int err; /* * No extended attributes allowed under .zfs */ if (flags & LOOKUP_XATTR) return (EINVAL); ASSERT(dvp->v_type == VDIR); /* * If we get a recursive call, that means we got called * from the domount() code while it was trying to look up the * spec (which looks like a local path for zfs). We need to * add some flag to domount() to tell it not to do this lookup. */ if (MUTEX_HELD(&sdp->sd_lock)) return (ENOENT); ZFS_ENTER(zfsvfs); if (gfs_lookup_dot(vpp, dvp, zfsvfs->z_ctldir, nm) == 0) { ZFS_EXIT(zfsvfs); return (0); } if (flags & FIGNORECASE) { boolean_t conflict = B_FALSE; err = dmu_snapshot_realname(zfsvfs->z_os, nm, real, MAXNAMELEN, &conflict); if (err == 0) { nm = real; } else if (err != ENOTSUP) { ZFS_EXIT(zfsvfs); return (err); } if (realpnp) (void) strlcpy(realpnp->pn_buf, nm, realpnp->pn_bufsize); if (conflict && direntflags) *direntflags = ED_CASE_CONFLICT; } mutex_enter(&sdp->sd_lock); search.se_name = (char *)nm; if ((sep = avl_find(&sdp->sd_snaps, &search, &where)) != NULL) { *vpp = sep->se_root; VN_HOLD(*vpp); err = traverse(vpp); if (err) { VN_RELE(*vpp); *vpp = NULL; } else if (*vpp == sep->se_root) { /* * The snapshot was unmounted behind our backs, * try to remount it. */ goto domount; } else { /* * VROOT was set during the traverse call. We need * to clear it since we're pretending to be part * of our parent's vfs. */ (*vpp)->v_flag &= ~VROOT; } mutex_exit(&sdp->sd_lock); ZFS_EXIT(zfsvfs); return (err); } /* * The requested snapshot is not currently mounted, look it up. */ err = zfsctl_snapshot_zname(dvp, nm, MAXNAMELEN, snapname); if (err) { mutex_exit(&sdp->sd_lock); ZFS_EXIT(zfsvfs); /* * handle "ls *" or "?" in a graceful manner, * forcing EILSEQ to ENOENT. * Since shell ultimately passes "*" or "?" as name to lookup */ return (err == EILSEQ ? ENOENT : err); } if (dmu_objset_open(snapname, DMU_OST_ZFS, DS_MODE_USER | DS_MODE_READONLY, &snap) != 0) { mutex_exit(&sdp->sd_lock); ZFS_EXIT(zfsvfs); return (ENOENT); } sep = kmem_alloc(sizeof (zfs_snapentry_t), KM_SLEEP); sep->se_name = kmem_alloc(strlen(nm) + 1, KM_SLEEP); (void) strcpy(sep->se_name, nm); *vpp = sep->se_root = zfsctl_snapshot_mknode(dvp, dmu_objset_id(snap)); avl_insert(&sdp->sd_snaps, sep, where); dmu_objset_close(snap); domount: mountpoint_len = strlen(refstr_value(dvp->v_vfsp->vfs_mntpt)) + strlen("/.zfs/snapshot/") + strlen(nm) + 1; mountpoint = kmem_alloc(mountpoint_len, KM_SLEEP); (void) snprintf(mountpoint, mountpoint_len, "%s/.zfs/snapshot/%s", refstr_value(dvp->v_vfsp->vfs_mntpt), nm); margs.spec = snapname; margs.dir = mountpoint; margs.flags = MS_SYSSPACE | MS_NOMNTTAB; margs.fstype = "zfs"; margs.dataptr = NULL; margs.datalen = 0; margs.optptr = NULL; margs.optlen = 0; err = domount("zfs", &margs, *vpp, kcred, &vfsp); kmem_free(mountpoint, mountpoint_len); if (err == 0) { /* * Return the mounted root rather than the covered mount point. * Takes the GFS vnode at .zfs/snapshot/<snapname> and returns * the ZFS vnode mounted on top of the GFS node. This ZFS * vnode is the root of the newly created vfsp. */ VFS_RELE(vfsp); err = traverse(vpp); } if (err == 0) { /* * Fix up the root vnode mounted on .zfs/snapshot/<snapname>. * * This is where we lie about our v_vfsp in order to * make .zfs/snapshot/<snapname> accessible over NFS * without requiring manual mounts of <snapname>. */ ASSERT(VTOZ(*vpp)->z_zfsvfs != zfsvfs); VTOZ(*vpp)->z_zfsvfs->z_parent = zfsvfs; (*vpp)->v_vfsp = zfsvfs->z_vfs; (*vpp)->v_flag &= ~VROOT; } mutex_exit(&sdp->sd_lock); ZFS_EXIT(zfsvfs); /* * If we had an error, drop our hold on the vnode and * zfsctl_snapshot_inactive() will clean up. */ if (err) { VN_RELE(*vpp); *vpp = NULL; } return (err); }
static int zfs_domount(vfs_t *vfsp, char *osname) { uint64_t recordsize, readonly; int error = 0; int mode; zfsvfs_t *zfsvfs; znode_t *zp = NULL; ASSERT(vfsp); ASSERT(osname); /* * Initialize the zfs-specific filesystem structure. * Should probably make this a kmem cache, shuffle fields, * and just bzero up to z_hold_mtx[]. */ zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); zfsvfs->z_vfs = vfsp; zfsvfs->z_parent = zfsvfs; zfsvfs->z_assign = TXG_NOWAIT; zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE; zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE; mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&zfsvfs->z_online_recv_lock, NULL, MUTEX_DEFAULT, NULL); list_create(&zfsvfs->z_all_znodes, sizeof (znode_t), offsetof(znode_t, z_link_node)); rrw_init(&zfsvfs->z_teardown_lock); rw_init(&zfsvfs->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL); rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL); if (error = dsl_prop_get_integer(osname, "recordsize", &recordsize, NULL)) goto out; zfsvfs->z_vfs->vfs_bsize = recordsize; vfsp->vfs_data = zfsvfs; vfsp->mnt_flag |= MNT_LOCAL; vfsp->mnt_kern_flag |= MNTK_MPSAFE; vfsp->mnt_kern_flag |= MNTK_LOOKUP_SHARED; vfsp->mnt_kern_flag |= MNTK_SHARED_WRITES; if (error = dsl_prop_get_integer(osname, "readonly", &readonly, NULL)) goto out; mode = DS_MODE_OWNER; if (readonly) mode |= DS_MODE_READONLY; error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os); if (error == EROFS) { mode = DS_MODE_OWNER | DS_MODE_READONLY; error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os); } if (error) goto out; if (error = zfs_init_fs(zfsvfs, &zp)) goto out; /* * Set features for file system. */ zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os); if (zfsvfs->z_use_fuids) { vfs_set_feature(vfsp, VFSFT_XVATTR); vfs_set_feature(vfsp, VFSFT_SYSATTR_VIEWS); vfs_set_feature(vfsp, VFSFT_ACEMASKONACCESS); vfs_set_feature(vfsp, VFSFT_ACLONCREATE); } if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) { vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS); vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE); vfs_set_feature(vfsp, VFSFT_NOCASESENSITIVE); } else if (zfsvfs->z_case == ZFS_CASE_MIXED) { vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS); vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE); } if (dmu_objset_is_snapshot(zfsvfs->z_os)) { uint64_t pval; ASSERT(mode & DS_MODE_READONLY); atime_changed_cb(zfsvfs, B_FALSE); readonly_changed_cb(zfsvfs, B_TRUE); if (error = dsl_prop_get_integer(osname, "xattr", &pval, NULL)) goto out; xattr_changed_cb(zfsvfs, pval); zfsvfs->z_issnap = B_TRUE; } else { error = zfsvfs_setup(zfsvfs, B_TRUE); } vfs_mountedfrom(vfsp, osname); if (!zfsvfs->z_issnap) zfsctl_create(zfsvfs); out: if (error) { if (zfsvfs->z_os) dmu_objset_close(zfsvfs->z_os); zfs_freezfsvfs(zfsvfs); } else { atomic_add_32(&zfs_active_fs_count, 1); } return (error); }