Beispiel #1
0
/*
 * Called when an unmount is requested and certain sanity checks have
 * already passed.  At this point no dentries or inodes have been reclaimed
 * from their respective caches.  We drop the extra reference on the .zfs
 * control directory to allow everything to be reclaimed.  All snapshots
 * must already have been unmounted to reach this point.
 */
void
zfs_preumount(struct super_block *sb)
{
	zfs_sb_t *zsb = sb->s_fs_info;

	/* zsb is NULL when zfs_domount fails during mount */
	if (zsb) {
		zfsctl_destroy(sb->s_fs_info);
		/*
		 * Wait for iput_async before entering evict_inodes in
		 * generic_shutdown_super. The reason we must finish before
		 * evict_inodes is when lazytime is on, or when zfs_purgedir
		 * calls zfs_zget, iput would bump i_count from 0 to 1. This
		 * would race with the i_count check in evict_inodes. This means
		 * it could destroy the inode while we are still using it.
		 *
		 * We wait for two passes. xattr directories in the first pass
		 * may add xattr entries in zfs_purgedir, so in the second pass
		 * we wait for them. We don't use taskq_wait here because it is
		 * a pool wide taskq. Other mounted filesystems can constantly
		 * do iput_async and there's no guarantee when taskq will be
		 * empty.
		 */
		taskq_wait_outstanding(dsl_pool_iput_taskq(
		    dmu_objset_pool(zsb->z_os)), 0);
		taskq_wait_outstanding(dsl_pool_iput_taskq(
		    dmu_objset_pool(zsb->z_os)), 0);
	}
}
Beispiel #2
0
/*
 * Delete the entire contents of a directory.  Return a count
 * of the number of entries that could not be deleted. If we encounter
 * an error, return a count of at least one so that the directory stays
 * in the unlinked set.
 *
 * NOTE: this function assumes that the directory is inactive,
 *	so there is no need to lock its entries before deletion.
 *	Also, it assumes the directory contents is *only* regular
 *	files.
 */
static int
zfs_purgedir(znode_t *dzp)
{
	zap_cursor_t	zc;
	zap_attribute_t	zap;
	znode_t		*xzp;
	dmu_tx_t	*tx;
	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
	zfs_dirlock_t	dl;
	int skipped = 0;
	int error;

	for (zap_cursor_init(&zc, zfsvfs->z_os, dzp->z_id);
	    (error = zap_cursor_retrieve(&zc, &zap)) == 0;
	    zap_cursor_advance(&zc)) {
		error = zfs_zget(zfsvfs,
		    ZFS_DIRENT_OBJ(zap.za_first_integer), &xzp);
		if (error) {
			skipped += 1;
			continue;
		}

/*
	    ASSERT((ZTOV(xzp)->v_type == VREG) ||
		    (ZTOV(xzp)->v_type == VLNK));
*/

		tx = dmu_tx_create(zfsvfs->z_os);
		dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
		dmu_tx_hold_zap(tx, dzp->z_id, FALSE, zap.za_name);
		dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
		dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
		/* Is this really needed ? */
		zfs_sa_upgrade_txholds(tx, xzp);
		error = dmu_tx_assign(tx, TXG_WAIT);
		if (error) {
			dmu_tx_abort(tx);
			//VN_RELE(ZTOV(xzp)); // async
			VN_RELE_ASYNC(ZTOV(xzp), dsl_pool_vnrele_taskq(dmu_objset_pool(zfsvfs->z_os)));
			skipped += 1;
			continue;
		}
		bzero(&dl, sizeof (dl));
		dl.dl_dzp = dzp;
		dl.dl_name = zap.za_name;

		error = zfs_link_destroy(&dl, xzp, tx, 0, NULL);
		if (error)
			skipped += 1;
		dmu_tx_commit(tx);

		//VN_RELE(ZTOV(xzp)); // async
		VN_RELE_ASYNC(ZTOV(xzp), dsl_pool_vnrele_taskq(dmu_objset_pool(zfsvfs->z_os)));
	}
	zap_cursor_fini(&zc);
	if (error != ENOENT)
		skipped += 1;
	return (skipped);
}
Beispiel #3
0
zilog_t *
zil_alloc(objset_t *os, zil_header_t *zh_phys)
{
	zilog_t *zilog;

	zilog = kmem_zalloc(sizeof (zilog_t), KM_SLEEP);

	zilog->zl_header = zh_phys;
	zilog->zl_os = os;
	zilog->zl_spa = dmu_objset_spa(os);
	zilog->zl_dmu_pool = dmu_objset_pool(os);
	zilog->zl_destroy_txg = TXG_INITIAL - 1;

	mutex_init(&zilog->zl_lock, NULL, MUTEX_DEFAULT, NULL);

	list_create(&zilog->zl_itx_list, sizeof (itx_t),
	    offsetof(itx_t, itx_node));

	list_create(&zilog->zl_lwb_list, sizeof (lwb_t),
	    offsetof(lwb_t, lwb_node));

	mutex_init(&zilog->zl_vdev_lock, NULL, MUTEX_DEFAULT, NULL);

	avl_create(&zilog->zl_vdev_tree, zil_vdev_compare,
	    sizeof (zil_vdev_node_t), offsetof(zil_vdev_node_t, zv_node));

	cv_init(&zilog->zl_cv_writer, NULL, CV_DEFAULT, NULL);
	cv_init(&zilog->zl_cv_suspend, NULL, CV_DEFAULT, NULL);

	return (zilog);
}
Beispiel #4
0
Datei: zvol.c Projekt: alek-p/zfs
/*
 * Ensure the zap is flushed then inform the VFS of the capacity change.
 */
static int
zvol_update_volsize(uint64_t volsize, objset_t *os)
{
	dmu_tx_t *tx;
	int error;
	uint64_t txg;

	ASSERT(MUTEX_HELD(&zvol_state_lock));

	tx = dmu_tx_create(os);
	dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL);
	dmu_tx_mark_netfree(tx);
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		dmu_tx_abort(tx);
		return (SET_ERROR(error));
	}
	txg = dmu_tx_get_txg(tx);

	error = zap_update(os, ZVOL_ZAP_OBJ, "size", 8, 1,
	    &volsize, tx);
	dmu_tx_commit(tx);

	txg_wait_synced(dmu_objset_pool(os), txg);

	if (error == 0)
		error = dmu_free_long_range(os,
		    ZVOL_OBJ, volsize, DMU_OBJECT_END);

	return (error);
}
Beispiel #5
0
/*
 * Return an empty bpobj, preferably the empty dummy one (dp_empty_bpobj).
 */
uint64_t
bpobj_alloc_empty(objset_t *os, int blocksize, dmu_tx_t *tx)
{
	zfeature_info_t *empty_bpobj_feat =
	    &spa_feature_table[SPA_FEATURE_EMPTY_BPOBJ];
	spa_t *spa = dmu_objset_spa(os);
	dsl_pool_t *dp = dmu_objset_pool(os);

	if (spa_feature_is_enabled(spa, empty_bpobj_feat)) {
		if (!spa_feature_is_active(spa, empty_bpobj_feat)) {
			ASSERT0(dp->dp_empty_bpobj);
			dp->dp_empty_bpobj =
			    bpobj_alloc(os, SPA_MAXBLOCKSIZE, tx);
			VERIFY(zap_add(os,
			    DMU_POOL_DIRECTORY_OBJECT,
			    DMU_POOL_EMPTY_BPOBJ, sizeof (uint64_t), 1,
			    &dp->dp_empty_bpobj, tx) == 0);
		}
		spa_feature_incr(spa, empty_bpobj_feat, tx);
		ASSERT(dp->dp_empty_bpobj != 0);
		return (dp->dp_empty_bpobj);
	} else {
		return (bpobj_alloc(os, blocksize, tx));
	}
}
Beispiel #6
0
void
dmu_objset_rele(objset_t *os, void *tag)
{
	dsl_pool_t *dp = dmu_objset_pool(os);
	dsl_dataset_rele(os->os_dsl_dataset, tag);
	dsl_pool_rele(dp, tag);
}
Beispiel #7
0
/* ARGSUSED */
static int
zfsctl_snapdir_readdir_cb(vnode_t *vp, void *dp, int *eofp,
    offset_t *offp, offset_t *nextp, void *data, int flags)
{
	zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
	char snapname[ZFS_MAX_DATASET_NAME_LEN];
	uint64_t id, cookie;
	boolean_t case_conflict;
	int error;

	ZFS_ENTER(zfsvfs);

	cookie = *offp;
	dsl_pool_config_enter(dmu_objset_pool(zfsvfs->z_os), FTAG);
	error = dmu_snapshot_list_next(zfsvfs->z_os,
	    sizeof (snapname), snapname, &id, &cookie, &case_conflict);
	dsl_pool_config_exit(dmu_objset_pool(zfsvfs->z_os), FTAG);
	if (error) {
		ZFS_EXIT(zfsvfs);
		if (error == ENOENT) {
			*eofp = 1;
			return (0);
		}
		return (error);
	}

	if (flags & V_RDDIR_ENTFLAGS) {
		edirent_t *eodp = dp;

		(void) strcpy(eodp->ed_name, snapname);
		eodp->ed_ino = ZFSCTL_INO_SNAP(id);
		eodp->ed_eflags = case_conflict ? ED_CASE_CONFLICT : 0;
	} else {
		struct dirent64 *odp = dp;

		(void) strcpy(odp->d_name, snapname);
		odp->d_ino = ZFSCTL_INO_SNAP(id);
	}
	*nextp = cookie;

	ZFS_EXIT(zfsvfs);

	return (0);
}
Beispiel #8
0
static int osd_object_sync(const struct lu_env *env, struct dt_object *dt)
{
	struct osd_device *osd = osd_obj2dev(osd_dt_obj(dt));
	ENTRY;

	/* XXX: no other option than syncing the whole filesystem until we
	 * support ZIL */
	txg_wait_synced(dmu_objset_pool(osd->od_objset.os), 0ULL);

	RETURN(0);
}
Beispiel #9
0
static int
zpl_snapdir_iterate(struct file *filp, struct dir_context *ctx)
{
	zfs_sb_t *zsb = ITOZSB(filp->f_path.dentry->d_inode);
	fstrans_cookie_t cookie;
	char snapname[MAXNAMELEN];
	boolean_t case_conflict;
	uint64_t id, pos;
	int error = 0;

	ZFS_ENTER(zsb);
	cookie = spl_fstrans_mark();

	if (!dir_emit_dots(filp, ctx))
		goto out;

	pos = ctx->pos;
	while (error == 0) {
		dsl_pool_config_enter(dmu_objset_pool(zsb->z_os), FTAG);
		error = -dmu_snapshot_list_next(zsb->z_os, MAXNAMELEN,
		    snapname, &id, &pos, &case_conflict);
		dsl_pool_config_exit(dmu_objset_pool(zsb->z_os), FTAG);
		if (error)
			goto out;

		if (!dir_emit(ctx, snapname, strlen(snapname),
		    ZFSCTL_INO_SHARES - id, DT_DIR))
			goto out;

		ctx->pos = pos;
	}
out:
	spl_fstrans_unmark(cookie);
	ZFS_EXIT(zsb);

	if (error == -ENOENT)
		return (0);

	return (error);
}
Beispiel #10
0
/*ARGSUSED*/
int
zfs_sync(struct super_block *sb, int wait, cred_t *cr)
{
	zfs_sb_t *zsb = sb->s_fs_info;

	/*
	 * Data integrity is job one.  We don't want a compromised kernel
	 * writing to the storage pool, so we never sync during panic.
	 */
	if (unlikely(oops_in_progress))
		return (0);

	/*
	 * Semantically, the only requirement is that the sync be initiated.
	 * The DMU syncs out txgs frequently, so there's nothing to do.
	 */
	if (!wait)
		return (0);

	if (zsb != NULL) {
		/*
		 * Sync a specific filesystem.
		 */
		dsl_pool_t *dp;

		ZFS_ENTER(zsb);
		dp = dmu_objset_pool(zsb->z_os);

		/*
		 * If the system is shutting down, then skip any
		 * filesystems which may exist on a suspended pool.
		 */
		if (spa_suspended(dp->dp_spa)) {
			ZFS_EXIT(zsb);
			return (0);
		}

		if (zsb->z_log != NULL)
			zil_commit(zsb->z_log, 0);

		ZFS_EXIT(zsb);
	} else {
		/*
		 * Sync all ZFS filesystems.  This is what happens when you
		 * run sync(1M).  Unlike other filesystems, ZFS honors the
		 * request by waiting for all pools to commit all dirty data.
		 */
		spa_sync_allpools();
	}

	return (0);
}
Beispiel #11
0
static int
zfs_vfs_sync(struct mount *mp, __unused int waitfor, __unused vfs_context_t context)
{
	zfsvfs_t *zfsvfs = vfs_fsprivate(mp);

	ZFS_ENTER(zfsvfs);

	/*
	 * Mac OS X needs a file system modify time
	 *
	 * We use the mtime of the "com.apple.system.mtime" 
	 * extended attribute, which is associated with the
	 * file system root directory.
	 *
	 * Here we sync any mtime changes to this attribute.
	 */
	if (zfsvfs->z_mtime_vp != NULL) {
		timestruc_t  mtime;
		znode_t  *zp;
top:
		zp = VTOZ(zfsvfs->z_mtime_vp);
		ZFS_TIME_DECODE(&mtime, zp->z_phys->zp_mtime);
		if (zfsvfs->z_last_mtime_synced < mtime.tv_sec) {
			dmu_tx_t  *tx;
			int  error;

			tx = dmu_tx_create(zfsvfs->z_os);
			dmu_tx_hold_bonus(tx, zp->z_id);
			error = dmu_tx_assign(tx, zfsvfs->z_assign);
			if (error) {
				if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
					dmu_tx_wait(tx);
					dmu_tx_abort(tx);
					goto top;
				}
				dmu_tx_abort(tx);
			} else {
				dmu_buf_will_dirty(zp->z_dbuf, tx);
				dmu_tx_commit(tx);
				zfsvfs->z_last_mtime_synced = mtime.tv_sec;
			}
		}
	}

	if (zfsvfs->z_log != NULL)
		zil_commit(zfsvfs->z_log, UINT64_MAX, 0);
	else
		txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
	ZFS_EXIT(zfsvfs);

	return (0);
}
Beispiel #12
0
void
bpobj_decr_empty(objset_t *os, dmu_tx_t *tx)
{
	dsl_pool_t *dp = dmu_objset_pool(os);

	spa_feature_decr(dmu_objset_spa(os), SPA_FEATURE_EMPTY_BPOBJ, tx);
	if (!spa_feature_is_active(dmu_objset_spa(os),
	    SPA_FEATURE_EMPTY_BPOBJ)) {
		VERIFY3U(0, ==, zap_remove(dp->dp_meta_objset,
		    DMU_POOL_DIRECTORY_OBJECT,
		    DMU_POOL_EMPTY_BPOBJ, tx));
		VERIFY3U(0, ==, dmu_object_free(os, dp->dp_empty_bpobj, tx));
		dp->dp_empty_bpobj = 0;
	}
Beispiel #13
0
static int osd_object_sync(const struct lu_env *env, struct dt_object *dt,
			   __u64 start, __u64 end)
{
	struct osd_device *osd = osd_obj2dev(osd_dt_obj(dt));
	ENTRY;

	/* XXX: no other option than syncing the whole filesystem until we
	 * support ZIL.  If the object tracked the txg that it was last
	 * modified in, it could pass that txg here instead of "0".  Maybe
	 * the changes are already committed, so no wait is needed at all? */
	txg_wait_synced(dmu_objset_pool(osd->od_os), 0ULL);

	RETURN(0);
}
Beispiel #14
0
int
zfs_vnop_ioctl_fullfsync(struct vnode *vp, vfs_context_t ct, zfsvfs_t *zfsvfs)
{
	int error;

    error = zfs_fsync(vp, /*syncflag*/0, NULL, (caller_context_t *)ct);
	if (error)
		return (error);

	if (zfsvfs->z_log != NULL)
		zil_commit(zfsvfs->z_log, 0);
	else
		txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
	return (0);
}
Beispiel #15
0
int
dmu_objset_userspace_upgrade(objset_t *os)
{
	uint64_t obj;
	int err = 0;

	if (dmu_objset_userspace_present(os))
		return (0);
	if (!dmu_objset_userused_enabled(os->os))
		return (ENOTSUP);
	if (dmu_objset_is_snapshot(os))
		return (EINVAL);

	/*
	 * We simply need to mark every object dirty, so that it will be
	 * synced out and now accounted.  If this is called
	 * concurrently, or if we already did some work before crashing,
	 * that's fine, since we track each object's accounted state
	 * independently.
	 */

	for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE, 0)) {
		dmu_tx_t *tx;
		dmu_buf_t *db;
		int objerr;

		if (issig(JUSTLOOKING) && issig(FORREAL))
			return (EINTR);

		objerr = dmu_bonus_hold(os, obj, FTAG, &db);
		if (objerr)
			continue;
		tx = dmu_tx_create(os);
		dmu_tx_hold_bonus(tx, obj);
		objerr = dmu_tx_assign(tx, TXG_WAIT);
		if (objerr) {
			dmu_tx_abort(tx);
			continue;
		}
		dmu_buf_will_dirty(db, tx);
		dmu_buf_rele(db, FTAG);
		dmu_tx_commit(tx);
	}

	os->os->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE;
	txg_wait_synced(dmu_objset_pool(os), 0);
	return (0);
}
Beispiel #16
0
/*ARGSUSED*/
int
zfs_sync(vfs_t *vfsp, short flag, cred_t *cr)
{
	/*
	 * Data integrity is job one.  We don't want a compromised kernel
	 * writing to the storage pool, so we never sync during panic.
	 */
	if (panicstr)
		return (0);

	/*
	 * SYNC_ATTR is used by fsflush() to force old filesystems like UFS
	 * to sync metadata, which they would otherwise cache indefinitely.
	 * Semantically, the only requirement is that the sync be initiated.
	 * The DMU syncs out txgs frequently, so there's nothing to do.
	 */
	if (flag & SYNC_ATTR)
		return (0);

	if (vfsp != NULL) {
		/*
		 * Sync a specific filesystem.
		 */
		zfsvfs_t *zfsvfs = vfsp->vfs_data;

		ZFS_ENTER(zfsvfs);
		if (zfsvfs->z_log != NULL)
			zil_commit(zfsvfs->z_log, UINT64_MAX, 0);
		else
			txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
		ZFS_EXIT(zfsvfs);
	} else {
		/*
		 * Sync all ZFS filesystems.  This is what happens when you
		 * run sync(1M).  Unlike other filesystems, ZFS honors the
		 * request by waiting for all pools to commit all dirty data.
		 */
		spa_sync_allpools();
	}

	return (0);
}
Beispiel #17
0
Datei: zvol.c Projekt: alek-p/zfs
static void
zvol_last_close(zvol_state_t *zv)
{
	zil_close(zv->zv_zilog);
	zv->zv_zilog = NULL;

	dmu_buf_rele(zv->zv_dbuf, zvol_tag);
	zv->zv_dbuf = NULL;

	/*
	 * Evict cached data
	 */
	if (dsl_dataset_is_dirty(dmu_objset_ds(zv->zv_objset)) &&
	    !(zv->zv_flags & ZVOL_RDONLY))
		txg_wait_synced(dmu_objset_pool(zv->zv_objset), 0);
	(void) dmu_objset_evict_dbufs(zv->zv_objset);

	dmu_objset_disown(zv->zv_objset, zvol_tag);
	zv->zv_objset = NULL;
}
Beispiel #18
0
/*
 * When we are called, os MUST refer to an objset associated with a dataset
 * that is owned by 'tag'; that is, is held and long held by 'tag' and ds_owner
 * == tag.  We will then release and reacquire ownership of the dataset while
 * holding the pool config_rwlock to avoid intervening namespace or ownership
 * changes may occur.
 *
 * This exists solely to accommodate zfs_ioc_userspace_upgrade()'s desire to
 * release the hold on its dataset and acquire a new one on the dataset of the
 * same name so that it can be partially torn down and reconstructed.
 */
void
dmu_objset_refresh_ownership(objset_t *os, void *tag)
{
	dsl_pool_t *dp;
	dsl_dataset_t *ds, *newds;
	char name[MAXNAMELEN];

	ds = os->os_dsl_dataset;
	VERIFY3P(ds, !=, NULL);
	VERIFY3P(ds->ds_owner, ==, tag);
	VERIFY(dsl_dataset_long_held(ds));

	dsl_dataset_name(ds, name);
	dp = dmu_objset_pool(os);
	dsl_pool_config_enter(dp, FTAG);
	dmu_objset_disown(os, tag);
	VERIFY0(dsl_dataset_own(dp, name, tag, &newds));
	VERIFY3P(newds, ==, os->os_dsl_dataset);
	dsl_pool_config_exit(dp, FTAG);
}
/*ARGSUSED*/
static int
zfs_sync(vfs_t *vfsp, int waitfor)
{

	/*
	 * Data integrity is job one.  We don't want a compromised kernel
	 * writing to the storage pool, so we never sync during panic.
	 */
	if (panicstr)
		return (0);

	if (vfsp != NULL) {
		/*
		 * Sync a specific filesystem.
		 */
		zfsvfs_t *zfsvfs = vfsp->vfs_data;
		int error;

		error = vfs_stdsync(vfsp, waitfor);
		if (error != 0)
			return (error);

		ZFS_ENTER(zfsvfs);
		if (zfsvfs->z_log != NULL)
			zil_commit(zfsvfs->z_log, UINT64_MAX, 0);
		else
			txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
		ZFS_EXIT(zfsvfs);
	} else {
		/*
		 * Sync all ZFS filesystems.  This is what happens when you
		 * run sync(1M).  Unlike other filesystems, ZFS honors the
		 * request by waiting for all pools to commit all dirty data.
		 */
		spa_sync_allpools();
	}

	return (0);
}
Beispiel #20
0
/*
 * Teardown the zfs_sb_t.
 *
 * Note, if 'unmounting' if FALSE, we return with the 'z_teardown_lock'
 * and 'z_teardown_inactive_lock' held.
 */
int
zfs_sb_teardown(zfs_sb_t *zsb, boolean_t unmounting)
{
	znode_t	*zp;

	rrw_enter(&zsb->z_teardown_lock, RW_WRITER, FTAG);

	if (!unmounting) {
		/*
		 * We purge the parent filesystem's super block as the
		 * parent filesystem and all of its snapshots have their
		 * inode's super block set to the parent's filesystem's
		 * super block.  Note,  'z_parent' is self referential
		 * for non-snapshots.
		 */
		shrink_dcache_sb(zsb->z_parent->z_sb);
	}

	/*
	 * If someone has not already unmounted this file system,
	 * drain the iput_taskq to ensure all active references to the
	 * zfs_sb_t have been handled only then can it be safely destroyed.
	 */
	if (zsb->z_os)
		taskq_wait(dsl_pool_iput_taskq(dmu_objset_pool(zsb->z_os)));

	/*
	 * Close the zil. NB: Can't close the zil while zfs_inactive
	 * threads are blocked as zil_close can call zfs_inactive.
	 */
	if (zsb->z_log) {
		zil_close(zsb->z_log);
		zsb->z_log = NULL;
	}

	rw_enter(&zsb->z_teardown_inactive_lock, RW_WRITER);

	/*
	 * If we are not unmounting (ie: online recv) and someone already
	 * unmounted this file system while we were doing the switcheroo,
	 * or a reopen of z_os failed then just bail out now.
	 */
	if (!unmounting && (zsb->z_unmounted || zsb->z_os == NULL)) {
		rw_exit(&zsb->z_teardown_inactive_lock);
		rrw_exit(&zsb->z_teardown_lock, FTAG);
		return (EIO);
	}

	/*
	 * At this point there are no VFS ops active, and any new VFS ops
	 * will fail with EIO since we have z_teardown_lock for writer (only
	 * relevant for forced unmount).
	 *
	 * Release all holds on dbufs.
	 */
	mutex_enter(&zsb->z_znodes_lock);
	for (zp = list_head(&zsb->z_all_znodes); zp != NULL;
	    zp = list_next(&zsb->z_all_znodes, zp)) {
		if (zp->z_sa_hdl) {
			ASSERT(atomic_read(&ZTOI(zp)->i_count) > 0);
			zfs_znode_dmu_fini(zp);
		}
	}
	mutex_exit(&zsb->z_znodes_lock);

	/*
	 * If we are unmounting, set the unmounted flag and let new VFS ops
	 * unblock.  zfs_inactive will have the unmounted behavior, and all
	 * other VFS ops will fail with EIO.
	 */
	if (unmounting) {
		zsb->z_unmounted = B_TRUE;
		rrw_exit(&zsb->z_teardown_lock, FTAG);
		rw_exit(&zsb->z_teardown_inactive_lock);
	}

	/*
	 * z_os will be NULL if there was an error in attempting to reopen
	 * zsb, so just return as the properties had already been
	 *
	 * unregistered and cached data had been evicted before.
	 */
	if (zsb->z_os == NULL)
		return (0);

	/*
	 * Unregister properties.
	 */
	zfs_unregister_callbacks(zsb);

	/*
	 * Evict cached data
	 */
	if (dsl_dataset_is_dirty(dmu_objset_ds(zsb->z_os)) &&
	    !zfs_is_readonly(zsb))
		txg_wait_synced(dmu_objset_pool(zsb->z_os), 0);
	dmu_objset_evict_dbufs(zsb->z_os);

	return (0);
}
/*
 * Teardown the zfsvfs::z_os.
 *
 * Note, if 'unmounting' if FALSE, we return with the 'z_teardown_lock'
 * and 'z_teardown_inactive_lock' held.
 */
static int
zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
{
	znode_t	*zp;

	rrw_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG);

	if (!unmounting) {
		/*
		 * We purge the parent filesystem's vfsp as the parent
		 * filesystem and all of its snapshots have their vnode's
		 * v_vfsp set to the parent's filesystem's vfsp.  Note,
		 * 'z_parent' is self referential for non-snapshots.
		 */
		(void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0);
#ifdef FREEBSD_NAMECACHE
		cache_purgevfs(zfsvfs->z_parent->z_vfs);
#endif
	}

	/*
	 * Close the zil. NB: Can't close the zil while zfs_inactive
	 * threads are blocked as zil_close can call zfs_inactive.
	 */
	if (zfsvfs->z_log) {
		zil_close(zfsvfs->z_log);
		zfsvfs->z_log = NULL;
	}

	rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_WRITER);

	/*
	 * If we are not unmounting (ie: online recv) and someone already
	 * unmounted this file system while we were doing the switcheroo,
	 * or a reopen of z_os failed then just bail out now.
	 */
	if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) {
		rw_exit(&zfsvfs->z_teardown_inactive_lock);
		rrw_exit(&zfsvfs->z_teardown_lock, FTAG);
		return (EIO);
	}

	/*
	 * At this point there are no vops active, and any new vops will
	 * fail with EIO since we have z_teardown_lock for writer (only
	 * relavent for forced unmount).
	 *
	 * Release all holds on dbufs.
	 */
	mutex_enter(&zfsvfs->z_znodes_lock);
	for (zp = list_head(&zfsvfs->z_all_znodes); zp != NULL;
	    zp = list_next(&zfsvfs->z_all_znodes, zp))
		if (zp->z_dbuf) {
			ASSERT(ZTOV(zp)->v_count >= 0);
			zfs_znode_dmu_fini(zp);
		}
	mutex_exit(&zfsvfs->z_znodes_lock);

	/*
	 * If we are unmounting, set the unmounted flag and let new vops
	 * unblock.  zfs_inactive will have the unmounted behavior, and all
	 * other vops will fail with EIO.
	 */
	if (unmounting) {
		zfsvfs->z_unmounted = B_TRUE;
		rrw_exit(&zfsvfs->z_teardown_lock, FTAG);
		rw_exit(&zfsvfs->z_teardown_inactive_lock);

#ifdef __FreeBSD__
		/*
		 * Some znodes might not be fully reclaimed, wait for them.
		 */
		mutex_enter(&zfsvfs->z_znodes_lock);
		while (list_head(&zfsvfs->z_all_znodes) != NULL) {
			msleep(zfsvfs, &zfsvfs->z_znodes_lock, 0,
			    "zteardown", 0);
		}
		mutex_exit(&zfsvfs->z_znodes_lock);
#endif
	}

	/*
	 * z_os will be NULL if there was an error in attempting to reopen
	 * zfsvfs, so just return as the properties had already been
	 * unregistered and cached data had been evicted before.
	 */
	if (zfsvfs->z_os == NULL)
		return (0);

	/*
	 * Unregister properties.
	 */
	zfs_unregister_callbacks(zfsvfs);

	/*
	 * Evict cached data
	 */
	if (dmu_objset_evict_dbufs(zfsvfs->z_os)) {
		txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
		(void) dmu_objset_evict_dbufs(zfsvfs->z_os);
	}

	return (0);
}
Beispiel #22
0
void
zfs_rmnode(znode_t *zp)
{
    zfs_sb_t	*zsb = ZTOZSB(zp);
    objset_t	*os = zsb->z_os;
    znode_t		*xzp = NULL;
    dmu_tx_t	*tx;
    uint64_t	acl_obj;
    uint64_t	xattr_obj;
    uint64_t	count;
    int		error;

    ASSERT(zp->z_links == 0);
    ASSERT(atomic_read(&ZTOI(zp)->i_count) == 0);

    /*
     * If this is an attribute directory, purge its contents.
     */
    if (S_ISDIR(ZTOI(zp)->i_mode) && (zp->z_pflags & ZFS_XATTR)) {
        error = zap_count(os, zp->z_id, &count);
        if (error) {
            zfs_znode_dmu_fini(zp);
            return;
        }

        if (count > 0) {
            taskq_t *taskq;

            /*
             * There are still directory entries in this xattr
             * directory.  Let zfs_unlinked_drain() deal with
             * them to avoid deadlocking this process in the
             * zfs_purgedir()->zfs_zget()->ilookup() callpath
             * on the xattr inode's I_FREEING bit.
             */
            taskq = dsl_pool_iput_taskq(dmu_objset_pool(os));
            taskq_dispatch(taskq, (task_func_t *)
                           zfs_unlinked_drain, zsb, TQ_SLEEP);

            zfs_znode_dmu_fini(zp);
            return;
        }
    }

    /*
     * Free up all the data in the file.
     */
    error = dmu_free_long_range(os, zp->z_id, 0, DMU_OBJECT_END);
    if (error) {
        /*
         * Not enough space.  Leave the file in the unlinked set.
         */
        zfs_znode_dmu_fini(zp);
        return;
    }

    /*
     * If the file has extended attributes, we're going to unlink
     * the xattr dir.
     */
    error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zsb),
                      &xattr_obj, sizeof (xattr_obj));
    if (error == 0 && xattr_obj) {
        error = zfs_zget(zsb, xattr_obj, &xzp);
        ASSERT(error == 0);
    }

    acl_obj = zfs_external_acl(zp);

    /*
     * Set up the final transaction.
     */
    tx = dmu_tx_create(os);
    dmu_tx_hold_free(tx, zp->z_id, 0, DMU_OBJECT_END);
    dmu_tx_hold_zap(tx, zsb->z_unlinkedobj, FALSE, NULL);
    if (xzp) {
        dmu_tx_hold_zap(tx, zsb->z_unlinkedobj, TRUE, NULL);
        dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
    }
    if (acl_obj)
        dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END);

    zfs_sa_upgrade_txholds(tx, zp);
    error = dmu_tx_assign(tx, TXG_WAIT);
    if (error) {
        /*
         * Not enough space to delete the file.  Leave it in the
         * unlinked set, leaking it until the fs is remounted (at
         * which point we'll call zfs_unlinked_drain() to process it).
         */
        dmu_tx_abort(tx);
        zfs_znode_dmu_fini(zp);
        goto out;
    }

    if (xzp) {
        ASSERT(error == 0);
        mutex_enter(&xzp->z_lock);
        xzp->z_unlinked = B_TRUE;	/* mark xzp for deletion */
        xzp->z_links = 0;	/* no more links to it */
        VERIFY(0 == sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zsb),
                              &xzp->z_links, sizeof (xzp->z_links), tx));
        mutex_exit(&xzp->z_lock);
        zfs_unlinked_add(xzp, tx);
    }

    /* Remove this znode from the unlinked set */
    VERIFY3U(0, ==,
             zap_remove_int(zsb->z_os, zsb->z_unlinkedobj, zp->z_id, tx));

    zfs_znode_delete(zp, tx);

    dmu_tx_commit(tx);
out:
    if (xzp)
        iput(ZTOI(xzp));
}
Beispiel #23
0
/*
 * This function is either called directly from reclaim, or in a delayed
 * manner, so the value of zp->z_vnode may be NULL.
 */
void
zfs_rmnode(znode_t *zp)
{
	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
	objset_t	*os = zfsvfs->z_os;
	znode_t		*xzp = NULL;
	dmu_tx_t	*tx;
	uint64_t	acl_obj;
	uint64_t	xattr_obj;
	int		error;

	ASSERT(zp->z_links == 0);

	/*
	 * If this is an attribute directory, purge its contents.
	 */
	if (IFTOVT((mode_t)zp->z_mode) == VDIR &&
	    (zp->z_pflags & ZFS_XATTR)) {

        if (!ZTOV(zp) || zfs_purgedir(zp) != 0) {
            /*
             * Not enough space to delete some xattrs.
             * Leave it in the unlinked set.
             */
            zfs_znode_dmu_fini(zp);
            return;
        }
	}

	/*
	 * Free up all the data in the file.
	 */
	error = dmu_free_long_range(os, zp->z_id, 0, DMU_OBJECT_END);
	if (error) {
		/*
		 * Not enough space.  Leave the file in the unlinked set.
		 */
		zfs_znode_dmu_fini(zp);
		//zfs_znode_free(zp);
		return;
	}

	/*
	 * If the file has extended attributes, we're going to unlink
	 * the xattr dir.
	 */
	error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
	    &xattr_obj, sizeof (xattr_obj));
	if (error == 0 && xattr_obj) {
		error = zfs_zget(zfsvfs, xattr_obj, &xzp);
		ASSERT(error == 0);
	}

	acl_obj = zfs_external_acl(zp);

	/*
	 * Set up the final transaction.
	 */
	tx = dmu_tx_create(os);
	dmu_tx_hold_free(tx, zp->z_id, 0, DMU_OBJECT_END);
	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
	if (xzp) {
		dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, TRUE, NULL);
		dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
	}
	if (acl_obj)
		dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END);

	zfs_sa_upgrade_txholds(tx, zp);
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		/*
		 * Not enough space to delete the file.  Leave it in the
		 * unlinked set, leaking it until the fs is remounted (at
		 * which point we'll call zfs_unlinked_drain() to process it).
		 */
		dmu_tx_abort(tx);
		zfs_znode_dmu_fini(zp);
		//zfs_znode_free(zp);
		goto out;
	}

	if (xzp) {
		ASSERT(error == 0);
		mutex_enter(&xzp->z_lock);
		xzp->z_unlinked = B_TRUE;	/* mark xzp for deletion */
		xzp->z_links = 0;	/* no more links to it */
		VERIFY(0 == sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs),
		    &xzp->z_links, sizeof (xzp->z_links), tx));
		mutex_exit(&xzp->z_lock);
		zfs_unlinked_add(xzp, tx);
	}

	/* Remove this znode from the unlinked set */
	VERIFY3U(0, ==,
	    zap_remove_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj, zp->z_id, tx));

	zfs_znode_delete(zp, tx);

	dmu_tx_commit(tx);
out:
	if (xzp)
		//VN_RELE(ZTOV(xzp)); // async
	VN_RELE_ASYNC(ZTOV(xzp), dsl_pool_vnrele_taskq(dmu_objset_pool(zfsvfs->z_os)));
}
Beispiel #24
0
int
zvol_strategy(buf_t *bp)
{
	zvol_state_t *zv = ddi_get_soft_state(zvol_state, getminor(bp->b_edev));
	uint64_t off, volsize;
	size_t size, resid;
	char *addr;
	objset_t *os;
	int error = 0;
	int sync;
	int reading;
	int txg_sync_needed = B_FALSE;

	if (zv == NULL) {
		bioerror(bp, ENXIO);
		biodone(bp);
		return (0);
	}

	if (getminor(bp->b_edev) == 0) {
		bioerror(bp, EINVAL);
		biodone(bp);
		return (0);
	}

	if (zv->zv_readonly && !(bp->b_flags & B_READ)) {
		bioerror(bp, EROFS);
		biodone(bp);
		return (0);
	}

	off = ldbtob(bp->b_blkno);
	volsize = zv->zv_volsize;

	os = zv->zv_objset;
	ASSERT(os != NULL);
	sync = !(bp->b_flags & B_ASYNC) && !(zil_disable);

	bp_mapin(bp);
	addr = bp->b_un.b_addr;
	resid = bp->b_bcount;

	/*
	 * There must be no buffer changes when doing a dmu_sync() because
	 * we can't change the data whilst calculating the checksum.
	 * A better approach than a per zvol rwlock would be to lock ranges.
	 */
	reading = bp->b_flags & B_READ;
	if (reading || resid <= zvol_immediate_write_sz)
		rw_enter(&zv->zv_dslock, RW_READER);
	else
		rw_enter(&zv->zv_dslock, RW_WRITER);

	while (resid != 0 && off < volsize) {

		size = MIN(resid, 1UL << 20);	/* cap at 1MB per tx */

		if (size > volsize - off)	/* don't write past the end */
			size = volsize - off;

		if (reading) {
			error = dmu_read(os, ZVOL_OBJ, off, size, addr);
		} else {
			dmu_tx_t *tx = dmu_tx_create(os);
			dmu_tx_hold_write(tx, ZVOL_OBJ, off, size);
			error = dmu_tx_assign(tx, TXG_WAIT);
			if (error) {
				dmu_tx_abort(tx);
			} else {
				dmu_write(os, ZVOL_OBJ, off, size, addr, tx);
				if (sync) {
					/* use the ZIL to commit this write */
					if (zvol_log_write(zv, tx, off, size,
					    addr) != 0) {
						txg_sync_needed = B_TRUE;
					}
				}
				dmu_tx_commit(tx);
			}
		}
		if (error)
			break;
		off += size;
		addr += size;
		resid -= size;
	}
	rw_exit(&zv->zv_dslock);

	if ((bp->b_resid = resid) == bp->b_bcount)
		bioerror(bp, off > volsize ? EINVAL : error);

	biodone(bp);

	if (sync) {
		if (txg_sync_needed)
			txg_wait_synced(dmu_objset_pool(os), 0);
		else
			zil_commit(zv->zv_zilog, UINT64_MAX, 0);
	}

	return (0);
}
Beispiel #25
0
int
zfs_register_callbacks(zfs_sb_t *zsb)
{
	struct dsl_dataset *ds = NULL;
	objset_t *os = zsb->z_os;
	zfs_mntopts_t *zmo = zsb->z_mntopts;
	int error = 0;

	ASSERT(zsb);
	ASSERT(zmo);

	/*
	 * The act of registering our callbacks will destroy any mount
	 * options we may have.  In order to enable temporary overrides
	 * of mount options, we stash away the current values and
	 * restore them after we register the callbacks.
	 */
	if (zfs_is_readonly(zsb) || !spa_writeable(dmu_objset_spa(os))) {
		zmo->z_do_readonly = B_TRUE;
		zmo->z_readonly = B_TRUE;
	}

	/*
	 * Register property callbacks.
	 *
	 * It would probably be fine to just check for i/o error from
	 * the first prop_register(), but I guess I like to go
	 * overboard...
	 */
	ds = dmu_objset_ds(os);
	dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
	error = dsl_prop_register(ds,
	    zfs_prop_to_name(ZFS_PROP_ATIME), atime_changed_cb, zsb);
	error = error ? error : dsl_prop_register(ds,
	    zfs_prop_to_name(ZFS_PROP_RELATIME), relatime_changed_cb, zsb);
	error = error ? error : dsl_prop_register(ds,
	    zfs_prop_to_name(ZFS_PROP_XATTR), xattr_changed_cb, zsb);
	error = error ? error : dsl_prop_register(ds,
	    zfs_prop_to_name(ZFS_PROP_RECORDSIZE), blksz_changed_cb, zsb);
	error = error ? error : dsl_prop_register(ds,
	    zfs_prop_to_name(ZFS_PROP_READONLY), readonly_changed_cb, zsb);
	error = error ? error : dsl_prop_register(ds,
	    zfs_prop_to_name(ZFS_PROP_DEVICES), devices_changed_cb, zsb);
	error = error ? error : dsl_prop_register(ds,
	    zfs_prop_to_name(ZFS_PROP_SETUID), setuid_changed_cb, zsb);
	error = error ? error : dsl_prop_register(ds,
	    zfs_prop_to_name(ZFS_PROP_EXEC), exec_changed_cb, zsb);
	error = error ? error : dsl_prop_register(ds,
	    zfs_prop_to_name(ZFS_PROP_SNAPDIR), snapdir_changed_cb, zsb);
	error = error ? error : dsl_prop_register(ds,
	    zfs_prop_to_name(ZFS_PROP_ACLTYPE), acltype_changed_cb, zsb);
	error = error ? error : dsl_prop_register(ds,
	    zfs_prop_to_name(ZFS_PROP_ACLINHERIT), acl_inherit_changed_cb, zsb);
	error = error ? error : dsl_prop_register(ds,
	    zfs_prop_to_name(ZFS_PROP_VSCAN), vscan_changed_cb, zsb);
	error = error ? error : dsl_prop_register(ds,
	    zfs_prop_to_name(ZFS_PROP_NBMAND), nbmand_changed_cb, zsb);
	dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
	if (error)
		goto unregister;

	/*
	 * Invoke our callbacks to restore temporary mount options.
	 */
	if (zmo->z_do_readonly)
		readonly_changed_cb(zsb, zmo->z_readonly);
	if (zmo->z_do_setuid)
		setuid_changed_cb(zsb, zmo->z_setuid);
	if (zmo->z_do_exec)
		exec_changed_cb(zsb, zmo->z_exec);
	if (zmo->z_do_devices)
		devices_changed_cb(zsb, zmo->z_devices);
	if (zmo->z_do_xattr)
		xattr_changed_cb(zsb, zmo->z_xattr);
	if (zmo->z_do_atime)
		atime_changed_cb(zsb, zmo->z_atime);
	if (zmo->z_do_relatime)
		relatime_changed_cb(zsb, zmo->z_relatime);
	if (zmo->z_do_nbmand)
		nbmand_changed_cb(zsb, zmo->z_nbmand);

	return (0);

unregister:
	/*
	 * We may attempt to unregister some callbacks that are not
	 * registered, but this is OK; it will simply return ENOMSG,
	 * which we will ignore.
	 */
	(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_ATIME),
	    atime_changed_cb, zsb);
	(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_RELATIME),
	    relatime_changed_cb, zsb);
	(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_XATTR),
	    xattr_changed_cb, zsb);
	(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_RECORDSIZE),
	    blksz_changed_cb, zsb);
	(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_READONLY),
	    readonly_changed_cb, zsb);
	(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_DEVICES),
	    devices_changed_cb, zsb);
	(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_SETUID),
	    setuid_changed_cb, zsb);
	(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_EXEC),
	    exec_changed_cb, zsb);
	(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_SNAPDIR),
	    snapdir_changed_cb, zsb);
	(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_ACLTYPE),
	    acltype_changed_cb, zsb);
	(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_ACLINHERIT),
	    acl_inherit_changed_cb, zsb);
	(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_VSCAN),
	    vscan_changed_cb, zsb);
	(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_NBMAND),
	    nbmand_changed_cb, zsb);

	return (error);
}
Beispiel #26
0
/*
 * Teardown the zfs_sb_t.
 *
 * Note, if 'unmounting' if FALSE, we return with the 'z_teardown_lock'
 * and 'z_teardown_inactive_lock' held.
 */
int
zfs_sb_teardown(zfs_sb_t *zsb, boolean_t unmounting)
{
	znode_t	*zp;

	/*
	 * If someone has not already unmounted this file system,
	 * drain the iput_taskq to ensure all active references to the
	 * zfs_sb_t have been handled only then can it be safely destroyed.
	 */
	if (zsb->z_os) {
		/*
		 * If we're unmounting we have to wait for the list to
		 * drain completely.
		 *
		 * If we're not unmounting there's no guarantee the list
		 * will drain completely, but iputs run from the taskq
		 * may add the parents of dir-based xattrs to the taskq
		 * so we want to wait for these.
		 *
		 * We can safely read z_nr_znodes without locking because the
		 * VFS has already blocked operations which add to the
		 * z_all_znodes list and thus increment z_nr_znodes.
		 */
		int round = 0;
		while (zsb->z_nr_znodes > 0) {
			taskq_wait_outstanding(dsl_pool_iput_taskq(
			    dmu_objset_pool(zsb->z_os)), 0);
			if (++round > 1 && !unmounting)
				break;
		}
	}

	rrm_enter(&zsb->z_teardown_lock, RW_WRITER, FTAG);

	if (!unmounting) {
		/*
		 * We purge the parent filesystem's super block as the
		 * parent filesystem and all of its snapshots have their
		 * inode's super block set to the parent's filesystem's
		 * super block.  Note,  'z_parent' is self referential
		 * for non-snapshots.
		 */
		shrink_dcache_sb(zsb->z_parent->z_sb);
	}

	/*
	 * Close the zil. NB: Can't close the zil while zfs_inactive
	 * threads are blocked as zil_close can call zfs_inactive.
	 */
	if (zsb->z_log) {
		zil_close(zsb->z_log);
		zsb->z_log = NULL;
	}

	rw_enter(&zsb->z_teardown_inactive_lock, RW_WRITER);

	/*
	 * If we are not unmounting (ie: online recv) and someone already
	 * unmounted this file system while we were doing the switcheroo,
	 * or a reopen of z_os failed then just bail out now.
	 */
	if (!unmounting && (zsb->z_unmounted || zsb->z_os == NULL)) {
		rw_exit(&zsb->z_teardown_inactive_lock);
		rrm_exit(&zsb->z_teardown_lock, FTAG);
		return (SET_ERROR(EIO));
	}

	/*
	 * At this point there are no VFS ops active, and any new VFS ops
	 * will fail with EIO since we have z_teardown_lock for writer (only
	 * relevant for forced unmount).
	 *
	 * Release all holds on dbufs.
	 */
	if (!unmounting) {
		mutex_enter(&zsb->z_znodes_lock);
		for (zp = list_head(&zsb->z_all_znodes); zp != NULL;
		zp = list_next(&zsb->z_all_znodes, zp)) {
			if (zp->z_sa_hdl)
				zfs_znode_dmu_fini(zp);
		}
		mutex_exit(&zsb->z_znodes_lock);
	}

	/*
	 * If we are unmounting, set the unmounted flag and let new VFS ops
	 * unblock.  zfs_inactive will have the unmounted behavior, and all
	 * other VFS ops will fail with EIO.
	 */
	if (unmounting) {
		zsb->z_unmounted = B_TRUE;
		rrm_exit(&zsb->z_teardown_lock, FTAG);
		rw_exit(&zsb->z_teardown_inactive_lock);
	}

	/*
	 * z_os will be NULL if there was an error in attempting to reopen
	 * zsb, so just return as the properties had already been
	 *
	 * unregistered and cached data had been evicted before.
	 */
	if (zsb->z_os == NULL)
		return (0);

	/*
	 * Unregister properties.
	 */
	zfs_unregister_callbacks(zsb);

	/*
	 * Evict cached data
	 */
	if (dsl_dataset_is_dirty(dmu_objset_ds(zsb->z_os)) &&
	    !zfs_is_readonly(zsb))
		txg_wait_synced(dmu_objset_pool(zsb->z_os), 0);
	dmu_objset_evict_dbufs(zsb->z_os);

	return (0);
}
Beispiel #27
0
static void
zfs_objset_close(zfsvfs_t *zfsvfs)
{
	zfs_delete_t	*zd = &zfsvfs->z_delete_head;
	znode_t		*zp, *nextzp;
	objset_t	*os = zfsvfs->z_os;

	/*
	 * Stop all delete threads.
	 */
	(void) zfs_delete_thread_target(zfsvfs, 0);

	/*
	 * For forced unmount, at this point all vops except zfs_inactive
	 * are erroring EIO. We need to now suspend zfs_inactive threads
	 * while we are freeing dbufs before switching zfs_inactive
	 * to use behaviour without a objset.
	 */
	rw_enter(&zfsvfs->z_um_lock, RW_WRITER);

	/*
	 * Release all delete in progress znodes
	 * They will be processed when the file system remounts.
	 */
	mutex_enter(&zd->z_mutex);
	while (zp = list_head(&zd->z_znodes)) {
		list_remove(&zd->z_znodes, zp);
		zp->z_dbuf_held = 0;
		dmu_buf_rele(zp->z_dbuf, NULL);
	}
	mutex_exit(&zd->z_mutex);

	/*
	 * Release all holds on dbufs
	 * Note, although we have stopped all other vop threads and
	 * zfs_inactive(), the dmu can callback via znode_pageout_func()
	 * which can zfs_znode_free() the znode.
	 * So we lock z_all_znodes; search the list for a held
	 * dbuf; drop the lock (we know zp can't disappear if we hold
	 * a dbuf lock; then regrab the lock and restart.
	 */
	mutex_enter(&zfsvfs->z_znodes_lock);
	for (zp = list_head(&zfsvfs->z_all_znodes); zp; zp = nextzp) {
		nextzp = list_next(&zfsvfs->z_all_znodes, zp);
		if (zp->z_dbuf_held) {
			/* dbufs should only be held when force unmounting */
			zp->z_dbuf_held = 0;
			mutex_exit(&zfsvfs->z_znodes_lock);
			dmu_buf_rele(zp->z_dbuf, NULL);
			/* Start again */
			mutex_enter(&zfsvfs->z_znodes_lock);
			nextzp = list_head(&zfsvfs->z_all_znodes);
		}
	}
	mutex_exit(&zfsvfs->z_znodes_lock);

	/*
	 * Unregister properties.
	 */
	if (!dmu_objset_is_snapshot(os))
		zfs_unregister_callbacks(zfsvfs);

	/*
	 * Switch zfs_inactive to behaviour without an objset.
	 * It just tosses cached pages and frees the znode & vnode.
	 * Then re-enable zfs_inactive threads in that new behaviour.
	 */
	zfsvfs->z_unmounted2 = B_TRUE;
	rw_exit(&zfsvfs->z_um_lock); /* re-enable any zfs_inactive threads */

	/*
	 * Close the zil. Can't close the zil while zfs_inactive
	 * threads are blocked as zil_close can call zfs_inactive.
	 */
	if (zfsvfs->z_log) {
		zil_close(zfsvfs->z_log);
		zfsvfs->z_log = NULL;
	}

	/*
	 * Evict all dbufs so that cached znodes will be freed
	 */
	if (dmu_objset_evict_dbufs(os, 1)) {
		txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
		(void) dmu_objset_evict_dbufs(os, 0);
	}

	/*
	 * Finally close the objset
	 */
	dmu_objset_close(os);

	/*
	 * We can now safely destroy the '.zfs' directory node.
	 */
	if (zfsvfs->z_ctldir != NULL)
		zfsctl_destroy(zfsvfs);

}
Beispiel #28
0
/*ARGSUSED*/
static int
zfs_vfs_unmount(struct mount *mp, int mntflags, vfs_context_t context)
{
	zfsvfs_t *zfsvfs = vfs_fsprivate(mp);	
	objset_t *os = zfsvfs->z_os;
	znode_t	*zp, *nextzp;
	int ret, i;
	int flags;
	
	/*XXX NOEL: delegation admin stuffs, add back if we use delg. admin */
#if 0
	ret = 0; /* UNDEFINED: secpolicy_fs_unmount(cr, vfsp); */
	if (ret) {
		ret = dsl_deleg_access((char *)refstr_value(vfsp->vfs_resource),
		    ZFS_DELEG_PERM_MOUNT, cr);
		if (ret)
			return (ret);
	}

	/*
	 * We purge the parent filesystem's vfsp as the parent filesystem
	 * and all of its snapshots have their vnode's v_vfsp set to the
	 * parent's filesystem's vfsp.  Note, 'z_parent' is self
	 * referential for non-snapshots.
	 */
	(void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0);
#endif

	/*
	 * Unmount any snapshots mounted under .zfs before unmounting the
	 * dataset itself.
	 */
#if 0
	if (zfsvfs->z_ctldir != NULL &&
	    (ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0) {
		return (ret);
#endif
	flags = SKIPSYSTEM;
	if (mntflags & MNT_FORCE)
		flags |= FORCECLOSE;

	ret = vflush(mp, NULLVP, flags);

	/*
	 * Mac OS X needs a file system modify time
	 *
	 * We use the mtime of the "com.apple.system.mtime" 
	 * extended attribute, which is associated with the
	 * file system root directory.
	 *
	 * Here we need to release the ref we took on z_mtime_vp during mount.
	 */
	if ((ret == 0) || (mntflags & MNT_FORCE)) {
		if (zfsvfs->z_mtime_vp != NULL) {
			struct vnode *mvp;

			mvp = zfsvfs->z_mtime_vp;
			zfsvfs->z_mtime_vp = NULL;

			if (vnode_get(mvp) == 0) {
				vnode_rele(mvp);
				vnode_recycle(mvp);
				vnode_put(mvp);
			}
		}
	}

	if (!(mntflags & MNT_FORCE)) {
		/*
		 * Check the number of active vnodes in the file system.
		 * Our count is maintained in the vfs structure, but the
		 * number is off by 1 to indicate a hold on the vfs
		 * structure itself.
		 *
		 * The '.zfs' directory maintains a reference of its
		 * own, and any active references underneath are
		 * reflected in the vnode count.
		 */
		
		if (ret)
			return (EBUSY);
#if 0
		if (zfsvfs->z_ctldir == NULL) {
			if (vfsp->vfs_count > 1)
				return (EBUSY);
		} else {
			if (vfsp->vfs_count > 2 ||
			    zfsvfs->z_ctldir->v_count > 1) {
				return (EBUSY);
			}
		}
#endif
	}

	rw_enter(&zfsvfs->z_unmount_lock, RW_WRITER);
	rw_enter(&zfsvfs->z_unmount_inactive_lock, RW_WRITER);

	/*
	 * At this point there are no vops active, and any new vops will
	 * fail with EIO since we have z_unmount_lock for writer (only
	 * relavent for forced unmount).
	 *
	 * Release all holds on dbufs.
	 * Note, the dmu can still callback via znode_pageout_func()
	 * which can zfs_znode_free() the znode.  So we lock
	 * z_all_znodes; search the list for a held dbuf; drop the lock
	 * (we know zp can't disappear if we hold a dbuf lock) then
	 * regrab the lock and restart.
	 */
	mutex_enter(&zfsvfs->z_znodes_lock);
	for (zp = list_head(&zfsvfs->z_all_znodes); zp; zp = nextzp) {
		nextzp = list_next(&zfsvfs->z_all_znodes, zp);
		if (zp->z_dbuf_held) {
			/* dbufs should only be held when force unmounting */
			zp->z_dbuf_held = 0;
			mutex_exit(&zfsvfs->z_znodes_lock);
			dmu_buf_rele(zp->z_dbuf, NULL);
			/* Start again */
			mutex_enter(&zfsvfs->z_znodes_lock);
			nextzp = list_head(&zfsvfs->z_all_znodes);
		}
	}
	mutex_exit(&zfsvfs->z_znodes_lock);

	/*
	 * Set the unmounted flag and let new vops unblock.
	 * zfs_inactive will have the unmounted behavior, and all other
	 * vops will fail with EIO.
	 */
	zfsvfs->z_unmounted = B_TRUE;
	rw_exit(&zfsvfs->z_unmount_lock);
	rw_exit(&zfsvfs->z_unmount_inactive_lock);

	/*
	 * Unregister properties.
	 */
#ifndef __APPLE__
	if (!dmu_objset_is_snapshot(os))
		zfs_unregister_callbacks(zfsvfs);
#endif
	/*
	 * Close the zil. NB: Can't close the zil while zfs_inactive
	 * threads are blocked as zil_close can call zfs_inactive.
	 */
	if (zfsvfs->z_log) {
		zil_close(zfsvfs->z_log);
		zfsvfs->z_log = NULL;
	}

	/*
	 * Evict all dbufs so that cached znodes will be freed
	 */
	if (dmu_objset_evict_dbufs(os, B_TRUE)) {
		txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
		(void) dmu_objset_evict_dbufs(os, B_FALSE);
	}

	/*
	 * Finally close the objset
	 */
	dmu_objset_close(os);

	/*
	 * We can now safely destroy the '.zfs' directory node.
	 */
#if 0
	if (zfsvfs->z_ctldir != NULL)
		zfsctl_destroy(zfsvfs);
#endif

	/*
	 * Note that this work is normally done in zfs_freevfs, but since
	 * there is no VOP_FREEVFS in OSX, we free VFS items here
	 */
	OSDecrementAtomic((SInt32 *)&zfs_active_fs_count);
 	for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
 		mutex_destroy(&zfsvfs->z_hold_mtx[i]);
 
 	mutex_destroy(&zfsvfs->z_znodes_lock);
 	list_destroy(&zfsvfs->z_all_znodes);
 	rw_destroy(&zfsvfs->z_unmount_lock);
 	rw_destroy(&zfsvfs->z_unmount_inactive_lock);

	return (0);
}


 
struct vnode* vnode_getparent(struct vnode *vp);  /* sys/vnode_internal.h */

static int
zfs_vget_internal(zfsvfs_t *zfsvfs, ino64_t ino, struct vnode **vpp)
{
	struct vnode	*vp;
	struct vnode	*dvp = NULL;
	znode_t		*zp;
	int		error;

	*vpp = NULL;
	
	/*
	 * On Mac OS X we always export the root directory id as 2
	 * and its parent as 1
	 */
	if (ino == 2 || ino == 1)
		ino = zfsvfs->z_root;
	
	if ((error = zfs_zget(zfsvfs, ino, &zp)))
		goto out;

	/* Don't expose EA objects! */
	if (zp->z_phys->zp_flags & ZFS_XATTR) {
		vnode_put(ZTOV(zp));
		error = ENOENT;
		goto out;
	}

	*vpp = vp = ZTOV(zp);

	if (vnode_isvroot(vp))
		goto out;

	/*
	 * If this znode didn't just come from the cache then
	 * it won't have a valid identity (parent and name).
	 *
	 * Manually fix its identity here (normally done by namei lookup).
	 */
	if ((dvp = vnode_getparent(vp)) == NULL) {
		if (zp->z_phys->zp_parent != 0 &&
		    zfs_vget_internal(zfsvfs, zp->z_phys->zp_parent, &dvp)) {
			goto out;
		}
		if ( vnode_isdir(dvp) ) {
			char objname[ZAP_MAXNAMELEN];  /* 256 bytes */
			int flags = VNODE_UPDATE_PARENT;

			/* Look for znode's name in its parent's zap */
			if ( zap_value_search(zfsvfs->z_os,
			                      zp->z_phys->zp_parent, 
			                      zp->z_id,
			                      ZFS_DIRENT_OBJ(-1ULL),
			                      objname) == 0 ) {
				flags |= VNODE_UPDATE_NAME;
			}

			/* Update the znode's parent and name */
			vnode_update_identity(vp, dvp, objname, 0, 0, flags);
		}
	}
	/* All done with znode's parent */
	vnode_put(dvp);
out:
	return (error);
}

/*
 * Get a vnode from a file id (ignoring the generation)
 *
 * Use by NFS Server (readdirplus) and VFS (build_path)
 */
static int
zfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, __unused vfs_context_t context)
{
	zfsvfs_t *zfsvfs = vfs_fsprivate(mp);
	int error;

	ZFS_ENTER(zfsvfs);

	/*
	 * On Mac OS X we always export the root directory id as 2.
	 * So we don't expect to see the real root directory id
	 * from zfs_vfs_vget KPI (unless of course the real id was
	 * already 2).
	 */
	if ((ino == zfsvfs->z_root) && (zfsvfs->z_root != 2)) {
		ZFS_EXIT(zfsvfs);
		return (ENOENT);
	}
	error = zfs_vget_internal(zfsvfs, ino, vpp);

	ZFS_EXIT(zfsvfs);
	return (error);
}
Beispiel #29
0
/*
 * dsl_crypto_key_unload
 *
 * Remove the key from the in memory keystore.
 *
 * First we have to remove the minor node for a ZVOL or unmount
 * the filesystem.  This is so that we flush all pending IO for it to disk
 * so we won't need to encrypt anything with this key.  Anything in flight
 * should already have a lock on the keys it needs.
 * We can't assume that userland has already successfully unmounted the
 * dataset though in many cases it will have.
 *
 * If the key can't be removed return the failure back to our caller.
 */
int
dsl_crypto_key_unload(const char *dsname)
{
    dsl_dataset_t *ds;
    objset_t *os;
    int error;
    spa_t *spa;
    dsl_pool_t *dp;
#ifdef _KERNEL
    dmu_objset_type_t os_type;
    //vfs_t *vfsp;
    struct vfsmount *vfsp;
#endif /* _KERNEL */

    error = dsl_pool_hold(dsname, FTAG, &dp);
    if (error != 0)
        return (error);

    /* XXX - should we use own_exclusive() here? */
    if ((error = dsl_dataset_hold(dp, dsname, FTAG, &ds)) != 0) {
        dsl_pool_rele(dp, FTAG);
        return (error);
    }

    if ((error = dmu_objset_from_ds(ds, &os)) != 0) {
        dsl_dataset_rele(ds, FTAG);
        dsl_pool_rele(dp, FTAG);
        return (error);
    }

#ifdef _KERNEL
    /*
     * Make sure that the device node has gone for ZVOLs
     * and that filesystems are umounted.
     */
#if 0 // FIXME
    os_type = dmu_objset_type(os);
    if (os_type == DMU_OST_ZVOL) {
        error = zvol_remove_minor(dsname);
        if (error == ENXIO)
            error = 0;
    } else if (os_type == DMU_OST_ZFS) {
        vfsp = zfs_get_vfs(dsname);
        if (vfsp != NULL) {
            error = vn_vfswlock(vfsp->vfs_vnodecovered);
            VFS_RELE(vfsp);
            if (error == 0)
                error = dounmount(vfsp, 0, CRED());
        }
    }
    if (error != 0) {
        dsl_dataset_rele(ds, FTAG);
        return (error);
    }
#endif

#endif /* _KERNEL */

    /*
     * Make sure all dbufs are synced.
     *
     * It is essential for encrypted datasets to ensure that
     * there is no further pending IO before removing the key.
     */
    if (dmu_objset_is_dirty(os, 0)) // FIXME, 0?
        txg_wait_synced(dmu_objset_pool(os), 0);
    dmu_objset_evict_dbufs(os);

    spa = dsl_dataset_get_spa(ds);
    error = zcrypt_keystore_remove(spa, ds->ds_object);

    dsl_dataset_rele(ds, FTAG);
    dsl_pool_rele(dp, FTAG);
    return (error);
}
Beispiel #30
0
int
zfs_register_callbacks(zfs_sb_t *zsb)
{
	struct dsl_dataset *ds = NULL;
	objset_t *os = zsb->z_os;
	boolean_t do_readonly = B_FALSE;
	int error = 0;

	if (zfs_is_readonly(zsb) || !spa_writeable(dmu_objset_spa(os)))
		do_readonly = B_TRUE;

	/*
	 * Register property callbacks.
	 *
	 * It would probably be fine to just check for i/o error from
	 * the first prop_register(), but I guess I like to go
	 * overboard...
	 */
	ds = dmu_objset_ds(os);
	dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
	error = dsl_prop_register(ds,
	    zfs_prop_to_name(ZFS_PROP_ATIME), atime_changed_cb, zsb);
	error = error ? error : dsl_prop_register(ds,
	    zfs_prop_to_name(ZFS_PROP_XATTR), xattr_changed_cb, zsb);
	error = error ? error : dsl_prop_register(ds,
	    zfs_prop_to_name(ZFS_PROP_RECORDSIZE), blksz_changed_cb, zsb);
	error = error ? error : dsl_prop_register(ds,
	    zfs_prop_to_name(ZFS_PROP_READONLY), readonly_changed_cb, zsb);
	error = error ? error : dsl_prop_register(ds,
	    zfs_prop_to_name(ZFS_PROP_DEVICES), devices_changed_cb, zsb);
	error = error ? error : dsl_prop_register(ds,
	    zfs_prop_to_name(ZFS_PROP_SETUID), setuid_changed_cb, zsb);
	error = error ? error : dsl_prop_register(ds,
	    zfs_prop_to_name(ZFS_PROP_EXEC), exec_changed_cb, zsb);
	error = error ? error : dsl_prop_register(ds,
	    zfs_prop_to_name(ZFS_PROP_SNAPDIR), snapdir_changed_cb, zsb);
	error = error ? error : dsl_prop_register(ds,
	    zfs_prop_to_name(ZFS_PROP_ACLINHERIT), acl_inherit_changed_cb,
	    zsb);
	error = error ? error : dsl_prop_register(ds,
	    zfs_prop_to_name(ZFS_PROP_VSCAN), vscan_changed_cb, zsb);
	error = error ? error : dsl_prop_register(ds,
	    zfs_prop_to_name(ZFS_PROP_NBMAND), nbmand_changed_cb, zsb);
	dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
	if (error)
		goto unregister;

	if (do_readonly)
		readonly_changed_cb(zsb, B_TRUE);

	return (0);

unregister:
	/*
	 * We may attempt to unregister some callbacks that are not
	 * registered, but this is OK; it will simply return ENOMSG,
	 * which we will ignore.
	 */
	(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_ATIME),
	    atime_changed_cb, zsb);
	(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_XATTR),
	    xattr_changed_cb, zsb);
	(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_RECORDSIZE),
	    blksz_changed_cb, zsb);
	(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_READONLY),
	    readonly_changed_cb, zsb);
	(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_DEVICES),
	    devices_changed_cb, zsb);
	(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_SETUID),
	    setuid_changed_cb, zsb);
	(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_EXEC),
	    exec_changed_cb, zsb);
	(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_SNAPDIR),
	    snapdir_changed_cb, zsb);
	(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_ACLINHERIT),
	    acl_inherit_changed_cb, zsb);
	(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_VSCAN),
	    vscan_changed_cb, zsb);
	(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_NBMAND),
	    nbmand_changed_cb, zsb);

	return (error);
}