Example #1
void
fzap_upgrade(zap_t *zap, dmu_tx_t *tx, zap_flags_t flags)
{
	dmu_buf_t *db;
	zap_leaf_t *l;
	int i;
	zap_phys_t *zp;

	ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
	zap->zap_ismicro = FALSE;

	(void) dmu_buf_update_user(zap->zap_dbuf, zap, zap,
	    &zap->zap_f.zap_phys, zap_evict);

	mutex_init(&zap->zap_f.zap_num_entries_mtx, 0, 0, 0);
	zap->zap_f.zap_block_shift = highbit64(zap->zap_dbuf->db_size) - 1;

	zp = zap->zap_f.zap_phys;
	/*
	 * explicitly zero it since it might be coming from an
	 * initialized microzap
	 */
	bzero(zap->zap_dbuf->db_data, zap->zap_dbuf->db_size);
	zp->zap_block_type = ZBT_HEADER;
	zp->zap_magic = ZAP_MAGIC;

	zp->zap_ptrtbl.zt_shift = ZAP_EMBEDDED_PTRTBL_SHIFT(zap);

	zp->zap_freeblk = 2;		/* block 1 will be the first leaf */
	zp->zap_num_leafs = 1;
	zp->zap_num_entries = 0;
	zp->zap_salt = zap->zap_salt;
	zp->zap_normflags = zap->zap_normflags;
	zp->zap_flags = flags;

	/* block 1 will be the first leaf */
	for (i = 0; i < (1<<zp->zap_ptrtbl.zt_shift); i++)
		ZAP_EMBEDDED_PTRTBL_ENT(zap, i) = 1;

	/*
	 * set up block 1 - the first leaf
	 */
	VERIFY(0 == dmu_buf_hold(zap->zap_objset, zap->zap_object,
	    1<<FZAP_BLOCK_SHIFT(zap), FTAG, &db, DMU_READ_NO_PREFETCH));
	dmu_buf_will_dirty(db, tx);

	l = kmem_zalloc(sizeof (zap_leaf_t), KM_SLEEP);
	l->l_dbuf = db;
	l->l_phys = db->db_data;

	zap_leaf_init(l, zp->zap_normflags != 0);

	kmem_free(l, sizeof (zap_leaf_t));
	dmu_buf_rele(db, FTAG);
}
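A pattern common to every example on this page: dmu_buf_will_dirty() must be called on a held dbuf, inside an assigned transaction, before db_data is modified. A minimal sketch of that sequence (hypothetical object and field layout, not taken from the ZFS tree):

/*
 * Minimal sketch: update a uint64_t counter kept in an object's
 * bonus buffer.  "update_bonus_counter" is illustrative only.
 */
static int
update_bonus_counter(objset_t *os, uint64_t object, uint64_t newval)
{
	dmu_buf_t *db;
	dmu_tx_t *tx;
	int err;

	err = dmu_bonus_hold(os, object, FTAG, &db);
	if (err)
		return (err);

	tx = dmu_tx_create(os);
	dmu_tx_hold_bonus(tx, object);
	err = dmu_tx_assign(tx, TXG_WAIT);
	if (err) {
		dmu_tx_abort(tx);
		dmu_buf_rele(db, FTAG);
		return (err);
	}

	dmu_buf_will_dirty(db, tx);		/* declare the write */
	*(uint64_t *)db->db_data = newval;	/* now safe to modify */

	dmu_buf_rele(db, FTAG);
	dmu_tx_commit(tx);
	return (0);
}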
Example #2
/*
 * Link zp into dl.  Can only fail if zp has been unlinked.
 */
int
zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag)
{
	znode_t *dzp = dl->dl_dzp;
	vnode_t *vp = ZTOV(zp);
	uint64_t value;
	int zp_is_dir = (vp->v_type == VDIR);
	int error;

	dmu_buf_will_dirty(zp->z_dbuf, tx);
	mutex_enter(&zp->z_lock);

	if (!(flag & ZRENAMING)) {
		if (zp->z_unlinked) {	/* no new links to unlinked zp */
			ASSERT(!(flag & (ZNEW | ZEXISTS)));
			mutex_exit(&zp->z_lock);
			return (ENOENT);
		}
		zp->z_phys->zp_links++;
	}
	zp->z_phys->zp_parent = dzp->z_id;	/* dzp is now zp's parent */

	if (!(flag & ZNEW))
		zfs_time_stamper_locked(zp, STATE_CHANGED, tx);
	mutex_exit(&zp->z_lock);

	dmu_buf_will_dirty(dzp->z_dbuf, tx);
	mutex_enter(&dzp->z_lock);
	dzp->z_phys->zp_size++;			/* one dirent added */
	dzp->z_phys->zp_links += zp_is_dir;	/* ".." link from zp */
	zfs_time_stamper_locked(dzp, CONTENT_MODIFIED, tx);
	mutex_exit(&dzp->z_lock);

	value = zfs_dirent(zp);
	error = zap_add(zp->z_zfsvfs->z_os, dzp->z_id, dl->dl_name,
	    8, 1, &value, tx);
	ASSERT(error == 0);

	dnlc_update(ZTOV(dzp), dl->dl_name, vp);

	return (0);
}
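The 64-bit value stored by zap_add() above comes from zfs_dirent(), which in this vintage of the code packs the entry's file type into the top four bits alongside the object number. A sketch of that encoding (exact version-guard names may differ by release):

/*
 * Sketch of the dirent encoding: object number in the low bits,
 * DT_* file type in bits 60-63 when the ZPL version supports
 * typed directory entries.
 */
uint64_t
zfs_dirent(znode_t *zp)
{
	uint64_t de = zp->z_id;

	if (zp->z_zfsvfs->z_version >= ZPL_VERSION_DIRENT_TYPE)
		de |= IFTODT(zp->z_phys->zp_mode) << 60;
	return (de);
}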
Example #3
static int
zap_tryupgradedir(zap_t *zap, dmu_tx_t *tx)
{
	if (RW_WRITE_HELD(&zap->zap_rwlock))
		return (1);
	if (rw_tryupgrade(&zap->zap_rwlock)) {
		dmu_buf_will_dirty(zap->zap_dbuf, tx);
		return (1);
	}
	return (0);
}
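Callers must be prepared for zap_tryupgradedir() to fail. A sketch of the usual fallback (real callers such as zap_expand_leaf() also re-fetch any leaf state that may have changed while the lock was dropped):

/*
 * Sketch of a caller's fallback path: drop the read lock, take the
 * write lock, and dirty the zap's dbuf ourselves (on the success
 * path zap_tryupgradedir() already did that).
 */
if (!zap_tryupgradedir(zap, tx)) {
	rw_exit(&zap->zap_rwlock);
	rw_enter(&zap->zap_rwlock, RW_WRITER);
	dmu_buf_will_dirty(zap->zap_dbuf, tx);
	/* re-validate any state cached under the read lock */
}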
Example #4
static int
zap_table_store(zap_t *zap, zap_table_phys_t *tbl, uint64_t idx, uint64_t val,
    dmu_tx_t *tx)
{
	int err;
	uint64_t blk, off;
	int bs = FZAP_BLOCK_SHIFT(zap);
	dmu_buf_t *db;

	ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
	ASSERT(tbl->zt_blk != 0);

	dprintf("storing %llx at index %llx\n", val, idx);

	blk = idx >> (bs-3);
	off = idx & ((1<<(bs-3))-1);

	err = dmu_buf_hold(zap->zap_objset, zap->zap_object,
	    (tbl->zt_blk + blk) << bs, FTAG, &db, DMU_READ_NO_PREFETCH);
	if (err)
		return (err);
	dmu_buf_will_dirty(db, tx);

	if (tbl->zt_nextblk != 0) {
		uint64_t idx2 = idx * 2;
		uint64_t blk2 = idx2 >> (bs-3);
		uint64_t off2 = idx2 & ((1<<(bs-3))-1);
		dmu_buf_t *db2;

		err = dmu_buf_hold(zap->zap_objset, zap->zap_object,
		    (tbl->zt_nextblk + blk2) << bs, FTAG, &db2,
		    DMU_READ_NO_PREFETCH);
		if (err) {
			dmu_buf_rele(db, FTAG);
			return (err);
		}
		dmu_buf_will_dirty(db2, tx);
		((uint64_t *)db2->db_data)[off2] = val;
		((uint64_t *)db2->db_data)[off2+1] = val;
		dmu_buf_rele(db2, FTAG);
	}

	((uint64_t *)db->db_data)[off] = val;
	dmu_buf_rele(db, FTAG);

	return (0);
}
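While zap_table_grow() (Example #18) is migrating the pointer table, the table exists at both zt_blk and zt_nextblk, so a store has to land in both copies; and because the new table is twice as large, each old entry maps to two consecutive new entries, which is why val is written at both off2 and off2+1. Worked numbers for the index math, assuming a hypothetical 128K zap block:

/*
 * With bs = 17 (128K blocks), each block holds 1 << (bs-3) = 16384
 * uint64_t entries.  For idx = 20000:
 *
 *   blk  = 20000 >> 14         = 1     (block 1 of the old table)
 *   off  = 20000 & (16384 - 1) = 3616
 *   idx2 = 2 * 20000           = 40000 (index in the doubled table)
 *   blk2 = 40000 >> 14         = 2
 *   off2 = 40000 & (16384 - 1) = 7232  (val stored at 7232 and 7233)
 */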
Example #5
static int
zfs_vfs_sync(struct mount *mp, __unused int waitfor, __unused vfs_context_t context)
{
	zfsvfs_t *zfsvfs = vfs_fsprivate(mp);

	ZFS_ENTER(zfsvfs);

	/*
	 * Mac OS X needs a file system modify time
	 *
	 * We use the mtime of the "com.apple.system.mtime" 
	 * extended attribute, which is associated with the
	 * file system root directory.
	 *
	 * Here we sync any mtime changes to this attribute.
	 */
	if (zfsvfs->z_mtime_vp != NULL) {
		timestruc_t  mtime;
		znode_t  *zp;
top:
		zp = VTOZ(zfsvfs->z_mtime_vp);
		ZFS_TIME_DECODE(&mtime, zp->z_phys->zp_mtime);
		if (zfsvfs->z_last_mtime_synced < mtime.tv_sec) {
			dmu_tx_t  *tx;
			int  error;

			tx = dmu_tx_create(zfsvfs->z_os);
			dmu_tx_hold_bonus(tx, zp->z_id);
			error = dmu_tx_assign(tx, zfsvfs->z_assign);
			if (error) {
				if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
					dmu_tx_wait(tx);
					dmu_tx_abort(tx);
					goto top;
				}
				dmu_tx_abort(tx);
			} else {
				dmu_buf_will_dirty(zp->z_dbuf, tx);
				dmu_tx_commit(tx);
				zfsvfs->z_last_mtime_synced = mtime.tv_sec;
			}
		}
	}

	if (zfsvfs->z_log != NULL)
		zil_commit(zfsvfs->z_log, UINT64_MAX, 0);
	else
		txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
	ZFS_EXIT(zfsvfs);

	return (0);
}
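Example #5 uses the standard ERESTART/TXG_NOWAIT retry idiom that appears throughout the ZPL. Distilled to its skeleton (a tx that failed assignment must be aborted and recreated; it can never be committed):

top:
	tx = dmu_tx_create(os);
	dmu_tx_hold_bonus(tx, object);
	error = dmu_tx_assign(tx, zfsvfs->z_assign);
	if (error) {
		if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
			dmu_tx_wait(tx);	/* let the txg make progress */
			dmu_tx_abort(tx);
			goto top;		/* retry with a fresh tx */
		}
		dmu_tx_abort(tx);		/* hard failure: give up */
	} else {
		/* ...dirty buffers and apply changes... */
		dmu_tx_commit(tx);
	}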
Example #6
int
zfs_make_xattrdir(znode_t *zp, vattr_t *vap, vnode_t **xvpp, cred_t *cr)
{
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	znode_t *xzp;
	dmu_tx_t *tx;
	int error;
	zfs_fuid_info_t *fuidp = NULL;

	*xvpp = NULL;

	if (error = zfs_zaccess(zp, ACE_WRITE_NAMED_ATTRS, 0, B_FALSE, cr))
		return (error);

	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_bonus(tx, zp->z_id);
	dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
	if (IS_EPHEMERAL(crgetuid(cr)) || IS_EPHEMERAL(crgetgid(cr))) {
		if (zfsvfs->z_fuid_obj == 0) {
			dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
			dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
			    FUID_SIZE_ESTIMATE(zfsvfs));
			dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, FALSE, NULL);
		} else {
			dmu_tx_hold_bonus(tx, zfsvfs->z_fuid_obj);
			dmu_tx_hold_write(tx, zfsvfs->z_fuid_obj, 0,
			    FUID_SIZE_ESTIMATE(zfsvfs));
		}
	}
	error = dmu_tx_assign(tx, zfsvfs->z_assign);
	if (error) {
		if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT)
			dmu_tx_wait(tx);
		dmu_tx_abort(tx);
		return (error);
	}
	zfs_mknode(zp, vap, tx, cr, IS_XATTR, &xzp, 0, NULL, &fuidp);
	ASSERT(xzp->z_phys->zp_parent == zp->z_id);
	dmu_buf_will_dirty(zp->z_dbuf, tx);
	zp->z_phys->zp_xattr = xzp->z_id;

	(void) zfs_log_create(zfsvfs->z_log, tx, TX_MKXATTR, zp,
	    xzp, "", NULL, fuidp, vap);
	if (fuidp)
		zfs_fuid_info_free(fuidp);
	dmu_tx_commit(tx);

	*xvpp = ZTOV(xzp);

	return (0);
}
Example #7
static void
dsl_dataset_user_hold_sync_one_impl(nvlist_t *tmpholds, dsl_dataset_t *ds,
    const char *htag, minor_t minor, uint64_t now, dmu_tx_t *tx)
{
	dsl_pool_t *dp = ds->ds_dir->dd_pool;
	objset_t *mos = dp->dp_meta_objset;
	uint64_t zapobj;

	ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));

	if (ds->ds_phys->ds_userrefs_obj == 0) {
		/*
		 * This is the first user hold for this dataset.  Create
		 * the userrefs zap object.
		 */
		dmu_buf_will_dirty(ds->ds_dbuf, tx);
		zapobj = ds->ds_phys->ds_userrefs_obj =
		    zap_create(mos, DMU_OT_USERREFS, DMU_OT_NONE, 0, tx);
	} else {
		zapobj = ds->ds_phys->ds_userrefs_obj;
	}
	ds->ds_userrefs++;

	VERIFY0(zap_add(mos, zapobj, htag, 8, 1, &now, tx));

	if (minor != 0) {
		char name[MAXNAMELEN];
		nvlist_t *tags;

		VERIFY0(dsl_pool_user_hold(dp, ds->ds_object,
		    htag, now, tx));
		(void) snprintf(name, sizeof (name), "%llx",
		    (u_longlong_t)ds->ds_object);

		if (nvlist_lookup_nvlist(tmpholds, name, &tags) != 0) {
			VERIFY0(nvlist_alloc(&tags, NV_UNIQUE_NAME,
			    KM_PUSHPAGE));
			fnvlist_add_boolean(tags, htag);
			fnvlist_add_nvlist(tmpholds, name, tags);
			fnvlist_free(tags);
		} else {
			fnvlist_add_boolean(tags, htag);
		}
	}

	spa_history_log_internal_ds(ds, "hold", tx,
	    "tag=%s temp=%d refs=%llu",
	    htag, minor != 0, ds->ds_userrefs);
}
Example #8
int
dmu_objset_userspace_upgrade(objset_t *os)
{
	uint64_t obj;
	int err = 0;

	if (dmu_objset_userspace_present(os))
		return (0);
	if (!dmu_objset_userused_enabled(os->os))
		return (ENOTSUP);
	if (dmu_objset_is_snapshot(os))
		return (EINVAL);

	/*
	 * We simply need to mark every object dirty, so that it will be
	 * synced out and now accounted.  If this is called
	 * concurrently, or if we already did some work before crashing,
	 * that's fine, since we track each object's accounted state
	 * independently.
	 */

	for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE, 0)) {
		dmu_tx_t *tx;
		dmu_buf_t *db;
		int objerr;

		if (issig(JUSTLOOKING) && issig(FORREAL))
			return (EINTR);

		objerr = dmu_bonus_hold(os, obj, FTAG, &db);
		if (objerr)
			continue;
		tx = dmu_tx_create(os);
		dmu_tx_hold_bonus(tx, obj);
		objerr = dmu_tx_assign(tx, TXG_WAIT);
		if (objerr) {
			dmu_tx_abort(tx);
			continue;
		}
		dmu_buf_will_dirty(db, tx);
		dmu_buf_rele(db, FTAG);
		dmu_tx_commit(tx);
	}

	os->os->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE;
	txg_wait_synced(dmu_objset_pool(os), 0);
	return (0);
}
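The interesting trick in the loop above is that nothing is actually written: dirtying the bonus buffer by itself is enough to push the object back through the sync path, where the now-enabled userused accounting picks it up. The per-object core, isolated as a sketch:

/*
 * Sketch: force one object through the next sync without changing
 * its contents.
 */
if (dmu_bonus_hold(os, obj, FTAG, &db) == 0) {
	tx = dmu_tx_create(os);
	dmu_tx_hold_bonus(tx, obj);
	if (dmu_tx_assign(tx, TXG_WAIT) == 0) {
		dmu_buf_will_dirty(db, tx);	/* no data change needed */
		dmu_tx_commit(tx);
	} else {
		dmu_tx_abort(tx);
	}
	dmu_buf_rele(db, FTAG);
}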
Example #9
void
bplist_vacate(bplist_t *bpl, dmu_tx_t *tx)
{
	mutex_enter(&bpl->bpl_lock);
	ASSERT3P(bpl->bpl_queue, ==, NULL);
	VERIFY(0 == bplist_hold(bpl));
	dmu_buf_will_dirty(bpl->bpl_dbuf, tx);
	VERIFY(0 == dmu_free_range(bpl->bpl_mos,
	    bpl->bpl_object, 0, -1ULL, tx));
	bpl->bpl_phys->bpl_entries = 0;
	bpl->bpl_phys->bpl_bytes = 0;
	if (bpl->bpl_havecomp) {
		bpl->bpl_phys->bpl_comp = 0;
		bpl->bpl_phys->bpl_uncomp = 0;
	}
	mutex_exit(&bpl->bpl_lock);
}
Example #10
void
dsl_dir_sync(dsl_dir_t *dd, dmu_tx_t *tx)
{
	ASSERT(dmu_tx_is_syncing(tx));

	dmu_buf_will_dirty(dd->dd_dbuf, tx);

	mutex_enter(&dd->dd_lock);
	ASSERT3U(dd->dd_tempreserved[tx->tx_txg&TXG_MASK], ==, 0);
	dprintf_dd(dd, "txg=%llu towrite=%lluK\n", tx->tx_txg,
	    dd->dd_space_towrite[tx->tx_txg&TXG_MASK] / 1024);
	dd->dd_space_towrite[tx->tx_txg&TXG_MASK] = 0;
	mutex_exit(&dd->dd_lock);

	/* release the hold from dsl_dir_dirty */
	dmu_buf_rele(dd->dd_dbuf, dd);
}
Example #11
int
zfs_make_xattrdir(znode_t *zp, vattr_t *vap, vnode_t **xvpp, cred_t *cr)
{
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	znode_t *xzp;
	dmu_tx_t *tx;
	uint64_t xoid;
	int error;

	*xvpp = NULL;

#ifndef __APPLE__
	/* In Mac OS X access preflighting is done above the file system. */
	if (error = zfs_zaccess(zp, ACE_WRITE_NAMED_ATTRS, cr))
		return (error);
#endif /*!__APPLE__*/

	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_bonus(tx, zp->z_id);
	dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
	error = dmu_tx_assign(tx, zfsvfs->z_assign);
	if (error) {
		if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT)
			dmu_tx_wait(tx);
		dmu_tx_abort(tx);
		return (error);
	}
	zfs_mknode(zp, vap, &xoid, tx, cr, IS_XATTR, &xzp, 0);
	ASSERT(xzp->z_id == xoid);
	ASSERT(xzp->z_phys->zp_parent == zp->z_id);
	dmu_buf_will_dirty(zp->z_dbuf, tx);
	zp->z_phys->zp_xattr = xoid;

	(void) zfs_log_create(zfsvfs->z_log, tx, TX_MKXATTR, zp, xzp, "");
	dmu_tx_commit(tx);
#ifdef __APPLE__
	/*
	 * Obtain and attach the vnode after committing the transaction
	 */
	zfs_attach_vnode(xzp);
#endif
	*xvpp = ZTOV(xzp);

	return (0);
}
Example #12
static void
dsl_deleg_set_sync(void *arg, dmu_tx_t *tx)
{
    dsl_deleg_arg_t *dda = arg;
    dsl_dir_t *dd;
    dsl_pool_t *dp = dmu_tx_pool(tx);
    objset_t *mos = dp->dp_meta_objset;
    nvpair_t *whopair = NULL;
    uint64_t zapobj;

    VERIFY0(dsl_dir_hold(dp, dda->dda_name, FTAG, &dd, NULL));

    zapobj = dd->dd_phys->dd_deleg_zapobj;
    if (zapobj == 0) {
        dmu_buf_will_dirty(dd->dd_dbuf, tx);
        zapobj = dd->dd_phys->dd_deleg_zapobj = zap_create(mos,
                                                DMU_OT_DSL_PERMS, DMU_OT_NONE, 0, tx);
    }

    while ((whopair = nvlist_next_nvpair(dda->dda_nvlist, whopair))) {
        const char *whokey = nvpair_name(whopair);
        nvlist_t *perms;
        nvpair_t *permpair = NULL;
        uint64_t jumpobj;

        perms = fnvpair_value_nvlist(whopair);

        if (zap_lookup(mos, zapobj, whokey, 8, 1, &jumpobj) != 0) {
            jumpobj = zap_create_link(mos, DMU_OT_DSL_PERMS,
                                      zapobj, whokey, tx);
        }

        while ((permpair = nvlist_next_nvpair(perms, permpair))) {
            const char *perm = nvpair_name(permpair);
            uint64_t n = 0;

            VERIFY(zap_update(mos, jumpobj,
                              perm, 8, 1, &n, tx) == 0);
            spa_history_log_internal_dd(dd, "permission update", tx,
                                        "%s %s", whokey, perm);
        }
    }
    dsl_dir_rele(dd, FTAG);
}
Example #13
static void
dsl_deleg_set_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
{
	dsl_dir_t *dd = arg1;
	nvlist_t *nvp = arg2;
	objset_t *mos = dd->dd_pool->dp_meta_objset;
	nvpair_t *whopair = NULL;
	uint64_t zapobj = dd->dd_phys->dd_deleg_zapobj;

	if (zapobj == 0) {
		dmu_buf_will_dirty(dd->dd_dbuf, tx);
		zapobj = dd->dd_phys->dd_deleg_zapobj = zap_create(mos,
		    DMU_OT_DSL_PERMS, DMU_OT_NONE, 0, tx);
	}

	while (whopair = nvlist_next_nvpair(nvp, whopair)) {
		const char *whokey = nvpair_name(whopair);
		nvlist_t *perms;
		nvpair_t *permpair = NULL;
		uint64_t jumpobj;

		VERIFY(nvpair_value_nvlist(whopair, &perms) == 0);

		if (zap_lookup(mos, zapobj, whokey, 8, 1, &jumpobj) != 0) {
			jumpobj = zap_create(mos, DMU_OT_DSL_PERMS,
			    DMU_OT_NONE, 0, tx);
			VERIFY(zap_update(mos, zapobj,
			    whokey, 8, 1, &jumpobj, tx) == 0);
		}

		while (permpair = nvlist_next_nvpair(perms, permpair)) {
			const char *perm = nvpair_name(permpair);
			uint64_t n = 0;

			VERIFY(zap_update(mos, jumpobj,
			    perm, 8, 1, &n, tx) == 0);
			spa_history_internal_log(LOG_DS_PERM_UPDATE,
			    dd->dd_pool->dp_spa, tx, cr,
			    "%s %s dataset = %llu", whokey, perm,
			    dd->dd_phys->dd_head_dataset_obj);
		}
	}
}
Example #14
static void
copy_create_perms(dsl_dir_t *dd, uint64_t pzapobj,
    boolean_t dosets, uint64_t uid, dmu_tx_t *tx)
{
	objset_t *mos = dd->dd_pool->dp_meta_objset;
	uint64_t jumpobj, pjumpobj;
	uint64_t zapobj = dd->dd_phys->dd_deleg_zapobj;
	zap_cursor_t zc;
	zap_attribute_t za;
	char whokey[ZFS_MAX_DELEG_NAME];

	zfs_deleg_whokey(whokey,
	    dosets ? ZFS_DELEG_CREATE_SETS : ZFS_DELEG_CREATE,
	    ZFS_DELEG_LOCAL, NULL);
	if (zap_lookup(mos, pzapobj, whokey, 8, 1, &pjumpobj) != 0)
		return;

	if (zapobj == 0) {
		dmu_buf_will_dirty(dd->dd_dbuf, tx);
		zapobj = dd->dd_phys->dd_deleg_zapobj = zap_create(mos,
		    DMU_OT_DSL_PERMS, DMU_OT_NONE, 0, tx);
	}

	zfs_deleg_whokey(whokey,
	    dosets ? ZFS_DELEG_USER_SETS : ZFS_DELEG_USER,
	    ZFS_DELEG_LOCAL, &uid);
	if (zap_lookup(mos, zapobj, whokey, 8, 1, &jumpobj) == ENOENT) {
		jumpobj = zap_create(mos, DMU_OT_DSL_PERMS, DMU_OT_NONE, 0, tx);
		VERIFY(zap_add(mos, zapobj, whokey, 8, 1, &jumpobj, tx) == 0);
	}

	for (zap_cursor_init(&zc, mos, pjumpobj);
	    zap_cursor_retrieve(&zc, &za) == 0;
	    zap_cursor_advance(&zc)) {
		uint64_t zero = 0;
		ASSERT(za.za_integer_length == 8 && za.za_num_integers == 1);

		VERIFY(zap_update(mos, jumpobj, za.za_name,
		    8, 1, &zero, tx) == 0);
	}
	zap_cursor_fini(&zc);
}
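The enumeration at the end of copy_create_perms() is the canonical ZAP cursor idiom: init, retrieve, advance, fini. A minimal standalone version (dump_entry() is a hypothetical callback):

/*
 * Sketch: enumerate every entry of a ZAP object.
 */
zap_cursor_t zc;
zap_attribute_t za;

for (zap_cursor_init(&zc, mos, zapobj);
    zap_cursor_retrieve(&zc, &za) == 0;
    zap_cursor_advance(&zc))
	dump_entry(za.za_name, za.za_first_integer);	/* hypothetical */
zap_cursor_fini(&zc);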
Example #15
static void
dsl_bookmark_destroy_sync(void *arg, dmu_tx_t *tx)
{
	dsl_bookmark_destroy_arg_t *dbda = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	objset_t *mos = dp->dp_meta_objset;
	nvpair_t *pair;

	for (pair = nvlist_next_nvpair(dbda->dbda_success, NULL);
	    pair != NULL; pair = nvlist_next_nvpair(dbda->dbda_success, pair)) {
		dsl_dataset_t *ds;
		char *shortname;
		uint64_t zap_cnt;

		VERIFY0(dsl_bookmark_hold_ds(dp, nvpair_name(pair),
		    &ds, FTAG, &shortname));
		VERIFY0(dsl_dataset_bookmark_remove(ds, shortname, tx));

		/*
		 * If all of this dataset's bookmarks have been destroyed,
		 * free the zap object and decrement the feature's use count.
		 */
		VERIFY0(zap_count(mos, ds->ds_bookmarks,
		    &zap_cnt));
		if (zap_cnt == 0) {
			dmu_buf_will_dirty(ds->ds_dbuf, tx);
			VERIFY0(zap_destroy(mos, ds->ds_bookmarks, tx));
			ds->ds_bookmarks = 0;
			spa_feature_decr(dp->dp_spa, SPA_FEATURE_BOOKMARKS, tx);
			VERIFY0(zap_remove(mos, ds->ds_object,
			    DS_FIELD_BOOKMARK_NAMES, tx));
		}

		spa_history_log_internal_ds(ds, "remove bookmark", tx,
		    "name=%s", shortname);

		dsl_dataset_rele(ds, FTAG);
	}
}
Example #16
int
zfs_make_xattrdir(znode_t *zp, vattr_t *vap, vnode_t **xvpp, cred_t *cr)
{
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	znode_t *xzp;
	dmu_tx_t *tx;
	uint64_t xoid;
	int error;

	*xvpp = NULL;

	if (error = zfs_zaccess(zp, ACE_WRITE_NAMED_ATTRS, cr))
		return (error);

	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_bonus(tx, zp->z_id);
	dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
	error = dmu_tx_assign(tx, zfsvfs->z_assign);
	if (error) {
		if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT)
			dmu_tx_wait(tx);
		dmu_tx_abort(tx);
		return (error);
	}
	zfs_mknode(zp, vap, &xoid, tx, cr, IS_XATTR, &xzp, 0);
	ASSERT(xzp->z_id == xoid);
	ASSERT(xzp->z_phys->zp_parent == zp->z_id);
	dmu_buf_will_dirty(zp->z_dbuf, tx);
	zp->z_phys->zp_xattr = xoid;

	(void) zfs_log_create(zfsvfs->z_log, tx, TX_MKXATTR, zp, xzp, "");
	dmu_tx_commit(tx);

	*xvpp = ZTOV(xzp);

	return (0);
}
Example #17
uint64_t
dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds, const char *name,
    dmu_tx_t *tx)
{
	objset_t *mos = dp->dp_meta_objset;
	uint64_t ddobj;
	dsl_dir_phys_t *ddphys;
	dmu_buf_t *dbuf;

	ddobj = dmu_object_alloc(mos, DMU_OT_DSL_DIR, 0,
	    DMU_OT_DSL_DIR, sizeof (dsl_dir_phys_t), tx);
	if (pds) {
		VERIFY(0 == zap_add(mos, pds->dd_phys->dd_child_dir_zapobj,
		    name, sizeof (uint64_t), 1, &ddobj, tx));
	} else {
		/* it's the root dir */
		VERIFY(0 == zap_add(mos, DMU_POOL_DIRECTORY_OBJECT,
		    DMU_POOL_ROOT_DATASET, sizeof (uint64_t), 1, &ddobj, tx));
	}
	VERIFY(0 == dmu_bonus_hold(mos, ddobj, FTAG, &dbuf));
	dmu_buf_will_dirty(dbuf, tx);
	ddphys = dbuf->db_data;

	ddphys->dd_creation_time = gethrestime_sec();
	if (pds)
		ddphys->dd_parent_obj = pds->dd_object;
	ddphys->dd_props_zapobj = zap_create(mos,
	    DMU_OT_DSL_PROPS, DMU_OT_NONE, 0, tx);
	ddphys->dd_child_dir_zapobj = zap_create(mos,
	    DMU_OT_DSL_DIR_CHILD_MAP, DMU_OT_NONE, 0, tx);
	if (spa_version(dp->dp_spa) >= SPA_VERSION_USED_BREAKDOWN)
		ddphys->dd_flags |= DD_FLAG_USED_BREAKDOWN;
	dmu_buf_rele(dbuf, FTAG);

	return (ddobj);
}
Example #18
static int
zap_table_grow(zap_t *zap, zap_table_phys_t *tbl,
    void (*transfer_func)(const uint64_t *src, uint64_t *dst, int n),
    dmu_tx_t *tx)
{
	uint64_t b, newblk;
	dmu_buf_t *db_old, *db_new;
	int err;
	int bs = FZAP_BLOCK_SHIFT(zap);
	int hepb = 1<<(bs-4);
	/* hepb = half the number of entries in a block */

	ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
	ASSERT(tbl->zt_blk != 0);
	ASSERT(tbl->zt_numblks > 0);

	if (tbl->zt_nextblk != 0) {
		newblk = tbl->zt_nextblk;
	} else {
		newblk = zap_allocate_blocks(zap, tbl->zt_numblks * 2);
		tbl->zt_nextblk = newblk;
		ASSERT0(tbl->zt_blks_copied);
		dmu_prefetch(zap->zap_objset, zap->zap_object,
		    tbl->zt_blk << bs, tbl->zt_numblks << bs);
	}

	/*
	 * Copy the ptrtbl from the old to new location.
	 */

	b = tbl->zt_blks_copied;
	err = dmu_buf_hold(zap->zap_objset, zap->zap_object,
	    (tbl->zt_blk + b) << bs, FTAG, &db_old, DMU_READ_NO_PREFETCH);
	if (err)
		return (err);

	/* first half of entries in old[b] go to new[2*b+0] */
	VERIFY(0 == dmu_buf_hold(zap->zap_objset, zap->zap_object,
	    (newblk + 2*b+0) << bs, FTAG, &db_new, DMU_READ_NO_PREFETCH));
	dmu_buf_will_dirty(db_new, tx);
	transfer_func(db_old->db_data, db_new->db_data, hepb);
	dmu_buf_rele(db_new, FTAG);

	/* second half of entries in old[b] go to new[2*b+1] */
	VERIFY(0 == dmu_buf_hold(zap->zap_objset, zap->zap_object,
	    (newblk + 2*b+1) << bs, FTAG, &db_new, DMU_READ_NO_PREFETCH));
	dmu_buf_will_dirty(db_new, tx);
	transfer_func((uint64_t *)db_old->db_data + hepb,
	    db_new->db_data, hepb);
	dmu_buf_rele(db_new, FTAG);

	dmu_buf_rele(db_old, FTAG);

	tbl->zt_blks_copied++;

	dprintf("copied block %llu of %llu\n",
	    tbl->zt_blks_copied, tbl->zt_numblks);

	if (tbl->zt_blks_copied == tbl->zt_numblks) {
		(void) dmu_free_range(zap->zap_objset, zap->zap_object,
		    tbl->zt_blk << bs, tbl->zt_numblks << bs, tx);

		tbl->zt_blk = newblk;
		tbl->zt_numblks *= 2;
		tbl->zt_shift++;
		tbl->zt_nextblk = 0;
		tbl->zt_blks_copied = 0;

		dprintf("finished; numblocks now %llu (%lluk entries)\n",
		    tbl->zt_numblks, 1<<(tbl->zt_shift-10));
	}

	return (0);
}
Example #19
/*ARGSUSED*/
static void
spa_history_log_sync(void *arg, dmu_tx_t *tx)
{
	nvlist_t	*nvl = arg;
	spa_t		*spa = dmu_tx_pool(tx)->dp_spa;
	objset_t	*mos = spa->spa_meta_objset;
	dmu_buf_t	*dbp;
	spa_history_phys_t *shpp;
	size_t		reclen;
	uint64_t	le_len;
	char		*record_packed = NULL;
	int		ret;

	/*
	 * If we have an older pool that doesn't have a command
	 * history object, create it now.
	 */
	mutex_enter(&spa->spa_history_lock);
	if (!spa->spa_history)
		spa_history_create_obj(spa, tx);
	mutex_exit(&spa->spa_history_lock);

	/*
	 * Get the offset of where we need to write via the bonus buffer.
	 * Update the offset when the write completes.
	 */
	VERIFY0(dmu_bonus_hold(mos, spa->spa_history, FTAG, &dbp));
	shpp = dbp->db_data;

	dmu_buf_will_dirty(dbp, tx);

#ifdef ZFS_DEBUG
	{
		dmu_object_info_t doi;
		dmu_object_info_from_db(dbp, &doi);
		ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_SPA_HISTORY_OFFSETS);
	}
#endif

	fnvlist_add_uint64(nvl, ZPOOL_HIST_TIME, gethrestime_sec());
	fnvlist_add_string(nvl, ZPOOL_HIST_HOST, utsname()->nodename);

	if (nvlist_exists(nvl, ZPOOL_HIST_CMD)) {
		zfs_dbgmsg("command: %s",
		    fnvlist_lookup_string(nvl, ZPOOL_HIST_CMD));
	} else if (nvlist_exists(nvl, ZPOOL_HIST_INT_NAME)) {
		if (nvlist_exists(nvl, ZPOOL_HIST_DSNAME)) {
			zfs_dbgmsg("txg %lld %s %s (id %llu) %s",
			    fnvlist_lookup_uint64(nvl, ZPOOL_HIST_TXG),
			    fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_NAME),
			    fnvlist_lookup_string(nvl, ZPOOL_HIST_DSNAME),
			    fnvlist_lookup_uint64(nvl, ZPOOL_HIST_DSID),
			    fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_STR));
		} else {
			zfs_dbgmsg("txg %lld %s %s",
			    fnvlist_lookup_uint64(nvl, ZPOOL_HIST_TXG),
			    fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_NAME),
			    fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_STR));
		}
		/*
		 * The history sysevent is posted only for internal history
		 * messages to show what has happened, not how it happened. For
		 * example, the following command:
		 *
		 * # zfs destroy -r tank/foo
		 *
		 * will result in one sysevent posted per dataset that is
		 * destroyed as a result of the command - which could be more
		 * than one event in total.  By contrast, if the sysevent was
		 * posted as a result of the ZPOOL_HIST_CMD key being present
		 * it would result in only one sysevent being posted with the
		 * full command line arguments, requiring the consumer to know
		 * how to parse and understand zfs(1M) command invocations.
		 */
		spa_history_log_notify(spa, nvl);
	} else if (nvlist_exists(nvl, ZPOOL_HIST_IOCTL)) {
		zfs_dbgmsg("ioctl %s",
		    fnvlist_lookup_string(nvl, ZPOOL_HIST_IOCTL));
	}

	VERIFY3U(nvlist_pack(nvl, &record_packed, &reclen, NV_ENCODE_NATIVE,
	    KM_SLEEP), ==, 0);

	mutex_enter(&spa->spa_history_lock);

	/* write out the packed length as little endian */
	le_len = LE_64((uint64_t)reclen);
	ret = spa_history_write(spa, &le_len, sizeof (le_len), shpp, tx);
	if (!ret)
		ret = spa_history_write(spa, record_packed, reclen, shpp, tx);

	/* The first command is the create, which we keep forever */
	if (ret == 0 && shpp->sh_pool_create_len == 0 &&
	    nvlist_exists(nvl, ZPOOL_HIST_CMD)) {
		shpp->sh_pool_create_len = shpp->sh_bof = shpp->sh_eof;
	}

	mutex_exit(&spa->spa_history_lock);
	fnvlist_pack_free(record_packed, reclen);
	dmu_buf_rele(dbp, FTAG);
	fnvlist_free(nvl);
}
Example #20
void
zfs_rmnode(znode_t *zp)
{
	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
	objset_t	*os = zfsvfs->z_os;
	znode_t		*xzp = NULL;
	dmu_tx_t	*tx;
	uint64_t	acl_obj;
	int		error;
	int		vfslocked;

	vfslocked = VFS_LOCK_GIANT(zfsvfs->z_vfs);

	ASSERT(zp->z_phys->zp_links == 0);

	/*
	 * If this is a ZIL replay then leave the object in the unlinked set.
	 * Otherwise we can get a deadlock, because the delete can be
	 * quite large and span multiple tx's and txgs, but each replay
	 * creates a tx to atomically run the replay function and mark the
	 * replay record as complete. We deadlock trying to start a tx in
	 * a new txg to further the deletion but can't because the replay
	 * tx hasn't finished.
	 *
	 * We actually delete the object if we get a failure to create an
	 * object in zil_replay_log_record(), or after calling zil_replay().
	 */
	if (zfsvfs->z_assign >= TXG_INITIAL) {
		zfs_znode_dmu_fini(zp);
		zfs_znode_free(zp);
		VFS_UNLOCK_GIANT(vfslocked);
		return;
	}

	/*
	 * If this is an attribute directory, purge its contents.
	 */
	if (ZTOV(zp) != NULL && ZTOV(zp)->v_type == VDIR &&
	    (zp->z_phys->zp_flags & ZFS_XATTR)) {
		if (zfs_purgedir(zp) != 0) {
			/*
			 * Not enough space to delete some xattrs.
			 * Leave it in the unlinked set.
			 */
			zfs_znode_dmu_fini(zp);
			zfs_znode_free(zp);
			VFS_UNLOCK_GIANT(vfslocked);
			return;
		}
	}

	/*
	 * Free up all the data in the file.
	 */
	error = dmu_free_long_range(os, zp->z_id, 0, DMU_OBJECT_END);
	if (error) {
		/*
		 * Not enough space.  Leave the file in the unlinked set.
		 */
		zfs_znode_dmu_fini(zp);
		zfs_znode_free(zp);
		VFS_UNLOCK_GIANT(vfslocked);
		return;
	}

	/*
	 * If the file has extended attributes, we're going to unlink
	 * the xattr dir.
	 */
	if (zp->z_phys->zp_xattr) {
		error = zfs_zget(zfsvfs, zp->z_phys->zp_xattr, &xzp);
		ASSERT(error == 0);
	}

	acl_obj = zp->z_phys->zp_acl.z_acl_extern_obj;

	/*
	 * Set up the final transaction.
	 */
	tx = dmu_tx_create(os);
	dmu_tx_hold_free(tx, zp->z_id, 0, DMU_OBJECT_END);
	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
	if (xzp) {
		dmu_tx_hold_bonus(tx, xzp->z_id);
		dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, TRUE, NULL);
	}
	if (acl_obj)
		dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END);
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		/*
		 * Not enough space to delete the file.  Leave it in the
		 * unlinked set, leaking it until the fs is remounted (at
		 * which point we'll call zfs_unlinked_drain() to process it).
		 */
		dmu_tx_abort(tx);
		zfs_znode_dmu_fini(zp);
		zfs_znode_free(zp);
		goto out;
	}

	if (xzp) {
		dmu_buf_will_dirty(xzp->z_dbuf, tx);
		mutex_enter(&xzp->z_lock);
		xzp->z_unlinked = B_TRUE;	/* mark xzp for deletion */
		xzp->z_phys->zp_links = 0;	/* no more links to it */
		mutex_exit(&xzp->z_lock);
		zfs_unlinked_add(xzp, tx);
	}

	/* Remove this znode from the unlinked set */
	VERIFY3U(0, ==,
	    zap_remove_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj, zp->z_id, tx));

	zfs_znode_delete(zp, tx);

	dmu_tx_commit(tx);
out:
	if (xzp)
		VN_RELE(ZTOV(xzp));
	VFS_UNLOCK_GIANT(vfslocked);
}
Example #21
/*ARGSUSED*/
static void
spa_history_log_sync(void *arg1, void *arg2, dmu_tx_t *tx)
{
	spa_t		*spa = arg1;
	history_arg_t	*hap = arg2;
	const char	*history_str = hap->ha_history_str;
	objset_t	*mos = spa->spa_meta_objset;
	dmu_buf_t	*dbp;
	spa_history_phys_t *shpp;
	size_t		reclen;
	uint64_t	le_len;
	nvlist_t	*nvrecord;
	char		*record_packed = NULL;
	int		ret;

	/*
	 * If we have an older pool that doesn't have a command
	 * history object, create it now.
	 */
	mutex_enter(&spa->spa_history_lock);
	if (!spa->spa_history)
		spa_history_create_obj(spa, tx);
	mutex_exit(&spa->spa_history_lock);

	/*
	 * Get the offset of where we need to write via the bonus buffer.
	 * Update the offset when the write completes.
	 */
	VERIFY(0 == dmu_bonus_hold(mos, spa->spa_history, FTAG, &dbp));
	shpp = dbp->db_data;

	dmu_buf_will_dirty(dbp, tx);

#ifdef ZFS_DEBUG
	{
		dmu_object_info_t doi;
		dmu_object_info_from_db(dbp, &doi);
		ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_SPA_HISTORY_OFFSETS);
	}
#endif

	VERIFY(nvlist_alloc(&nvrecord, NV_UNIQUE_NAME, KM_SLEEP) == 0);
	VERIFY(nvlist_add_uint64(nvrecord, ZPOOL_HIST_TIME,
	    gethrestime_sec()) == 0);
	VERIFY(nvlist_add_uint64(nvrecord, ZPOOL_HIST_WHO, hap->ha_uid) == 0);
	if (hap->ha_zone != NULL)
		VERIFY(nvlist_add_string(nvrecord, ZPOOL_HIST_ZONE,
		    hap->ha_zone) == 0);
#ifdef _KERNEL
	VERIFY(nvlist_add_string(nvrecord, ZPOOL_HIST_HOST,
	    utsname.nodename) == 0);
#endif
	if (hap->ha_log_type == LOG_CMD_POOL_CREATE ||
	    hap->ha_log_type == LOG_CMD_NORMAL) {
		VERIFY(nvlist_add_string(nvrecord, ZPOOL_HIST_CMD,
		    history_str) == 0);

		zfs_dbgmsg("command: %s", history_str);
	} else {
		VERIFY(nvlist_add_uint64(nvrecord, ZPOOL_HIST_INT_EVENT,
		    hap->ha_event) == 0);
		VERIFY(nvlist_add_uint64(nvrecord, ZPOOL_HIST_TXG,
		    tx->tx_txg) == 0);
		VERIFY(nvlist_add_string(nvrecord, ZPOOL_HIST_INT_STR,
		    history_str) == 0);

		zfs_dbgmsg("internal %s pool:%s txg:%llu %s",
		    zfs_history_event_names[hap->ha_event], spa_name(spa),
		    (longlong_t)tx->tx_txg, history_str);

	}

	VERIFY(nvlist_size(nvrecord, &reclen, NV_ENCODE_XDR) == 0);
	record_packed = kmem_alloc(reclen, KM_SLEEP);

	VERIFY(nvlist_pack(nvrecord, &record_packed, &reclen,
	    NV_ENCODE_XDR, KM_SLEEP) == 0);

	mutex_enter(&spa->spa_history_lock);
	if (hap->ha_log_type == LOG_CMD_POOL_CREATE)
		VERIFY(shpp->sh_eof == shpp->sh_pool_create_len);

	/* write out the packed length as little endian */
	le_len = LE_64((uint64_t)reclen);
	ret = spa_history_write(spa, &le_len, sizeof (le_len), shpp, tx);
	if (!ret)
		ret = spa_history_write(spa, record_packed, reclen, shpp, tx);

	if (!ret && hap->ha_log_type == LOG_CMD_POOL_CREATE) {
		shpp->sh_pool_create_len += sizeof (le_len) + reclen;
		shpp->sh_bof = shpp->sh_pool_create_len;
	}

	mutex_exit(&spa->spa_history_lock);
	nvlist_free(nvrecord);
	kmem_free(record_packed, reclen);
	dmu_buf_rele(dbp, FTAG);

	strfree(hap->ha_history_str);
	if (hap->ha_zone != NULL)
		strfree(hap->ha_zone);
	kmem_free(hap, sizeof (history_arg_t));
}
Example #22
void
dsl_destroy_snapshot_sync_impl(dsl_dataset_t *ds, boolean_t defer, dmu_tx_t *tx)
{
#ifdef ZFS_DEBUG
	int err;
#endif
	spa_feature_t f;
	int after_branch_point = FALSE;
	dsl_pool_t *dp = ds->ds_dir->dd_pool;
	objset_t *mos = dp->dp_meta_objset;
	dsl_dataset_t *ds_prev = NULL;
	uint64_t obj, old_unique, used = 0, comp = 0, uncomp = 0;
	dsl_dataset_t *ds_next, *ds_head, *hds;


	ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
	ASSERT3U(dsl_dataset_phys(ds)->ds_bp.blk_birth, <=, tx->tx_txg);
	ASSERT(refcount_is_zero(&ds->ds_longholds));

	if (defer &&
	    (ds->ds_userrefs > 0 ||
	    dsl_dataset_phys(ds)->ds_num_children > 1)) {
		ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS);
		dmu_buf_will_dirty(ds->ds_dbuf, tx);
		dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_DEFER_DESTROY;
		spa_history_log_internal_ds(ds, "defer_destroy", tx, "");
		return;
	}

	ASSERT3U(dsl_dataset_phys(ds)->ds_num_children, <=, 1);

	/* We need to log before removing it from the namespace. */
	spa_history_log_internal_ds(ds, "destroy", tx, "");

	dsl_scan_ds_destroyed(ds, tx);

	obj = ds->ds_object;

	for (f = 0; f < SPA_FEATURES; f++) {
		if (ds->ds_feature_inuse[f]) {
			dsl_dataset_deactivate_feature(obj, f, tx);
			ds->ds_feature_inuse[f] = B_FALSE;
		}
	}
	if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) {
		ASSERT3P(ds->ds_prev, ==, NULL);
		VERIFY0(dsl_dataset_hold_obj(dp,
		    dsl_dataset_phys(ds)->ds_prev_snap_obj, FTAG, &ds_prev));
		after_branch_point =
		    (dsl_dataset_phys(ds_prev)->ds_next_snap_obj != obj);

		dmu_buf_will_dirty(ds_prev->ds_dbuf, tx);
		if (after_branch_point &&
		    dsl_dataset_phys(ds_prev)->ds_next_clones_obj != 0) {
			dsl_dataset_remove_from_next_clones(ds_prev, obj, tx);
			if (dsl_dataset_phys(ds)->ds_next_snap_obj != 0) {
				VERIFY0(zap_add_int(mos,
				    dsl_dataset_phys(ds_prev)->
				    ds_next_clones_obj,
				    dsl_dataset_phys(ds)->ds_next_snap_obj,
				    tx));
			}
		}
		if (!after_branch_point) {
			dsl_dataset_phys(ds_prev)->ds_next_snap_obj =
			    dsl_dataset_phys(ds)->ds_next_snap_obj;
		}
	}
Example #23
void
zfs_rmnode(znode_t *zp)
{
	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
	objset_t	*os = zfsvfs->z_os;
	znode_t		*xzp = NULL;
	dmu_tx_t	*tx;
	uint64_t	acl_obj;
	int		error;

	ASSERT(ZTOV(zp)->v_count == 0);
	ASSERT(zp->z_phys->zp_links == 0);

	/*
	 * If this is an attribute directory, purge its contents.
	 */
	if (ZTOV(zp)->v_type == VDIR && (zp->z_phys->zp_flags & ZFS_XATTR)) {
		if (zfs_purgedir(zp) != 0) {
			/*
			 * Not enough space to delete some xattrs.
			 * Leave it in the unlinked set.
			 */
			zfs_znode_dmu_fini(zp);
			zfs_znode_free(zp);
			return;
		}
	}

	/*
	 * Free up all the data in the file.
	 */
	error = dmu_free_long_range(os, zp->z_id, 0, DMU_OBJECT_END);
	if (error) {
		/*
		 * Not enough space.  Leave the file in the unlinked set.
		 */
		zfs_znode_dmu_fini(zp);
		zfs_znode_free(zp);
		return;
	}

	/*
	 * If the file has extended attributes, we're going to unlink
	 * the xattr dir.
	 */
	if (zp->z_phys->zp_xattr) {
		error = zfs_zget(zfsvfs, zp->z_phys->zp_xattr, &xzp);
		ASSERT(error == 0);
	}

	acl_obj = zp->z_phys->zp_acl.z_acl_extern_obj;

	/*
	 * Set up the final transaction.
	 */
	tx = dmu_tx_create(os);
	dmu_tx_hold_free(tx, zp->z_id, 0, DMU_OBJECT_END);
	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
	if (xzp) {
		dmu_tx_hold_bonus(tx, xzp->z_id);
		dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, TRUE, NULL);
	}
	if (acl_obj)
		dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END);
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		/*
		 * Not enough space to delete the file.  Leave it in the
		 * unlinked set, leaking it until the fs is remounted (at
		 * which point we'll call zfs_unlinked_drain() to process it).
		 */
		dmu_tx_abort(tx);
		zfs_znode_dmu_fini(zp);
		zfs_znode_free(zp);
		goto out;
	}

	if (xzp) {
		dmu_buf_will_dirty(xzp->z_dbuf, tx);
		mutex_enter(&xzp->z_lock);
		xzp->z_unlinked = B_TRUE;	/* mark xzp for deletion */
		xzp->z_phys->zp_links = 0;	/* no more links to it */
		mutex_exit(&xzp->z_lock);
		zfs_unlinked_add(xzp, tx);
	}

	/* Remove this znode from the unlinked set */
	VERIFY3U(0, ==,
	    zap_remove_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj, zp->z_id, tx));

	zfs_znode_delete(zp, tx);

	dmu_tx_commit(tx);
out:
	if (xzp)
		VN_RELE(ZTOV(xzp));
}
Example #24
/*ARGSUSED*/
static void
spa_history_log_sync(void *arg, dmu_tx_t *tx)
{
	nvlist_t	*nvl = arg;
	spa_t		*spa = dmu_tx_pool(tx)->dp_spa;
	objset_t	*mos = spa->spa_meta_objset;
	dmu_buf_t	*dbp;
	spa_history_phys_t *shpp;
	size_t		reclen;
	uint64_t	le_len;
	char		*record_packed = NULL;
	int		ret;

	/*
	 * If we have an older pool that doesn't have a command
	 * history object, create it now.
	 */
	mutex_enter(&spa->spa_history_lock);
	if (!spa->spa_history)
		spa_history_create_obj(spa, tx);
	mutex_exit(&spa->spa_history_lock);

	/*
	 * Get the offset of where we need to write via the bonus buffer.
	 * Update the offset when the write completes.
	 */
	VERIFY0(dmu_bonus_hold(mos, spa->spa_history, FTAG, &dbp));
	shpp = dbp->db_data;

	dmu_buf_will_dirty(dbp, tx);

#ifdef ZFS_DEBUG
	{
		dmu_object_info_t doi;
		dmu_object_info_from_db(dbp, &doi);
		ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_SPA_HISTORY_OFFSETS);
	}
#endif

	fnvlist_add_uint64(nvl, ZPOOL_HIST_TIME, gethrestime_sec());
#ifdef _KERNEL
	fnvlist_add_string(nvl, ZPOOL_HIST_HOST, utsname.nodename);
#endif
	if (nvlist_exists(nvl, ZPOOL_HIST_CMD)) {
		zfs_dbgmsg("command: %s",
		    fnvlist_lookup_string(nvl, ZPOOL_HIST_CMD));
	} else if (nvlist_exists(nvl, ZPOOL_HIST_INT_NAME)) {
		if (nvlist_exists(nvl, ZPOOL_HIST_DSNAME)) {
			zfs_dbgmsg("txg %lld %s %s (id %llu) %s",
			    fnvlist_lookup_uint64(nvl, ZPOOL_HIST_TXG),
			    fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_NAME),
			    fnvlist_lookup_string(nvl, ZPOOL_HIST_DSNAME),
			    fnvlist_lookup_uint64(nvl, ZPOOL_HIST_DSID),
			    fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_STR));
		} else {
			zfs_dbgmsg("txg %lld %s %s",
			    fnvlist_lookup_uint64(nvl, ZPOOL_HIST_TXG),
			    fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_NAME),
			    fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_STR));
		}
	} else if (nvlist_exists(nvl, ZPOOL_HIST_IOCTL)) {
		zfs_dbgmsg("ioctl %s",
		    fnvlist_lookup_string(nvl, ZPOOL_HIST_IOCTL));
	}

	record_packed = fnvlist_pack(nvl, &reclen);

	mutex_enter(&spa->spa_history_lock);

	/* write out the packed length as little endian */
	le_len = LE_64((uint64_t)reclen);
	ret = spa_history_write(spa, &le_len, sizeof (le_len), shpp, tx);
	if (!ret)
		ret = spa_history_write(spa, record_packed, reclen, shpp, tx);

	/* The first command is the create, which we keep forever */
	if (ret == 0 && shpp->sh_pool_create_len == 0 &&
	    nvlist_exists(nvl, ZPOOL_HIST_CMD)) {
		shpp->sh_pool_create_len = shpp->sh_bof = shpp->sh_eof;
	}

	mutex_exit(&spa->spa_history_lock);
	fnvlist_pack_free(record_packed, reclen);
	dmu_buf_rele(dbp, FTAG);
	fnvlist_free(nvl);
}
Example #25
/*
 * sync out AVL trees to persistent storage.
 */
void
zfs_fuid_sync(zfsvfs_t *zfsvfs, dmu_tx_t *tx)
{
#ifdef HAVE_ZPL
	nvlist_t *nvp;
	nvlist_t **fuids;
	size_t nvsize = 0;
	char *packed;
	dmu_buf_t *db;
	fuid_domain_t *domnode;
	int numnodes;
	int i;

	if (!zfsvfs->z_fuid_dirty) {
		return;
	}

	rw_enter(&zfsvfs->z_fuid_lock, RW_WRITER);

	/*
	 * First, see if the table needs to be created.
	 */
	if (zfsvfs->z_fuid_obj == 0) {
		zfsvfs->z_fuid_obj = dmu_object_alloc(zfsvfs->z_os,
		    DMU_OT_FUID, 1 << 14, DMU_OT_FUID_SIZE,
		    sizeof (uint64_t), tx);
		VERIFY(zap_add(zfsvfs->z_os, MASTER_NODE_OBJ,
		    ZFS_FUID_TABLES, sizeof (uint64_t), 1,
		    &zfsvfs->z_fuid_obj, tx) == 0);
	}

	VERIFY(nvlist_alloc(&nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0);

	numnodes = avl_numnodes(&zfsvfs->z_fuid_idx);
	fuids = kmem_alloc(numnodes * sizeof (void *), KM_SLEEP);
	for (i = 0, domnode = avl_first(&zfsvfs->z_fuid_domain); domnode; i++,
	    domnode = AVL_NEXT(&zfsvfs->z_fuid_domain, domnode)) {
		VERIFY(nvlist_alloc(&fuids[i], NV_UNIQUE_NAME, KM_SLEEP) == 0);
		VERIFY(nvlist_add_uint64(fuids[i], FUID_IDX,
		    domnode->f_idx) == 0);
		VERIFY(nvlist_add_uint64(fuids[i], FUID_OFFSET, 0) == 0);
		VERIFY(nvlist_add_string(fuids[i], FUID_DOMAIN,
		    domnode->f_ksid->kd_name) == 0);
	}
	VERIFY(nvlist_add_nvlist_array(nvp, FUID_NVP_ARRAY,
	    fuids, numnodes) == 0);
	for (i = 0; i != numnodes; i++)
		nvlist_free(fuids[i]);
	kmem_free(fuids, numnodes * sizeof (void *));
	VERIFY(nvlist_size(nvp, &nvsize, NV_ENCODE_XDR) == 0);
	packed = kmem_alloc(nvsize, KM_SLEEP);
	VERIFY(nvlist_pack(nvp, &packed, &nvsize,
	    NV_ENCODE_XDR, KM_SLEEP) == 0);
	nvlist_free(nvp);
	zfsvfs->z_fuid_size = nvsize;
	dmu_write(zfsvfs->z_os, zfsvfs->z_fuid_obj, 0,
	    zfsvfs->z_fuid_size, packed, tx);
	kmem_free(packed, zfsvfs->z_fuid_size);
	VERIFY(0 == dmu_bonus_hold(zfsvfs->z_os, zfsvfs->z_fuid_obj,
	    FTAG, &db));
	dmu_buf_will_dirty(db, tx);
	*(uint64_t *)db->db_data = zfsvfs->z_fuid_size;
	dmu_buf_rele(db, FTAG);

	zfsvfs->z_fuid_dirty = B_FALSE;
	rw_exit(&zfsvfs->z_fuid_lock);
#endif /* HAVE_ZPL */
}
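zfs_fuid_sync() stores the packed nvlist in the object body and its length in the bonus buffer. For completeness, a sketch of the matching read side (hypothetical helper; dmu_read() is shown with its older five-argument signature, and newer trees add a flags argument):

static int
read_packed_nvlist(objset_t *os, uint64_t obj, nvlist_t **nvpp)
{
	dmu_buf_t *db;
	uint64_t len;
	char *packed;
	int err;

	err = dmu_bonus_hold(os, obj, FTAG, &db);
	if (err)
		return (err);
	len = *(uint64_t *)db->db_data;		/* length from bonus */
	dmu_buf_rele(db, FTAG);

	packed = kmem_alloc(len, KM_SLEEP);
	err = dmu_read(os, obj, 0, len, packed);
	if (err == 0)
		err = nvlist_unpack(packed, len, nvpp, KM_SLEEP);
	kmem_free(packed, len);
	return (err);
}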
Example #26
/*
 * Main attribute lookup/update function.
 * Returns 0 for success or non-zero for failure.
 *
 * Operates on a bulk array; the first failure aborts further processing.
 */
int
sa_attr_op(sa_handle_t *hdl, sa_bulk_attr_t *bulk, int count,
    sa_data_op_t data_op, dmu_tx_t *tx)
{
	sa_os_t *sa = hdl->sa_os->os_sa;
	int i;
	int error = 0;
	sa_buf_type_t buftypes;

	buftypes = 0;

	ASSERT(count > 0);
	for (i = 0; i != count; i++) {
		ASSERT(bulk[i].sa_attr <= hdl->sa_os->os_sa->sa_num_attrs);

		bulk[i].sa_addr = NULL;
		/* First check the bonus buffer */

		if (hdl->sa_bonus_tab && TOC_ATTR_PRESENT(
		    hdl->sa_bonus_tab->sa_idx_tab[bulk[i].sa_attr])) {
			SA_ATTR_INFO(sa, hdl->sa_bonus_tab,
			    SA_GET_HDR(hdl, SA_BONUS),
			    bulk[i].sa_attr, bulk[i], SA_BONUS, hdl);
			if (tx && !(buftypes & SA_BONUS)) {
				dmu_buf_will_dirty(hdl->sa_bonus, tx);
				buftypes |= SA_BONUS;
			}
		}
		if (bulk[i].sa_addr == NULL &&
		    ((error = sa_get_spill(hdl)) == 0)) {
			if (TOC_ATTR_PRESENT(
			    hdl->sa_spill_tab->sa_idx_tab[bulk[i].sa_attr])) {
				SA_ATTR_INFO(sa, hdl->sa_spill_tab,
				    SA_GET_HDR(hdl, SA_SPILL),
				    bulk[i].sa_attr, bulk[i], SA_SPILL, hdl);
				if (tx && !(buftypes & SA_SPILL) &&
				    bulk[i].sa_size == bulk[i].sa_length) {
					dmu_buf_will_dirty(hdl->sa_spill, tx);
					buftypes |= SA_SPILL;
				}
			}
		}
		if (error && error != ENOENT) {
			return ((error == ECKSUM) ? EIO : error);
		}

		switch (data_op) {
		case SA_LOOKUP:
			if (bulk[i].sa_addr == NULL)
				return (ENOENT);
			if (bulk[i].sa_data) {
				SA_COPY_DATA(bulk[i].sa_data_func,
				    bulk[i].sa_addr, bulk[i].sa_data,
				    bulk[i].sa_size);
			}
			continue;

		case SA_UPDATE:
			/* existing rewrite of attr */
			if (bulk[i].sa_addr &&
			    bulk[i].sa_size == bulk[i].sa_length) {
				SA_COPY_DATA(bulk[i].sa_data_func,
				    bulk[i].sa_data, bulk[i].sa_addr,
				    bulk[i].sa_length);
				continue;
			} else if (bulk[i].sa_addr) { /* attr size change */
				error = sa_modify_attrs(hdl, bulk[i].sa_attr,
				    SA_REPLACE, bulk[i].sa_data_func,
				    bulk[i].sa_data, bulk[i].sa_length, tx);
			} else { /* adding new attribute */
				error = sa_modify_attrs(hdl, bulk[i].sa_attr,
				    SA_ADD, bulk[i].sa_data_func,
				    bulk[i].sa_data, bulk[i].sa_length, tx);
			}
			if (error)
				return (error);
			break;
		}
	}
	return (error);
}
Example #27
/*
 * Unlink zp from dl, and mark zp for deletion if this was the last link.
 * Can fail if zp is a mount point (EBUSY) or a non-empty directory (EEXIST).
 * If 'unlinkedp' is NULL, we put unlinked znodes on the unlinked list.
 * If it's non-NULL, we use it to indicate whether the znode needs deletion,
 * and it's the caller's job to do it.
 */
int
zfs_link_destroy(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag,
	boolean_t *unlinkedp)
{
	znode_t *dzp = dl->dl_dzp;
	vnode_t *vp = ZTOV(zp);
	int zp_is_dir = (vp->v_type == VDIR);
	boolean_t unlinked = B_FALSE;
	int error;

	dnlc_remove(ZTOV(dzp), dl->dl_name);

	if (!(flag & ZRENAMING)) {
		dmu_buf_will_dirty(zp->z_dbuf, tx);

		if (vn_vfswlock(vp))		/* prevent new mounts on zp */
			return (EBUSY);

		if (vn_ismntpt(vp)) {		/* don't remove mount point */
			vn_vfsunlock(vp);
			return (EBUSY);
		}

		mutex_enter(&zp->z_lock);
		if (zp_is_dir && !zfs_dirempty(zp)) {	/* dir not empty */
			mutex_exit(&zp->z_lock);
			vn_vfsunlock(vp);
			return (ENOTEMPTY);
		}
		if (zp->z_phys->zp_links <= zp_is_dir) {
			zfs_panic_recover("zfs: link count on vnode %p is %u, "
			    "should be at least %u", zp->z_vnode,
			    (int)zp->z_phys->zp_links,
			    zp_is_dir + 1);
			zp->z_phys->zp_links = zp_is_dir + 1;
		}
		if (--zp->z_phys->zp_links == zp_is_dir) {
			zp->z_unlinked = B_TRUE;
			zp->z_phys->zp_links = 0;
			unlinked = B_TRUE;
		} else {
			zfs_time_stamper_locked(zp, STATE_CHANGED, tx);
		}
		mutex_exit(&zp->z_lock);
		vn_vfsunlock(vp);
	}

	dmu_buf_will_dirty(dzp->z_dbuf, tx);
	mutex_enter(&dzp->z_lock);
	dzp->z_phys->zp_size--;			/* one dirent removed */
	dzp->z_phys->zp_links -= zp_is_dir;	/* ".." link from zp */
	zfs_time_stamper_locked(dzp, CONTENT_MODIFIED, tx);
	mutex_exit(&dzp->z_lock);

	if (zp->z_zfsvfs->z_norm) {
		if (((zp->z_zfsvfs->z_case == ZFS_CASE_INSENSITIVE) &&
		    (flag & ZCIEXACT)) ||
		    ((zp->z_zfsvfs->z_case == ZFS_CASE_MIXED) &&
		    !(flag & ZCILOOK)))
			error = zap_remove_norm(zp->z_zfsvfs->z_os,
			    dzp->z_id, dl->dl_name, MT_EXACT, tx);
		else
			error = zap_remove_norm(zp->z_zfsvfs->z_os,
			    dzp->z_id, dl->dl_name, MT_FIRST, tx);
	} else {
		error = zap_remove(zp->z_zfsvfs->z_os,
		    dzp->z_id, dl->dl_name, tx);
	}
	ASSERT(error == 0);

	if (unlinkedp != NULL)
		*unlinkedp = unlinked;
	else if (unlinked)
		zfs_unlinked_add(zp, tx);

	return (0);
}
Example #28
void
zfs_rmnode(znode_t *zp)
{
	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
	objset_t	*os = zfsvfs->z_os;
	znode_t		*xzp = NULL;
	char		obj_name[17];
	dmu_tx_t	*tx;
	uint64_t	acl_obj;
	int		error;

	ASSERT(ZTOV(zp)->v_count == 0);
	ASSERT(zp->z_phys->zp_links == 0);

	/*
	 * If this is an attribute directory, purge its contents.
	 */
	if (ZTOV(zp)->v_type == VDIR && (zp->z_phys->zp_flags & ZFS_XATTR))
		if (zfs_purgedir(zp) != 0) {
			zfs_delete_t *delq = &zfsvfs->z_delete_head;
			/*
			 * Add this back to the delete list to be retried later.
			 *
			 * XXX - this could just busy loop on us...
			 */
			mutex_enter(&delq->z_mutex);
			list_insert_tail(&delq->z_znodes, zp);
			delq->z_znode_count++;
			mutex_exit(&delq->z_mutex);
			return;
		}

	/*
	 * If the file has extended attributes, unlink the xattr dir.
	 */
	if (zp->z_phys->zp_xattr) {
		error = zfs_zget(zfsvfs, zp->z_phys->zp_xattr, &xzp);
		ASSERT(error == 0);
	}

	acl_obj = zp->z_phys->zp_acl.z_acl_extern_obj;

	/*
	 * Set up the transaction.
	 */
	tx = dmu_tx_create(os);
	dmu_tx_hold_free(tx, zp->z_id, 0, DMU_OBJECT_END);
	dmu_tx_hold_zap(tx, zfsvfs->z_dqueue, FALSE, NULL);
	if (xzp) {
		dmu_tx_hold_bonus(tx, xzp->z_id);
		dmu_tx_hold_zap(tx, zfsvfs->z_dqueue, TRUE, NULL);
	}
	if (acl_obj)
		dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END);
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		zfs_delete_t *delq = &zfsvfs->z_delete_head;

		dmu_tx_abort(tx);
		/*
		 * Add this back to the delete list to be retried later.
		 *
		 * XXX - this could just busy loop on us...
		 */
		mutex_enter(&delq->z_mutex);
		list_insert_tail(&delq->z_znodes, zp);
		delq->z_znode_count++;
		mutex_exit(&delq->z_mutex);
		return;
	}

	if (xzp) {
		dmu_buf_will_dirty(xzp->z_dbuf, tx);
		mutex_enter(&xzp->z_lock);
		xzp->z_reap = 1;		/* mark xzp for deletion */
		xzp->z_phys->zp_links = 0;	/* no more links to it */
		mutex_exit(&xzp->z_lock);
		zfs_dq_add(xzp, tx);		/* add xzp to delete queue */
	}

	/*
	 * Remove this znode from delete queue
	 */
	error = zap_remove(os, zfsvfs->z_dqueue,
	    zfs_dq_hexname(obj_name, zp->z_id), tx);
	ASSERT3U(error, ==, 0);

	zfs_znode_delete(zp, tx);

	dmu_tx_commit(tx);

	if (xzp)
		VN_RELE(ZTOV(xzp));
}
Example #29
#ifdef HAVE_ZPL
static
#endif
int
zfs_fuid_find_by_domain(zfsvfs_t *zfsvfs, const char *domain, char **retdomain,
    dmu_tx_t *tx)
{
	fuid_domain_t searchnode, *findnode;
	avl_index_t loc;

	/*
	 * If this is the dummy "nobody" domain, return an index of 0
	 * to cause the created FUID to be a standard POSIX id
	 * for the user nobody.
	 */
	if (domain[0] == '\0') {
		*retdomain = "";
		return (0);
	}

	searchnode.f_ksid = ksid_lookupdomain(domain);
	if (retdomain) {
		*retdomain = searchnode.f_ksid->kd_name;
	}
	if (!zfsvfs->z_fuid_loaded)
		zfs_fuid_init(zfsvfs, tx);

	rw_enter(&zfsvfs->z_fuid_lock, RW_READER);
	findnode = avl_find(&zfsvfs->z_fuid_domain, &searchnode, &loc);
	rw_exit(&zfsvfs->z_fuid_lock);

	if (findnode) {
		ksiddomain_rele(searchnode.f_ksid);
		return (findnode->f_idx);
	} else {
		fuid_domain_t *domnode;
		nvlist_t *nvp;
		nvlist_t **fuids;
		uint64_t retidx;
		size_t nvsize = 0;
		char *packed;
		dmu_buf_t *db;
		int i = 0;

		domnode = kmem_alloc(sizeof (fuid_domain_t), KM_SLEEP);
		domnode->f_ksid = searchnode.f_ksid;

		rw_enter(&zfsvfs->z_fuid_lock, RW_WRITER);
		retidx = domnode->f_idx = avl_numnodes(&zfsvfs->z_fuid_idx) + 1;

		avl_add(&zfsvfs->z_fuid_domain, domnode);
		avl_add(&zfsvfs->z_fuid_idx, domnode);
		/*
		 * Now resync the on-disk nvlist.
		 */
		VERIFY(nvlist_alloc(&nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0);

		domnode = avl_first(&zfsvfs->z_fuid_domain);
		fuids = kmem_alloc(retidx * sizeof (void *), KM_SLEEP);
		while (domnode) {
			VERIFY(nvlist_alloc(&fuids[i],
			    NV_UNIQUE_NAME, KM_SLEEP) == 0);
			VERIFY(nvlist_add_uint64(fuids[i], FUID_IDX,
			    domnode->f_idx) == 0);
			VERIFY(nvlist_add_uint64(fuids[i],
			    FUID_OFFSET, 0) == 0);
			VERIFY(nvlist_add_string(fuids[i++], FUID_DOMAIN,
			    domnode->f_ksid->kd_name) == 0);
			domnode = AVL_NEXT(&zfsvfs->z_fuid_domain, domnode);
		}
		VERIFY(nvlist_add_nvlist_array(nvp, FUID_NVP_ARRAY,
		    fuids, retidx) == 0);
		for (i = 0; i != retidx; i++)
			nvlist_free(fuids[i]);
		kmem_free(fuids, retidx * sizeof (void *));
		VERIFY(nvlist_size(nvp, &nvsize, NV_ENCODE_XDR) == 0);
		packed = kmem_alloc(nvsize, KM_SLEEP);
		VERIFY(nvlist_pack(nvp, &packed, &nvsize,
		    NV_ENCODE_XDR, KM_SLEEP) == 0);
		nvlist_free(nvp);
		zfsvfs->z_fuid_size = nvsize;
		dmu_write(zfsvfs->z_os, zfsvfs->z_fuid_obj, 0,
		    zfsvfs->z_fuid_size, packed, tx);
		kmem_free(packed, zfsvfs->z_fuid_size);
		VERIFY(0 == dmu_bonus_hold(zfsvfs->z_os, zfsvfs->z_fuid_obj,
		    FTAG, &db));
		dmu_buf_will_dirty(db, tx);
		*(uint64_t *)db->db_data = zfsvfs->z_fuid_size;
		dmu_buf_rele(db, FTAG);

		rw_exit(&zfsvfs->z_fuid_lock);
		return (retidx);
	}
}
Example #30
/*
 * Find layout that corresponds to ordering of attributes
 * If not found a new layout number is created and added to
 * persistent layout tables.
 */
static int
sa_build_layouts(sa_handle_t *hdl, sa_bulk_attr_t *attr_desc, int attr_count,
    dmu_tx_t *tx)
{
	sa_os_t *sa = hdl->sa_os->os_sa;
	uint64_t hash;
	sa_buf_type_t buftype;
	sa_hdr_phys_t *sahdr;
	void *data_start;
	int buf_space;
	sa_attr_type_t *attrs, *attrs_start;
	int i, lot_count;
	int hdrsize, spillhdrsize;
	int used;
	dmu_object_type_t bonustype;
	sa_lot_t *lot;
	int len_idx;
	int spill_used;
	boolean_t spilling;

	dmu_buf_will_dirty(hdl->sa_bonus, tx);
	bonustype = SA_BONUSTYPE_FROM_DB(hdl->sa_bonus);

	/* first determine bonus header size and sum of all attributes */
	hdrsize = sa_find_sizes(sa, attr_desc, attr_count, hdl->sa_bonus,
	    SA_BONUS, &i, &used, &spilling);

	if (used > SPA_MAXBLOCKSIZE)
		return (EFBIG);

	VERIFY(0 == dmu_set_bonus(hdl->sa_bonus, spilling ?
	    MIN(DN_MAX_BONUSLEN - sizeof (blkptr_t), used + hdrsize) :
	    used + hdrsize, tx));

	ASSERT((bonustype == DMU_OT_ZNODE && spilling == 0) ||
	    bonustype == DMU_OT_SA);

	/* setup and size spill buffer when needed */
	if (spilling) {
		boolean_t dummy;

		if (hdl->sa_spill == NULL) {
			VERIFY(dmu_spill_hold_by_bonus(hdl->sa_bonus, NULL,
			    &hdl->sa_spill) == 0);
		}
		dmu_buf_will_dirty(hdl->sa_spill, tx);

		spillhdrsize = sa_find_sizes(sa, &attr_desc[i],
		    attr_count - i, hdl->sa_spill, SA_SPILL, &i,
		    &spill_used, &dummy);

		if (spill_used > SPA_MAXBLOCKSIZE)
			return (EFBIG);

		buf_space = hdl->sa_spill->db_size - spillhdrsize;
		if (BUF_SPACE_NEEDED(spill_used, spillhdrsize) >
		    hdl->sa_spill->db_size)
			VERIFY(0 == sa_resize_spill(hdl,
			    BUF_SPACE_NEEDED(spill_used, spillhdrsize), tx));
	}

	/* setup starting pointers to lay down data */
	data_start = (void *)((uintptr_t)hdl->sa_bonus->db_data + hdrsize);
	sahdr = (sa_hdr_phys_t *)hdl->sa_bonus->db_data;
	buftype = SA_BONUS;

	if (spilling)
		buf_space = (sa->sa_force_spill) ?
		    0 : SA_BLKPTR_SPACE - hdrsize;
	else
		buf_space = hdl->sa_bonus->db_size - hdrsize;

	attrs_start = attrs = kmem_alloc(sizeof (sa_attr_type_t) * attr_count,
	    KM_SLEEP);
	lot_count = 0;

	for (i = 0, len_idx = 0, hash = -1ULL; i != attr_count; i++) {
		uint16_t length;

		attrs[i] = attr_desc[i].sa_attr;
		length = SA_REGISTERED_LEN(sa, attrs[i]);
		if (length == 0)
			length = attr_desc[i].sa_length;

		if (buf_space < length) {  /* switch to spill buffer */
			VERIFY(bonustype == DMU_OT_SA);
			if (buftype == SA_BONUS && !sa->sa_force_spill) {
				sa_find_layout(hdl->sa_os, hash, attrs_start,
				    lot_count, tx, &lot);
				SA_SET_HDR(sahdr, lot->lot_num, hdrsize);
			}

			buftype = SA_SPILL;
			hash = -1ULL;
			len_idx = 0;

			sahdr = (sa_hdr_phys_t *)hdl->sa_spill->db_data;
			sahdr->sa_magic = SA_MAGIC;
			data_start = (void *)((uintptr_t)sahdr +
			    spillhdrsize);
			attrs_start = &attrs[i];
			buf_space = hdl->sa_spill->db_size - spillhdrsize;
			lot_count = 0;
		}
		hash ^= SA_ATTR_HASH(attrs[i]);
		attr_desc[i].sa_addr = data_start;
		attr_desc[i].sa_size = length;
		SA_COPY_DATA(attr_desc[i].sa_data_func, attr_desc[i].sa_data,
		    data_start, length);
		if (sa->sa_attr_table[attrs[i]].sa_length == 0) {
			sahdr->sa_lengths[len_idx++] = length;
		}
		data_start = (void *)P2ROUNDUP(((uintptr_t)data_start +
		    length), 8);
		buf_space -= P2ROUNDUP(length, 8);
		lot_count++;
	}

	sa_find_layout(hdl->sa_os, hash, attrs_start, lot_count, tx, &lot);

	/*
	 * Verify that old znodes always have layout number 0.
	 * Must be DMU_OT_SA for arbitrary layouts
	 */
	VERIFY((bonustype == DMU_OT_ZNODE && lot->lot_num == 0) ||
	    (bonustype == DMU_OT_SA && lot->lot_num > 1));

	if (bonustype == DMU_OT_SA) {
		SA_SET_HDR(sahdr, lot->lot_num,
		    buftype == SA_BONUS ? hdrsize : spillhdrsize);
	}

	kmem_free(attrs, sizeof (sa_attr_type_t) * attr_count);
	if (hdl->sa_bonus_tab) {
		sa_idx_tab_rele(hdl->sa_os, hdl->sa_bonus_tab);
		hdl->sa_bonus_tab = NULL;
	}
	if (!sa->sa_force_spill)
		VERIFY(0 == sa_build_index(hdl, SA_BONUS));
	if (hdl->sa_spill) {
		sa_idx_tab_rele(hdl->sa_os, hdl->sa_spill_tab);
		if (!spilling) {
			/*
			 * remove spill block that is no longer needed.
			 */
			dmu_buf_rele(hdl->sa_spill, NULL);
			hdl->sa_spill = NULL;
			hdl->sa_spill_tab = NULL;
			VERIFY(0 == dmu_rm_spill(hdl->sa_os,
			    sa_handle_object(hdl), tx));
		} else {
			VERIFY(0 == sa_build_index(hdl, SA_SPILL));
		}
	}

	return (0);
}