Exemple #1
0
void
fzap_upgrade(zap_t *zap, dmu_tx_t *tx, zap_flags_t flags)
{
	ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
	zap->zap_ismicro = FALSE;

	zap->zap_dbu.dbu_evict_func_sync = zap_evict_sync;
	zap->zap_dbu.dbu_evict_func_async = NULL;

	mutex_init(&zap->zap_f.zap_num_entries_mtx, 0, 0, 0);
	zap->zap_f.zap_block_shift = highbit64(zap->zap_dbuf->db_size) - 1;

	zap_phys_t *zp = zap_f_phys(zap);
	/*
	 * explicitly zero it since it might be coming from an
	 * initialized microzap
	 */
	bzero(zap->zap_dbuf->db_data, zap->zap_dbuf->db_size);
	zp->zap_block_type = ZBT_HEADER;
	zp->zap_magic = ZAP_MAGIC;

	zp->zap_ptrtbl.zt_shift = ZAP_EMBEDDED_PTRTBL_SHIFT(zap);

	zp->zap_freeblk = 2;		/* block 1 will be the first leaf */
	zp->zap_num_leafs = 1;
	zp->zap_num_entries = 0;
	zp->zap_salt = zap->zap_salt;
	zp->zap_normflags = zap->zap_normflags;
	zp->zap_flags = flags;

	/* block 1 will be the first leaf */
	for (int i = 0; i < (1<<zp->zap_ptrtbl.zt_shift); i++)
		ZAP_EMBEDDED_PTRTBL_ENT(zap, i) = 1;

	/*
	 * set up block 1 - the first leaf
	 */
	dmu_buf_t *db;
	VERIFY0(dmu_buf_hold(zap->zap_objset, zap->zap_object,
	    1<<FZAP_BLOCK_SHIFT(zap), FTAG, &db, DMU_READ_NO_PREFETCH));
	dmu_buf_will_dirty(db, tx);

	zap_leaf_t *l = kmem_zalloc(sizeof (zap_leaf_t), KM_SLEEP);
	l->l_dbuf = db;

	zap_leaf_init(l, zp->zap_normflags != 0);

	kmem_free(l, sizeof (zap_leaf_t));
	dmu_buf_rele(db, FTAG);
}
Exemple #2
0
/*
 * load initial fuid domain and idx trees.  This function is used by
 * both the kernel and zdb.
 */
uint64_t
zfs_fuid_table_load(objset_t *os, uint64_t fuid_obj, avl_tree_t *idx_tree,
    avl_tree_t *domain_tree)
{
	dmu_buf_t *db;
	uint64_t fuid_size;

	ASSERT(fuid_obj != 0);
	VERIFY(0 == dmu_bonus_hold(os, fuid_obj,
	    FTAG, &db));
	fuid_size = *(uint64_t *)db->db_data;
	dmu_buf_rele(db, FTAG);

	if (fuid_size)  {
		nvlist_t **fuidnvp;
		nvlist_t *nvp = NULL;
		uint_t count;
		char *packed;
		int i;

		packed = kmem_alloc(fuid_size, KM_SLEEP);
		VERIFY(dmu_read(os, fuid_obj, 0,
		    fuid_size, packed, DMU_READ_PREFETCH) == 0);
		VERIFY(nvlist_unpack(packed, fuid_size,
		    &nvp, 0) == 0);
		VERIFY(nvlist_lookup_nvlist_array(nvp, FUID_NVP_ARRAY,
		    &fuidnvp, &count) == 0);

		for (i = 0; i != count; i++) {
			fuid_domain_t *domnode;
			char *domain;
			uint64_t idx;

			VERIFY(nvlist_lookup_string(fuidnvp[i], FUID_DOMAIN,
			    &domain) == 0);
			VERIFY(nvlist_lookup_uint64(fuidnvp[i], FUID_IDX,
			    &idx) == 0);

			domnode = kmem_alloc(sizeof (fuid_domain_t), KM_SLEEP);

			domnode->f_idx = idx;
			domnode->f_ksid = ksid_lookupdomain(domain);
			avl_add(idx_tree, domnode);
			avl_add(domain_tree, domnode);
		}
		nvlist_free(nvp);
		kmem_free(packed, fuid_size);
	}
	return (fuid_size);
}
Exemple #3
0
void
dsl_dir_sync(dsl_dir_t *dd, dmu_tx_t *tx)
{
	ASSERT(dmu_tx_is_syncing(tx));

	mutex_enter(&dd->dd_lock);
	ASSERT0(dd->dd_tempreserved[tx->tx_txg&TXG_MASK]);
	dprintf_dd(dd, "txg=%llu towrite=%lluK\n", tx->tx_txg,
	    dd->dd_space_towrite[tx->tx_txg&TXG_MASK] / 1024);
	dd->dd_space_towrite[tx->tx_txg&TXG_MASK] = 0;
	mutex_exit(&dd->dd_lock);

	/* release the hold from dsl_dir_dirty */
	dmu_buf_rele(dd->dd_dbuf, dd);
}
Exemple #4
0
void
sa_handle_destroy(sa_handle_t *hdl)
{
	mutex_enter(&hdl->sa_lock);
	(void) dmu_buf_update_user((dmu_buf_t *)hdl->sa_bonus, hdl,
	    NULL, NULL, NULL);

	if (hdl->sa_bonus_tab) {
		sa_idx_tab_rele(hdl->sa_os, hdl->sa_bonus_tab);
		hdl->sa_bonus_tab = NULL;
	}
	if (hdl->sa_spill_tab) {
		sa_idx_tab_rele(hdl->sa_os, hdl->sa_spill_tab);
		hdl->sa_spill_tab = NULL;
	}

	dmu_buf_rele(hdl->sa_bonus, NULL);

	if (hdl->sa_spill)
		dmu_buf_rele((dmu_buf_t *)hdl->sa_spill, NULL);
	mutex_exit(&hdl->sa_lock);

	kmem_cache_free(sa_cache, hdl);
}
Exemple #5
0
int
dmu_objset_userspace_upgrade(objset_t *os)
{
	uint64_t obj;
	int err = 0;

	if (dmu_objset_userspace_present(os))
		return (0);
	if (!dmu_objset_userused_enabled(os->os))
		return (ENOTSUP);
	if (dmu_objset_is_snapshot(os))
		return (EINVAL);

	/*
	 * We simply need to mark every object dirty, so that it will be
	 * synced out and now accounted.  If this is called
	 * concurrently, or if we already did some work before crashing,
	 * that's fine, since we track each object's accounted state
	 * independently.
	 */

	for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE, 0)) {
		dmu_tx_t *tx;
		dmu_buf_t *db;
		int objerr;

		if (issig(JUSTLOOKING) && issig(FORREAL))
			return (EINTR);

		objerr = dmu_bonus_hold(os, obj, FTAG, &db);
		if (objerr)
			continue;
		tx = dmu_tx_create(os);
		dmu_tx_hold_bonus(tx, obj);
		objerr = dmu_tx_assign(tx, TXG_WAIT);
		if (objerr) {
			dmu_tx_abort(tx);
			continue;
		}
		dmu_buf_will_dirty(db, tx);
		dmu_buf_rele(db, FTAG);
		dmu_tx_commit(tx);
	}

	os->os->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE;
	txg_wait_synced(dmu_objset_pool(os), 0);
	return (0);
}
static int
bplist_cache(bplist_t *bpl, uint64_t blkid)
{
	int err = 0;

	if (bpl->bpl_cached_dbuf == NULL ||
	    bpl->bpl_cached_dbuf->db_offset != (blkid << bpl->bpl_blockshift)) {
		if (bpl->bpl_cached_dbuf != NULL)
			dmu_buf_rele(bpl->bpl_cached_dbuf, bpl);
		err = dmu_buf_hold(bpl->bpl_mos,
		    bpl->bpl_object, blkid << bpl->bpl_blockshift,
		    bpl, &bpl->bpl_cached_dbuf);
		ASSERT(err || bpl->bpl_cached_dbuf->db_size ==
		    1ULL << bpl->bpl_blockshift);
	}
	return (err);
}
Exemple #7
0
int
zfs_create_share_dir(zfsvfs_t *zfsvfs, dmu_tx_t *tx)
{
	zfs_acl_ids_t acl_ids;
	vattr_t vattr;
	znode_t *sharezp;
	vnode_t *vp;
	znode_t *zp;
	int error;

	vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE;
	vattr.va_type = VDIR;
	vattr.va_mode = S_IFDIR|0555;
	vattr.va_uid = crgetuid(kcred);
	vattr.va_gid = crgetgid(kcred);

	sharezp = kmem_cache_alloc(znode_cache, KM_SLEEP);
	sharezp->z_unlinked = 0;
	sharezp->z_atime_dirty = 0;
	sharezp->z_zfsvfs = zfsvfs;

	vp = ZTOV(sharezp);
	vn_reinit(vp);
	vp->v_type = VDIR;

	VERIFY(0 == zfs_acl_ids_create(sharezp, IS_ROOT_NODE, &vattr,
	    kcred, NULL, &acl_ids));
	zfs_mknode(sharezp, &vattr, tx, kcred, IS_ROOT_NODE,
	    &zp, 0, &acl_ids);
	ASSERT3P(zp, ==, sharezp);
	ASSERT(!vn_in_dnlc(ZTOV(sharezp))); /* not valid to move */
	POINTER_INVALIDATE(&sharezp->z_zfsvfs);
	error = zap_add(zfsvfs->z_os, MASTER_NODE_OBJ,
	    ZFS_SHARES_DIR, 8, 1, &sharezp->z_id, tx);
	zfsvfs->z_shares_dir = sharezp->z_id;

	zfs_acl_ids_free(&acl_ids);
	ZTOV(sharezp)->v_count = 0;
	dmu_buf_rele(sharezp->z_dbuf, NULL);
	sharezp->z_dbuf = NULL;
	kmem_cache_free(znode_cache, sharezp);

	return (error);
}
Exemple #8
0
static void
zvol_last_close(zvol_state_t *zv)
{
	zil_close(zv->zv_zilog);
	zv->zv_zilog = NULL;

	dmu_buf_rele(zv->zv_dbuf, zvol_tag);
	zv->zv_dbuf = NULL;

	/*
	 * Evict cached data
	 */
	if (dsl_dataset_is_dirty(dmu_objset_ds(zv->zv_objset)) &&
	    !(zv->zv_flags & ZVOL_RDONLY))
		txg_wait_synced(dmu_objset_pool(zv->zv_objset), 0);
	(void) dmu_objset_evict_dbufs(zv->zv_objset);

	dmu_objset_disown(zv->zv_objset, zvol_tag);
	zv->zv_objset = NULL;
}
Exemple #9
0
void
vdev_indirect_births_close(vdev_indirect_births_t *vib)
{
	ASSERT(vdev_indirect_births_verify(vib));

	if (vib->vib_phys->vib_count > 0) {
		uint64_t births_size = vdev_indirect_births_size_impl(vib);

		kmem_free(vib->vib_entries, births_size);
		vib->vib_entries = NULL;
	}

	dmu_buf_rele(vib->vib_dbuf, vib);

	vib->vib_objset = NULL;
	vib->vib_object = 0;
	vib->vib_dbuf = NULL;
	vib->vib_phys = NULL;

	kmem_free(vib, sizeof (*vib));
}
Exemple #10
0
int
zfs_sa_readlink(znode_t *zp, uio_t *uio)
{
	dmu_buf_t *db = sa_get_db(zp->z_sa_hdl);
	size_t bufsz;
	int error;

	bufsz = zp->z_size;
	if (bufsz + ZFS_OLD_ZNODE_PHYS_SIZE <= db->db_size) {
		error = uiomove((caddr_t)db->db_data +
		    ZFS_OLD_ZNODE_PHYS_SIZE,
                        MIN((size_t)bufsz, uio_resid(uio)), UIO_READ, uio);
	} else {
		dmu_buf_t *dbp;
		if ((error = dmu_buf_hold(zp->z_zfsvfs->z_os, zp->z_id,
		    0, FTAG, &dbp, DMU_READ_NO_PREFETCH)) == 0) {
			error = uiomove(dbp->db_data,
                            MIN((size_t)bufsz, uio_resid(uio)), UIO_READ, uio);
			dmu_buf_rele(dbp, FTAG);
		}
	}
	return (error);
}
Exemple #11
0
/*
 * XXX: for the moment I don't want to use lnb_flags for osd-internal
 *      purposes as it's not very well defined ...
 *      instead I use the lowest bit of the address so that:
 *        arc buffer:  .lnb_data = abuf          (arc we loan for write)
 *        dbuf buffer: .lnb_data = dbuf | 1      (dbuf we get for read)
 *        copy buffer: .lnb_page->mapping = obj (page we allocate for write)
 *
 *      bzzz, to blame
 */
static int osd_bufs_put(const struct lu_env *env, struct dt_object *dt,
			struct niobuf_local *lnb, int npages)
{
	struct osd_object *obj  = osd_dt_obj(dt);
	struct osd_device *osd = osd_obj2dev(obj);
	unsigned long      ptr;
	int                i;

	LASSERT(dt_object_exists(dt));
	LASSERT(obj->oo_db);

	for (i = 0; i < npages; i++) {
		if (lnb[i].lnb_page == NULL)
			continue;
		if (lnb[i].lnb_page->mapping == (void *)obj) {
			/* this is anonymous page allocated for copy-write */
			lnb[i].lnb_page->mapping = NULL;
			__free_page(lnb[i].lnb_page);
			atomic_dec(&osd->od_zerocopy_alloc);
		} else {
			/* see comment in osd_bufs_get_read() */
			ptr = (unsigned long)lnb[i].lnb_data;
			if (ptr & 1UL) {
				ptr &= ~1UL;
				dmu_buf_rele((void *)ptr, osd_zerocopy_tag);
				atomic_dec(&osd->od_zerocopy_pin);
			} else if (lnb[i].lnb_data != NULL) {
				dmu_return_arcbuf(lnb[i].lnb_data);
				atomic_dec(&osd->od_zerocopy_loan);
			}
		}
		lnb[i].lnb_page = NULL;
		lnb[i].lnb_data = NULL;
	}

	return 0;
}
Exemple #12
0
uint64_t
dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds, const char *name,
    dmu_tx_t *tx)
{
	objset_t *mos = dp->dp_meta_objset;
	uint64_t ddobj;
	dsl_dir_phys_t *ddphys;
	dmu_buf_t *dbuf;

	ddobj = dmu_object_alloc(mos, DMU_OT_DSL_DIR, 0,
	    DMU_OT_DSL_DIR, sizeof (dsl_dir_phys_t), tx);
	if (pds) {
		VERIFY(0 == zap_add(mos, pds->dd_phys->dd_child_dir_zapobj,
		    name, sizeof (uint64_t), 1, &ddobj, tx));
	} else {
		/* it's the root dir */
		VERIFY(0 == zap_add(mos, DMU_POOL_DIRECTORY_OBJECT,
		    DMU_POOL_ROOT_DATASET, sizeof (uint64_t), 1, &ddobj, tx));
	}
	VERIFY(0 == dmu_bonus_hold(mos, ddobj, FTAG, &dbuf));
	dmu_buf_will_dirty(dbuf, tx);
	ddphys = dbuf->db_data;

	ddphys->dd_creation_time = gethrestime_sec();
	if (pds)
		ddphys->dd_parent_obj = pds->dd_object;
	ddphys->dd_props_zapobj = zap_create(mos,
	    DMU_OT_DSL_PROPS, DMU_OT_NONE, 0, tx);
	ddphys->dd_child_dir_zapobj = zap_create(mos,
	    DMU_OT_DSL_DIR_CHILD_MAP, DMU_OT_NONE, 0, tx);
	if (spa_version(dp->dp_spa) >= SPA_VERSION_USED_BREAKDOWN)
		ddphys->dd_flags |= DD_FLAG_USED_BREAKDOWN;
	dmu_buf_rele(dbuf, FTAG);

	return (ddobj);
}
Exemple #13
0
int
dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj,
    const char *tail, void *tag, dsl_dir_t **ddp)
{
	dmu_buf_t *dbuf;
	dsl_dir_t *dd;
	int err;

	ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
	    dsl_pool_sync_context(dp));

	err = dmu_bonus_hold(dp->dp_meta_objset, ddobj, tag, &dbuf);
	if (err)
		return (err);
	dd = dmu_buf_get_user(dbuf);
#ifdef ZFS_DEBUG
	{
		dmu_object_info_t doi;
		dmu_object_info_from_db(dbuf, &doi);
		ASSERT3U(doi.doi_type, ==, DMU_OT_DSL_DIR);
	}
#endif
	/* XXX assert bonus buffer size is correct */
	if (dd == NULL) {
		dsl_dir_t *winner;
#ifndef __APPLE__
		int err;
#endif

		dd = kmem_zalloc(sizeof (dsl_dir_t), KM_SLEEP);
		dd->dd_object = ddobj;
		dd->dd_dbuf = dbuf;
		dd->dd_pool = dp;
		dd->dd_phys = dbuf->db_data;
		dd->dd_used_bytes = dd->dd_phys->dd_used_bytes;
		mutex_init(&dd->dd_lock, NULL, MUTEX_DEFAULT, NULL);

		list_create(&dd->dd_prop_cbs, sizeof (dsl_prop_cb_record_t),
		    offsetof(dsl_prop_cb_record_t, cbr_node));

		if (dd->dd_phys->dd_parent_obj) {
			err = dsl_dir_open_obj(dp, dd->dd_phys->dd_parent_obj,
			    NULL, dd, &dd->dd_parent);
			if (err) {
				mutex_destroy(&dd->dd_lock);
				kmem_free(dd, sizeof (dsl_dir_t));
				dmu_buf_rele(dbuf, tag);
				return (err);
			}
			if (tail) {
#ifdef ZFS_DEBUG
				uint64_t foundobj;

				err = zap_lookup(dp->dp_meta_objset,
				    dd->dd_parent->dd_phys->dd_child_dir_zapobj,
				    tail, sizeof (foundobj), 1, &foundobj);
				ASSERT(err || foundobj == ddobj);
#endif
				(void) strcpy(dd->dd_myname, tail);
			} else {
				err = zap_value_search(dp->dp_meta_objset,
				    dd->dd_parent->dd_phys->dd_child_dir_zapobj,
				    ddobj, 0, dd->dd_myname);
			}
			if (err) {
				dsl_dir_close(dd->dd_parent, dd);
				mutex_destroy(&dd->dd_lock);
				kmem_free(dd, sizeof (dsl_dir_t));
				dmu_buf_rele(dbuf, tag);
				return (err);
			}
		} else {
			(void) strcpy(dd->dd_myname, spa_name(dp->dp_spa));
		}

		winner = dmu_buf_set_user_ie(dbuf, dd, &dd->dd_phys,
		    dsl_dir_evict);
		if (winner) {
			if (dd->dd_parent)
				dsl_dir_close(dd->dd_parent, dd);
			mutex_destroy(&dd->dd_lock);
			kmem_free(dd, sizeof (dsl_dir_t));
			dd = winner;
		} else {
			spa_open_ref(dp->dp_spa, dd);
		}
	}

	/*
	 * The dsl_dir_t has both open-to-close and instantiate-to-evict
	 * holds on the spa.  We need the open-to-close holds because
	 * otherwise the spa_refcnt wouldn't change when we open a
	 * dir which the spa also has open, so we could incorrectly
	 * think it was OK to unload/export/destroy the pool.  We need
	 * the instantiate-to-evict hold because the dsl_dir_t has a
	 * pointer to the dd_pool, which has a pointer to the spa_t.
	 */
	spa_open_ref(dp->dp_spa, tag);
	ASSERT3P(dd->dd_pool, ==, dp);
	ASSERT3U(dd->dd_object, ==, ddobj);
	ASSERT3P(dd->dd_dbuf, ==, dbuf);
	*ddp = dd;
	return (0);
}
Exemple #14
0
/*
 * Read out the command history.
 */
int
spa_history_get(spa_t *spa, uint64_t *offp, uint64_t *len, char *buf)
{
	objset_t *mos = spa->spa_meta_objset;
	dmu_buf_t *dbp;
	uint64_t read_len, phys_read_off, phys_eof;
	uint64_t leftover = 0;
	spa_history_phys_t *shpp;
	int err;

	/*
	 * If the command history  doesn't exist (older pool),
	 * that's ok, just return ENOENT.
	 */
	if (!spa->spa_history)
		return (ENOENT);

	/*
	 * The history is logged asynchronously, so when they request
	 * the first chunk of history, make sure everything has been
	 * synced to disk so that we get it.
	 */
	if (*offp == 0 && spa_writeable(spa))
		txg_wait_synced(spa_get_dsl(spa), 0);

	if ((err = dmu_bonus_hold(mos, spa->spa_history, FTAG, &dbp)) != 0)
		return (err);
	shpp = dbp->db_data;

#ifdef ZFS_DEBUG
	{
		dmu_object_info_t doi;
		dmu_object_info_from_db(dbp, &doi);
		ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_SPA_HISTORY_OFFSETS);
	}
#endif

	mutex_enter(&spa->spa_history_lock);
	phys_eof = spa_history_log_to_phys(shpp->sh_eof, shpp);

	if (*offp < shpp->sh_pool_create_len) {
		/* read in just the zpool create history */
		phys_read_off = *offp;
		read_len = MIN(*len, shpp->sh_pool_create_len -
		    phys_read_off);
	} else {
		/*
		 * Need to reset passed in offset to BOF if the passed in
		 * offset has since been overwritten.
		 */
		*offp = MAX(*offp, shpp->sh_bof);
		phys_read_off = spa_history_log_to_phys(*offp, shpp);

		/*
		 * Read up to the minimum of what the user passed down or
		 * the EOF (physical or logical).  If we hit physical EOF,
		 * use 'leftover' to read from the physical BOF.
		 */
		if (phys_read_off <= phys_eof) {
			read_len = MIN(*len, phys_eof - phys_read_off);
		} else {
			read_len = MIN(*len,
			    shpp->sh_phys_max_off - phys_read_off);
			if (phys_read_off + *len > shpp->sh_phys_max_off) {
				leftover = MIN(*len - read_len,
				    phys_eof - shpp->sh_pool_create_len);
			}
		}
	}

	/* offset for consumer to use next */
	*offp += read_len + leftover;

	/* tell the consumer how much you actually read */
	*len = read_len + leftover;

	if (read_len == 0) {
		mutex_exit(&spa->spa_history_lock);
		dmu_buf_rele(dbp, FTAG);
		return (0);
	}

	err = dmu_read(mos, spa->spa_history, phys_read_off, read_len, buf,
	    DMU_READ_PREFETCH);
	if (leftover && err == 0) {
		err = dmu_read(mos, spa->spa_history, shpp->sh_pool_create_len,
		    leftover, buf + read_len, DMU_READ_PREFETCH);
	}
	mutex_exit(&spa->spa_history_lock);

	dmu_buf_rele(dbp, FTAG);
	return (err);
}
Exemple #15
0
/*ARGSUSED*/
static void
spa_history_log_sync(void *arg1, void *arg2, dmu_tx_t *tx)
{
	spa_t		*spa = arg1;
	history_arg_t	*hap = arg2;
	const char	*history_str = hap->ha_history_str;
	objset_t	*mos = spa->spa_meta_objset;
	dmu_buf_t	*dbp;
	spa_history_phys_t *shpp;
	size_t		reclen;
	uint64_t	le_len;
	nvlist_t	*nvrecord;
	char		*record_packed = NULL;
	int		ret;

	/*
	 * If we have an older pool that doesn't have a command
	 * history object, create it now.
	 */
	mutex_enter(&spa->spa_history_lock);
	if (!spa->spa_history)
		spa_history_create_obj(spa, tx);
	mutex_exit(&spa->spa_history_lock);

	/*
	 * Get the offset of where we need to write via the bonus buffer.
	 * Update the offset when the write completes.
	 */
	VERIFY(0 == dmu_bonus_hold(mos, spa->spa_history, FTAG, &dbp));
	shpp = dbp->db_data;

	dmu_buf_will_dirty(dbp, tx);

#ifdef ZFS_DEBUG
	{
		dmu_object_info_t doi;
		dmu_object_info_from_db(dbp, &doi);
		ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_SPA_HISTORY_OFFSETS);
	}
#endif

	VERIFY(nvlist_alloc(&nvrecord, NV_UNIQUE_NAME, KM_SLEEP) == 0);
	VERIFY(nvlist_add_uint64(nvrecord, ZPOOL_HIST_TIME,
	    gethrestime_sec()) == 0);
	VERIFY(nvlist_add_uint64(nvrecord, ZPOOL_HIST_WHO, hap->ha_uid) == 0);
	if (hap->ha_zone != NULL)
		VERIFY(nvlist_add_string(nvrecord, ZPOOL_HIST_ZONE,
		    hap->ha_zone) == 0);
#ifdef _KERNEL
	VERIFY(nvlist_add_string(nvrecord, ZPOOL_HIST_HOST,
	    utsname.nodename) == 0);
#endif
	if (hap->ha_log_type == LOG_CMD_POOL_CREATE ||
	    hap->ha_log_type == LOG_CMD_NORMAL) {
		VERIFY(nvlist_add_string(nvrecord, ZPOOL_HIST_CMD,
		    history_str) == 0);

		zfs_dbgmsg("command: %s", history_str);
	} else {
		VERIFY(nvlist_add_uint64(nvrecord, ZPOOL_HIST_INT_EVENT,
		    hap->ha_event) == 0);
		VERIFY(nvlist_add_uint64(nvrecord, ZPOOL_HIST_TXG,
		    tx->tx_txg) == 0);
		VERIFY(nvlist_add_string(nvrecord, ZPOOL_HIST_INT_STR,
		    history_str) == 0);

		zfs_dbgmsg("internal %s pool:%s txg:%llu %s",
		    zfs_history_event_names[hap->ha_event], spa_name(spa),
		    (longlong_t)tx->tx_txg, history_str);

	}

	VERIFY(nvlist_size(nvrecord, &reclen, NV_ENCODE_XDR) == 0);
	record_packed = kmem_alloc(reclen, KM_SLEEP);

	VERIFY(nvlist_pack(nvrecord, &record_packed, &reclen,
	    NV_ENCODE_XDR, KM_SLEEP) == 0);

	mutex_enter(&spa->spa_history_lock);
	if (hap->ha_log_type == LOG_CMD_POOL_CREATE)
		VERIFY(shpp->sh_eof == shpp->sh_pool_create_len);

	/* write out the packed length as little endian */
	le_len = LE_64((uint64_t)reclen);
	ret = spa_history_write(spa, &le_len, sizeof (le_len), shpp, tx);
	if (!ret)
		ret = spa_history_write(spa, record_packed, reclen, shpp, tx);

	if (!ret && hap->ha_log_type == LOG_CMD_POOL_CREATE) {
		shpp->sh_pool_create_len += sizeof (le_len) + reclen;
		shpp->sh_bof = shpp->sh_pool_create_len;
	}

	mutex_exit(&spa->spa_history_lock);
	nvlist_free(nvrecord);
	kmem_free(record_packed, reclen);
	dmu_buf_rele(dbp, FTAG);

	strfree(hap->ha_history_str);
	if (hap->ha_zone != NULL)
		strfree(hap->ha_zone);
	kmem_free(hap, sizeof (history_arg_t));
}
Exemple #16
0
int
__osd_xattr_set(const struct lu_env *env, struct osd_object *obj,
		const struct lu_buf *buf, const char *name, int fl,
		struct osd_thandle *oh)
{
	struct osd_device *osd = osd_obj2dev(obj);
	dmu_buf_t         *xa_zap_db = NULL;
	dmu_buf_t         *xa_data_db = NULL;
	uint64_t           xa_data_obj;
	sa_handle_t       *sa_hdl = NULL;
	dmu_tx_t          *tx = oh->ot_tx;
	uint64_t           size;
	int                rc;

	LASSERT(obj->oo_sa_hdl);

	if (obj->oo_xattr == ZFS_NO_OBJECT) {
		struct lu_attr *la = &osd_oti_get(env)->oti_la;

		la->la_valid = LA_MODE;
		la->la_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO;
		rc = __osd_zap_create(env, osd, &xa_zap_db, tx, la,
				      obj->oo_db->db_object, 0);
		if (rc)
			return rc;

		obj->oo_xattr = xa_zap_db->db_object;
		rc = osd_object_sa_update(obj, SA_ZPL_XATTR(osd),
				&obj->oo_xattr, 8, oh);
		if (rc)
			goto out;
	}

	rc = -zap_lookup(osd->od_os, obj->oo_xattr, name, sizeof(uint64_t), 1,
			 &xa_data_obj);
	if (rc == 0) {
		if (fl & LU_XATTR_CREATE) {
			rc = -EEXIST;
			goto out;
		}
		/*
		 * Entry already exists.
		 * We'll truncate the existing object.
		 */
		rc = __osd_obj2dbuf(env, osd->od_os, xa_data_obj,
					&xa_data_db);
		if (rc)
			goto out;

		rc = -sa_handle_get(osd->od_os, xa_data_obj, NULL,
					SA_HDL_PRIVATE, &sa_hdl);
		if (rc)
			goto out;

		rc = -sa_lookup(sa_hdl, SA_ZPL_SIZE(osd), &size, 8);
		if (rc)
			goto out_sa;

		rc = -dmu_free_range(osd->od_os, xa_data_db->db_object,
				     0, DMU_OBJECT_END, tx);
		if (rc)
			goto out_sa;
	} else if (rc == -ENOENT) {
		struct lu_attr *la = &osd_oti_get(env)->oti_la;
		/*
		 * Entry doesn't exist, we need to create a new one and a new
		 * object to store the value.
		 */
		if (fl & LU_XATTR_REPLACE) {
			/* should be ENOATTR according to the
			 * man, but that is undefined here */
			rc = -ENODATA;
			goto out;
		}

		la->la_valid = LA_MODE;
		la->la_mode = S_IFREG | S_IRUGO | S_IWUSR;
		rc = __osd_object_create(env, obj, &xa_data_db, tx, la,
					 obj->oo_xattr);
		if (rc)
			goto out;
		xa_data_obj = xa_data_db->db_object;

		rc = -sa_handle_get(osd->od_os, xa_data_obj, NULL,
					SA_HDL_PRIVATE, &sa_hdl);
		if (rc)
			goto out;

		rc = -zap_add(osd->od_os, obj->oo_xattr, name, sizeof(uint64_t),
				1, &xa_data_obj, tx);
		if (rc)
			goto out_sa;
	} else {
		/* There was an error looking up the xattr name */
		goto out;
	}

	/* Finally write the xattr value */
	dmu_write(osd->od_os, xa_data_obj, 0, buf->lb_len, buf->lb_buf, tx);

	size = buf->lb_len;
	rc = -sa_update(sa_hdl, SA_ZPL_SIZE(osd), &size, 8, tx);

out_sa:
	sa_handle_destroy(sa_hdl);
out:
	if (xa_data_db != NULL)
		dmu_buf_rele(xa_data_db, FTAG);
	if (xa_zap_db != NULL)
		dmu_buf_rele(xa_zap_db, FTAG);

	return rc;
}
Exemple #17
0
/*
 * Find layout that corresponds to ordering of attributes
 * If not found a new layout number is created and added to
 * persistent layout tables.
 */
static int
sa_build_layouts(sa_handle_t *hdl, sa_bulk_attr_t *attr_desc, int attr_count,
    dmu_tx_t *tx)
{
	sa_os_t *sa = hdl->sa_os->os_sa;
	uint64_t hash;
	sa_buf_type_t buftype;
	sa_hdr_phys_t *sahdr;
	void *data_start;
	int buf_space;
	sa_attr_type_t *attrs, *attrs_start;
	int i, lot_count;
	int hdrsize, spillhdrsize;
	int used;
	dmu_object_type_t bonustype;
	sa_lot_t *lot;
	int len_idx;
	int spill_used;
	boolean_t spilling;

	dmu_buf_will_dirty(hdl->sa_bonus, tx);
	bonustype = SA_BONUSTYPE_FROM_DB(hdl->sa_bonus);

	/* first determine bonus header size and sum of all attributes */
	hdrsize = sa_find_sizes(sa, attr_desc, attr_count, hdl->sa_bonus,
	    SA_BONUS, &i, &used, &spilling);

	if (used > SPA_MAXBLOCKSIZE)
		return (EFBIG);

	VERIFY(0 == dmu_set_bonus(hdl->sa_bonus, spilling ?
	    MIN(DN_MAX_BONUSLEN - sizeof (blkptr_t), used + hdrsize) :
	    used + hdrsize, tx));

	ASSERT((bonustype == DMU_OT_ZNODE && spilling == 0) ||
	    bonustype == DMU_OT_SA);

	/* setup and size spill buffer when needed */
	if (spilling) {
		boolean_t dummy;

		if (hdl->sa_spill == NULL) {
			VERIFY(dmu_spill_hold_by_bonus(hdl->sa_bonus, NULL,
			    &hdl->sa_spill) == 0);
		}
		dmu_buf_will_dirty(hdl->sa_spill, tx);

		spillhdrsize = sa_find_sizes(sa, &attr_desc[i],
		    attr_count - i, hdl->sa_spill, SA_SPILL, &i,
		    &spill_used, &dummy);

		if (spill_used > SPA_MAXBLOCKSIZE)
			return (EFBIG);

		buf_space = hdl->sa_spill->db_size - spillhdrsize;
		if (BUF_SPACE_NEEDED(spill_used, spillhdrsize) >
		    hdl->sa_spill->db_size)
			VERIFY(0 == sa_resize_spill(hdl,
			    BUF_SPACE_NEEDED(spill_used, spillhdrsize), tx));
	}

	/* setup starting pointers to lay down data */
	data_start = (void *)((uintptr_t)hdl->sa_bonus->db_data + hdrsize);
	sahdr = (sa_hdr_phys_t *)hdl->sa_bonus->db_data;
	buftype = SA_BONUS;

	if (spilling)
		buf_space = (sa->sa_force_spill) ?
		    0 : SA_BLKPTR_SPACE - hdrsize;
	else
		buf_space = hdl->sa_bonus->db_size - hdrsize;

	attrs_start = attrs = kmem_alloc(sizeof (sa_attr_type_t) * attr_count,
	    KM_SLEEP);
	lot_count = 0;

	for (i = 0, len_idx = 0, hash = -1ULL; i != attr_count; i++) {
		uint16_t length;

		attrs[i] = attr_desc[i].sa_attr;
		length = SA_REGISTERED_LEN(sa, attrs[i]);
		if (length == 0)
			length = attr_desc[i].sa_length;

		if (buf_space < length) {  /* switch to spill buffer */
			VERIFY(bonustype == DMU_OT_SA);
			if (buftype == SA_BONUS && !sa->sa_force_spill) {
				sa_find_layout(hdl->sa_os, hash, attrs_start,
				    lot_count, tx, &lot);
				SA_SET_HDR(sahdr, lot->lot_num, hdrsize);
			}

			buftype = SA_SPILL;
			hash = -1ULL;
			len_idx = 0;

			sahdr = (sa_hdr_phys_t *)hdl->sa_spill->db_data;
			sahdr->sa_magic = SA_MAGIC;
			data_start = (void *)((uintptr_t)sahdr +
			    spillhdrsize);
			attrs_start = &attrs[i];
			buf_space = hdl->sa_spill->db_size - spillhdrsize;
			lot_count = 0;
		}
		hash ^= SA_ATTR_HASH(attrs[i]);
		attr_desc[i].sa_addr = data_start;
		attr_desc[i].sa_size = length;
		SA_COPY_DATA(attr_desc[i].sa_data_func, attr_desc[i].sa_data,
		    data_start, length);
		if (sa->sa_attr_table[attrs[i]].sa_length == 0) {
			sahdr->sa_lengths[len_idx++] = length;
		}
		data_start = (void *)P2ROUNDUP(((uintptr_t)data_start +
		    length), 8);
		buf_space -= P2ROUNDUP(length, 8);
		lot_count++;
	}

	sa_find_layout(hdl->sa_os, hash, attrs_start, lot_count, tx, &lot);

	/*
	 * Verify that old znodes always have layout number 0.
	 * Must be DMU_OT_SA for arbitrary layouts
	 */
	VERIFY((bonustype == DMU_OT_ZNODE && lot->lot_num == 0) ||
	    (bonustype == DMU_OT_SA && lot->lot_num > 1));

	if (bonustype == DMU_OT_SA) {
		SA_SET_HDR(sahdr, lot->lot_num,
		    buftype == SA_BONUS ? hdrsize : spillhdrsize);
	}

	kmem_free(attrs, sizeof (sa_attr_type_t) * attr_count);
	if (hdl->sa_bonus_tab) {
		sa_idx_tab_rele(hdl->sa_os, hdl->sa_bonus_tab);
		hdl->sa_bonus_tab = NULL;
	}
	if (!sa->sa_force_spill)
		VERIFY(0 == sa_build_index(hdl, SA_BONUS));
	if (hdl->sa_spill) {
		sa_idx_tab_rele(hdl->sa_os, hdl->sa_spill_tab);
		if (!spilling) {
			/*
			 * remove spill block that is no longer needed.
			 */
			dmu_buf_rele(hdl->sa_spill, NULL);
			hdl->sa_spill = NULL;
			hdl->sa_spill_tab = NULL;
			VERIFY(0 == dmu_rm_spill(hdl->sa_os,
			    sa_handle_object(hdl), tx));
		} else {
			VERIFY(0 == sa_build_index(hdl, SA_SPILL));
		}
	}

	return (0);
}
Exemple #18
0
int
dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj,
    const char *tail, void *tag, dsl_dir_t **ddp)
{
	dmu_buf_t *dbuf;
	dsl_dir_t *dd;
	int err;

	ASSERT(dsl_pool_config_held(dp));

	err = dmu_bonus_hold(dp->dp_meta_objset, ddobj, tag, &dbuf);
	if (err != 0)
		return (err);
	dd = dmu_buf_get_user(dbuf);
#ifdef ZFS_DEBUG
	{
		dmu_object_info_t doi;
		dmu_object_info_from_db(dbuf, &doi);
		ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_DSL_DIR);
		ASSERT3U(doi.doi_bonus_size, >=, sizeof (dsl_dir_phys_t));
	}
#endif
	if (dd == NULL) {
		dsl_dir_t *winner;

		dd = kmem_zalloc(sizeof (dsl_dir_t), KM_SLEEP);
		dd->dd_object = ddobj;
		dd->dd_dbuf = dbuf;
		dd->dd_pool = dp;
		dd->dd_phys = dbuf->db_data;
		mutex_init(&dd->dd_lock, NULL, MUTEX_DEFAULT, NULL);

		list_create(&dd->dd_prop_cbs, sizeof (dsl_prop_cb_record_t),
		    offsetof(dsl_prop_cb_record_t, cbr_node));

		dsl_dir_snap_cmtime_update(dd);

		if (dd->dd_phys->dd_parent_obj) {
			err = dsl_dir_hold_obj(dp, dd->dd_phys->dd_parent_obj,
			    NULL, dd, &dd->dd_parent);
			if (err != 0)
				goto errout;
			if (tail) {
#ifdef ZFS_DEBUG
				uint64_t foundobj;

				err = zap_lookup(dp->dp_meta_objset,
				    dd->dd_parent->dd_phys->dd_child_dir_zapobj,
				    tail, sizeof (foundobj), 1, &foundobj);
				ASSERT(err || foundobj == ddobj);
#endif
				(void) strcpy(dd->dd_myname, tail);
			} else {
				err = zap_value_search(dp->dp_meta_objset,
				    dd->dd_parent->dd_phys->dd_child_dir_zapobj,
				    ddobj, 0, dd->dd_myname);
			}
			if (err != 0)
				goto errout;
		} else {
			(void) strcpy(dd->dd_myname, spa_name(dp->dp_spa));
		}

		if (dsl_dir_is_clone(dd)) {
			dmu_buf_t *origin_bonus;
			dsl_dataset_phys_t *origin_phys;

			/*
			 * We can't open the origin dataset, because
			 * that would require opening this dsl_dir.
			 * Just look at its phys directly instead.
			 */
			err = dmu_bonus_hold(dp->dp_meta_objset,
			    dd->dd_phys->dd_origin_obj, FTAG, &origin_bonus);
			if (err != 0)
				goto errout;
			origin_phys = origin_bonus->db_data;
			dd->dd_origin_txg =
			    origin_phys->ds_creation_txg;
			dmu_buf_rele(origin_bonus, FTAG);
		}

		winner = dmu_buf_set_user_ie(dbuf, dd, &dd->dd_phys,
		    dsl_dir_evict);
		if (winner) {
			if (dd->dd_parent)
				dsl_dir_rele(dd->dd_parent, dd);
			mutex_destroy(&dd->dd_lock);
			kmem_free(dd, sizeof (dsl_dir_t));
			dd = winner;
		} else {
			spa_open_ref(dp->dp_spa, dd);
		}
	}

	/*
	 * The dsl_dir_t has both open-to-close and instantiate-to-evict
	 * holds on the spa.  We need the open-to-close holds because
	 * otherwise the spa_refcnt wouldn't change when we open a
	 * dir which the spa also has open, so we could incorrectly
	 * think it was OK to unload/export/destroy the pool.  We need
	 * the instantiate-to-evict hold because the dsl_dir_t has a
	 * pointer to the dd_pool, which has a pointer to the spa_t.
	 */
	spa_open_ref(dp->dp_spa, tag);
	ASSERT3P(dd->dd_pool, ==, dp);
	ASSERT3U(dd->dd_object, ==, ddobj);
	ASSERT3P(dd->dd_dbuf, ==, dbuf);
	*ddp = dd;
	return (0);

errout:
	if (dd->dd_parent)
		dsl_dir_rele(dd->dd_parent, dd);
	mutex_destroy(&dd->dd_lock);
	kmem_free(dd, sizeof (dsl_dir_t));
	dmu_buf_rele(dbuf, tag);
	return (err);
}
Exemple #19
0
int __osd_xattr_get(const struct lu_env *env, struct osd_object *obj,
		struct lu_buf *buf, const char *name, int *sizep)
{
	struct osd_device *osd = osd_obj2dev(obj);
	udmu_objset_t     *uos = &osd->od_objset;
	uint64_t           xa_data_obj;
	dmu_buf_t         *xa_data_db;
	sa_handle_t       *sa_hdl = NULL;
	uint64_t           size;
	int                rc;

	/* check SA_ZPL_DXATTR first then fallback to directory xattr */
	rc = __osd_sa_xattr_get(env, obj, buf, name, sizep);
	if (rc != -ENOENT)
		return rc;

	/* are there any extended attributes? */
	if (obj->oo_xattr == ZFS_NO_OBJECT)
		return -ENOENT;

	/* Lookup the object number containing the xattr data */
	rc = -zap_lookup(uos->os, obj->oo_xattr, name, sizeof(uint64_t), 1,
			&xa_data_obj);
	if (rc)
		return rc;

	rc = __osd_obj2dbuf(env, uos->os, xa_data_obj, &xa_data_db, FTAG);
	if (rc)
		return rc;

	rc = -sa_handle_get(uos->os, xa_data_obj, NULL, SA_HDL_PRIVATE,
			&sa_hdl);
	if (rc)
		goto out_rele;

	/* Get the xattr value length / object size */
	rc = -sa_lookup(sa_hdl, SA_ZPL_SIZE(uos), &size, 8);
	if (rc)
		goto out;

	if (size > INT_MAX) {
		rc = -EOVERFLOW;
		goto out;
	}

	*sizep = (int)size;

	if (buf == NULL || buf->lb_buf == NULL) {
		/* We only need to return the required size */
		goto out;
	}
	if (*sizep > buf->lb_len) {
		rc = -ERANGE; /* match ldiskfs error */
		goto out;
	}

	rc = -dmu_read(uos->os, xa_data_db->db_object, 0,
			size, buf->lb_buf, DMU_READ_PREFETCH);

out:
	sa_handle_destroy(sa_hdl);
out_rele:
	dmu_buf_rele(xa_data_db, FTAG);
	return rc;
}
Exemple #20
0
int
zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, offset_t off, ssize_t len,
    char *addr)
{
	dmu_object_info_t doi;
	ssize_t nbytes;
	itx_t *itx;
	lr_write_t *lr;
	objset_t *os;
	dmu_buf_t *db;
	uint64_t txg;
	uint64_t boff;
	int error;
	uint32_t blocksize;

	/* handle common case */
	if (len <= zvol_immediate_write_sz) {
		itx = zvol_immediate_itx(off, len, addr);
		(void) zil_itx_assign(zv->zv_zilog, itx, tx);
		return (0);
	}

	txg = dmu_tx_get_txg(tx);
	os = zv->zv_objset;

	/*
	 * We need to dmu_sync() each block in the range.
	 * For this we need the blocksize.
	 */
	error = dmu_object_info(os, ZVOL_OBJ, &doi);
	if (error)
		return (error);
	blocksize = doi.doi_data_block_size;

	/*
	 * We need to immediate write or dmu_sync() each block in the range.
	 */
	while (len) {
		nbytes = MIN(len, blocksize - P2PHASE(off, blocksize));
		if (nbytes <= zvol_immediate_write_sz) {
			itx = zvol_immediate_itx(off, nbytes, addr);
		} else {
			boff =  P2ALIGN_TYPED(off, blocksize, uint64_t);
			itx = zil_itx_create(TX_WRITE, sizeof (*lr));
			lr = (lr_write_t *)&itx->itx_lr;
			lr->lr_foid = ZVOL_OBJ;
			lr->lr_offset = off;
			lr->lr_length = nbytes;
			lr->lr_blkoff = off - boff;
			BP_ZERO(&lr->lr_blkptr);

			/* XXX - we should do these IOs in parallel */
			VERIFY(0 == dmu_buf_hold(os, ZVOL_OBJ, boff,
			    FTAG, &db));
			ASSERT(boff == db->db_offset);
			error = dmu_sync(NULL, db, &lr->lr_blkptr,
			    txg, NULL, NULL);
			dmu_buf_rele(db, FTAG);
			if (error) {
				kmem_free(itx, offsetof(itx_t, itx_lr));
				return (error);
			}
			itx->itx_wr_state = WR_COPIED;
		}
		(void) zil_itx_assign(zv->zv_zilog, itx, tx);
		len -= nbytes;
		off += nbytes;
	}
	return (0);
}
Exemple #21
0
/*ARGSUSED*/
static int
zfs_vfs_unmount(struct mount *mp, int mntflags, vfs_context_t context)
{
	zfsvfs_t *zfsvfs = vfs_fsprivate(mp);	
	objset_t *os = zfsvfs->z_os;
	znode_t	*zp, *nextzp;
	int ret, i;
	int flags;
	
	/*XXX NOEL: delegation admin stuffs, add back if we use delg. admin */
#if 0
	ret = 0; /* UNDEFINED: secpolicy_fs_unmount(cr, vfsp); */
	if (ret) {
		ret = dsl_deleg_access((char *)refstr_value(vfsp->vfs_resource),
		    ZFS_DELEG_PERM_MOUNT, cr);
		if (ret)
			return (ret);
	}

	/*
	 * We purge the parent filesystem's vfsp as the parent filesystem
	 * and all of its snapshots have their vnode's v_vfsp set to the
	 * parent's filesystem's vfsp.  Note, 'z_parent' is self
	 * referential for non-snapshots.
	 */
	(void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0);
#endif

	/*
	 * Unmount any snapshots mounted under .zfs before unmounting the
	 * dataset itself.
	 */
#if 0
	if (zfsvfs->z_ctldir != NULL &&
	    (ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0) {
		return (ret);
#endif
	flags = SKIPSYSTEM;
	if (mntflags & MNT_FORCE)
		flags |= FORCECLOSE;

	ret = vflush(mp, NULLVP, flags);

	/*
	 * Mac OS X needs a file system modify time
	 *
	 * We use the mtime of the "com.apple.system.mtime" 
	 * extended attribute, which is associated with the
	 * file system root directory.
	 *
	 * Here we need to release the ref we took on z_mtime_vp during mount.
	 */
	if ((ret == 0) || (mntflags & MNT_FORCE)) {
		if (zfsvfs->z_mtime_vp != NULL) {
			struct vnode *mvp;

			mvp = zfsvfs->z_mtime_vp;
			zfsvfs->z_mtime_vp = NULL;

			if (vnode_get(mvp) == 0) {
				vnode_rele(mvp);
				vnode_recycle(mvp);
				vnode_put(mvp);
			}
		}
	}

	if (!(mntflags & MNT_FORCE)) {
		/*
		 * Check the number of active vnodes in the file system.
		 * Our count is maintained in the vfs structure, but the
		 * number is off by 1 to indicate a hold on the vfs
		 * structure itself.
		 *
		 * The '.zfs' directory maintains a reference of its
		 * own, and any active references underneath are
		 * reflected in the vnode count.
		 */
		
		if (ret)
			return (EBUSY);
#if 0
		if (zfsvfs->z_ctldir == NULL) {
			if (vfsp->vfs_count > 1)
				return (EBUSY);
		} else {
			if (vfsp->vfs_count > 2 ||
			    zfsvfs->z_ctldir->v_count > 1) {
				return (EBUSY);
			}
		}
#endif
	}

	rw_enter(&zfsvfs->z_unmount_lock, RW_WRITER);
	rw_enter(&zfsvfs->z_unmount_inactive_lock, RW_WRITER);

	/*
	 * At this point there are no vops active, and any new vops will
	 * fail with EIO since we have z_unmount_lock for writer (only
	 * relavent for forced unmount).
	 *
	 * Release all holds on dbufs.
	 * Note, the dmu can still callback via znode_pageout_func()
	 * which can zfs_znode_free() the znode.  So we lock
	 * z_all_znodes; search the list for a held dbuf; drop the lock
	 * (we know zp can't disappear if we hold a dbuf lock) then
	 * regrab the lock and restart.
	 */
	mutex_enter(&zfsvfs->z_znodes_lock);
	for (zp = list_head(&zfsvfs->z_all_znodes); zp; zp = nextzp) {
		nextzp = list_next(&zfsvfs->z_all_znodes, zp);
		if (zp->z_dbuf_held) {
			/* dbufs should only be held when force unmounting */
			zp->z_dbuf_held = 0;
			mutex_exit(&zfsvfs->z_znodes_lock);
			dmu_buf_rele(zp->z_dbuf, NULL);
			/* Start again */
			mutex_enter(&zfsvfs->z_znodes_lock);
			nextzp = list_head(&zfsvfs->z_all_znodes);
		}
	}
	mutex_exit(&zfsvfs->z_znodes_lock);

	/*
	 * Set the unmounted flag and let new vops unblock.
	 * zfs_inactive will have the unmounted behavior, and all other
	 * vops will fail with EIO.
	 */
	zfsvfs->z_unmounted = B_TRUE;
	rw_exit(&zfsvfs->z_unmount_lock);
	rw_exit(&zfsvfs->z_unmount_inactive_lock);

	/*
	 * Unregister properties.
	 */
#ifndef __APPLE__
	if (!dmu_objset_is_snapshot(os))
		zfs_unregister_callbacks(zfsvfs);
#endif
	/*
	 * Close the zil. NB: Can't close the zil while zfs_inactive
	 * threads are blocked as zil_close can call zfs_inactive.
	 */
	if (zfsvfs->z_log) {
		zil_close(zfsvfs->z_log);
		zfsvfs->z_log = NULL;
	}

	/*
	 * Evict all dbufs so that cached znodes will be freed
	 */
	if (dmu_objset_evict_dbufs(os, B_TRUE)) {
		txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
		(void) dmu_objset_evict_dbufs(os, B_FALSE);
	}

	/*
	 * Finally close the objset
	 */
	dmu_objset_close(os);

	/*
	 * We can now safely destroy the '.zfs' directory node.
	 */
#if 0
	if (zfsvfs->z_ctldir != NULL)
		zfsctl_destroy(zfsvfs);
#endif

	/*
	 * Note that this work is normally done in zfs_freevfs, but since
	 * there is no VOP_FREEVFS in OSX, we free VFS items here
	 */
	OSDecrementAtomic((SInt32 *)&zfs_active_fs_count);
 	for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
 		mutex_destroy(&zfsvfs->z_hold_mtx[i]);
 
 	mutex_destroy(&zfsvfs->z_znodes_lock);
 	list_destroy(&zfsvfs->z_all_znodes);
 	rw_destroy(&zfsvfs->z_unmount_lock);
 	rw_destroy(&zfsvfs->z_unmount_inactive_lock);

	return (0);
}


 
struct vnode* vnode_getparent(struct vnode *vp);  /* sys/vnode_internal.h */

static int
zfs_vget_internal(zfsvfs_t *zfsvfs, ino64_t ino, struct vnode **vpp)
{
	struct vnode	*vp;
	struct vnode	*dvp = NULL;
	znode_t		*zp;
	int		error;

	*vpp = NULL;
	
	/*
	 * On Mac OS X we always export the root directory id as 2
	 * and its parent as 1
	 */
	if (ino == 2 || ino == 1)
		ino = zfsvfs->z_root;
	
	if ((error = zfs_zget(zfsvfs, ino, &zp)))
		goto out;

	/* Don't expose EA objects! */
	if (zp->z_phys->zp_flags & ZFS_XATTR) {
		vnode_put(ZTOV(zp));
		error = ENOENT;
		goto out;
	}

	*vpp = vp = ZTOV(zp);

	if (vnode_isvroot(vp))
		goto out;

	/*
	 * If this znode didn't just come from the cache then
	 * it won't have a valid identity (parent and name).
	 *
	 * Manually fix its identity here (normally done by namei lookup).
	 */
	if ((dvp = vnode_getparent(vp)) == NULL) {
		if (zp->z_phys->zp_parent != 0 &&
		    zfs_vget_internal(zfsvfs, zp->z_phys->zp_parent, &dvp)) {
			goto out;
		}
		if ( vnode_isdir(dvp) ) {
			char objname[ZAP_MAXNAMELEN];  /* 256 bytes */
			int flags = VNODE_UPDATE_PARENT;

			/* Look for znode's name in its parent's zap */
			if ( zap_value_search(zfsvfs->z_os,
			                      zp->z_phys->zp_parent, 
			                      zp->z_id,
			                      ZFS_DIRENT_OBJ(-1ULL),
			                      objname) == 0 ) {
				flags |= VNODE_UPDATE_NAME;
			}

			/* Update the znode's parent and name */
			vnode_update_identity(vp, dvp, objname, 0, 0, flags);
		}
	}
	/* All done with znode's parent */
	vnode_put(dvp);
out:
	return (error);
}

/*
 * Get a vnode from a file id (ignoring the generation)
 *
 * Use by NFS Server (readdirplus) and VFS (build_path)
 */
static int
zfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, __unused vfs_context_t context)
{
	zfsvfs_t *zfsvfs = vfs_fsprivate(mp);
	int error;

	ZFS_ENTER(zfsvfs);

	/*
	 * On Mac OS X we always export the root directory id as 2.
	 * So we don't expect to see the real root directory id
	 * from zfs_vfs_vget KPI (unless of course the real id was
	 * already 2).
	 */
	if ((ino == zfsvfs->z_root) && (zfsvfs->z_root != 2)) {
		ZFS_EXIT(zfsvfs);
		return (ENOENT);
	}
	error = zfs_vget_internal(zfsvfs, ino, vpp);

	ZFS_EXIT(zfsvfs);
	return (error);
}
Exemple #22
0
/*ARGSUSED*/
static void
spa_history_log_sync(void *arg, dmu_tx_t *tx)
{
	nvlist_t	*nvl = arg;
	spa_t		*spa = dmu_tx_pool(tx)->dp_spa;
	objset_t	*mos = spa->spa_meta_objset;
	dmu_buf_t	*dbp;
	spa_history_phys_t *shpp;
	size_t		reclen;
	uint64_t	le_len;
	char		*record_packed = NULL;
	int		ret;

	/*
	 * If we have an older pool that doesn't have a command
	 * history object, create it now.
	 */
	mutex_enter(&spa->spa_history_lock);
	if (!spa->spa_history)
		spa_history_create_obj(spa, tx);
	mutex_exit(&spa->spa_history_lock);

	/*
	 * Get the offset of where we need to write via the bonus buffer.
	 * Update the offset when the write completes.
	 */
	VERIFY0(dmu_bonus_hold(mos, spa->spa_history, FTAG, &dbp));
	shpp = dbp->db_data;

	dmu_buf_will_dirty(dbp, tx);

#ifdef ZFS_DEBUG
	{
		dmu_object_info_t doi;
		dmu_object_info_from_db(dbp, &doi);
		ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_SPA_HISTORY_OFFSETS);
	}
#endif

	fnvlist_add_uint64(nvl, ZPOOL_HIST_TIME, gethrestime_sec());
	fnvlist_add_string(nvl, ZPOOL_HIST_HOST, utsname()->nodename);

	if (nvlist_exists(nvl, ZPOOL_HIST_CMD)) {
		zfs_dbgmsg("command: %s",
		    fnvlist_lookup_string(nvl, ZPOOL_HIST_CMD));
	} else if (nvlist_exists(nvl, ZPOOL_HIST_INT_NAME)) {
		if (nvlist_exists(nvl, ZPOOL_HIST_DSNAME)) {
			zfs_dbgmsg("txg %lld %s %s (id %llu) %s",
			    fnvlist_lookup_uint64(nvl, ZPOOL_HIST_TXG),
			    fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_NAME),
			    fnvlist_lookup_string(nvl, ZPOOL_HIST_DSNAME),
			    fnvlist_lookup_uint64(nvl, ZPOOL_HIST_DSID),
			    fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_STR));
		} else {
			zfs_dbgmsg("txg %lld %s %s",
			    fnvlist_lookup_uint64(nvl, ZPOOL_HIST_TXG),
			    fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_NAME),
			    fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_STR));
		}
		/*
		 * The history sysevent is posted only for internal history
		 * messages to show what has happened, not how it happened. For
		 * example, the following command:
		 *
		 * # zfs destroy -r tank/foo
		 *
		 * will result in one sysevent posted per dataset that is
		 * destroyed as a result of the command - which could be more
		 * than one event in total.  By contrast, if the sysevent was
		 * posted as a result of the ZPOOL_HIST_CMD key being present
		 * it would result in only one sysevent being posted with the
		 * full command line arguments, requiring the consumer to know
		 * how to parse and understand zfs(1M) command invocations.
		 */
		spa_history_log_notify(spa, nvl);
	} else if (nvlist_exists(nvl, ZPOOL_HIST_IOCTL)) {
		zfs_dbgmsg("ioctl %s",
		    fnvlist_lookup_string(nvl, ZPOOL_HIST_IOCTL));
	}

	VERIFY3U(nvlist_pack(nvl, &record_packed, &reclen, NV_ENCODE_NATIVE,
	    KM_SLEEP), ==, 0);

	mutex_enter(&spa->spa_history_lock);

	/* write out the packed length as little endian */
	le_len = LE_64((uint64_t)reclen);
	ret = spa_history_write(spa, &le_len, sizeof (le_len), shpp, tx);
	if (!ret)
		ret = spa_history_write(spa, record_packed, reclen, shpp, tx);

	/* The first command is the create, which we keep forever */
	if (ret == 0 && shpp->sh_pool_create_len == 0 &&
	    nvlist_exists(nvl, ZPOOL_HIST_CMD)) {
		shpp->sh_pool_create_len = shpp->sh_bof = shpp->sh_eof;
	}

	mutex_exit(&spa->spa_history_lock);
	fnvlist_pack_free(record_packed, reclen);
	dmu_buf_rele(dbp, FTAG);
	fnvlist_free(nvl);
}
Exemple #23
0
static void
zfs_objset_close(zfsvfs_t *zfsvfs)
{
	zfs_delete_t	*zd = &zfsvfs->z_delete_head;
	znode_t		*zp, *nextzp;
	objset_t	*os = zfsvfs->z_os;

	/*
	 * Stop all delete threads.
	 */
	(void) zfs_delete_thread_target(zfsvfs, 0);

	/*
	 * For forced unmount, at this point all vops except zfs_inactive
	 * are erroring EIO. We need to now suspend zfs_inactive threads
	 * while we are freeing dbufs before switching zfs_inactive
	 * to use behaviour without a objset.
	 */
	rw_enter(&zfsvfs->z_um_lock, RW_WRITER);

	/*
	 * Release all delete in progress znodes
	 * They will be processed when the file system remounts.
	 */
	mutex_enter(&zd->z_mutex);
	while (zp = list_head(&zd->z_znodes)) {
		list_remove(&zd->z_znodes, zp);
		zp->z_dbuf_held = 0;
		dmu_buf_rele(zp->z_dbuf, NULL);
	}
	mutex_exit(&zd->z_mutex);

	/*
	 * Release all holds on dbufs
	 * Note, although we have stopped all other vop threads and
	 * zfs_inactive(), the dmu can callback via znode_pageout_func()
	 * which can zfs_znode_free() the znode.
	 * So we lock z_all_znodes; search the list for a held
	 * dbuf; drop the lock (we know zp can't disappear if we hold
	 * a dbuf lock; then regrab the lock and restart.
	 */
	mutex_enter(&zfsvfs->z_znodes_lock);
	for (zp = list_head(&zfsvfs->z_all_znodes); zp; zp = nextzp) {
		nextzp = list_next(&zfsvfs->z_all_znodes, zp);
		if (zp->z_dbuf_held) {
			/* dbufs should only be held when force unmounting */
			zp->z_dbuf_held = 0;
			mutex_exit(&zfsvfs->z_znodes_lock);
			dmu_buf_rele(zp->z_dbuf, NULL);
			/* Start again */
			mutex_enter(&zfsvfs->z_znodes_lock);
			nextzp = list_head(&zfsvfs->z_all_znodes);
		}
	}
	mutex_exit(&zfsvfs->z_znodes_lock);

	/*
	 * Unregister properties.
	 */
	if (!dmu_objset_is_snapshot(os))
		zfs_unregister_callbacks(zfsvfs);

	/*
	 * Switch zfs_inactive to behaviour without an objset.
	 * It just tosses cached pages and frees the znode & vnode.
	 * Then re-enable zfs_inactive threads in that new behaviour.
	 */
	zfsvfs->z_unmounted2 = B_TRUE;
	rw_exit(&zfsvfs->z_um_lock); /* re-enable any zfs_inactive threads */

	/*
	 * Close the zil. Can't close the zil while zfs_inactive
	 * threads are blocked as zil_close can call zfs_inactive.
	 */
	if (zfsvfs->z_log) {
		zil_close(zfsvfs->z_log);
		zfsvfs->z_log = NULL;
	}

	/*
	 * Evict all dbufs so that cached znodes will be freed
	 */
	if (dmu_objset_evict_dbufs(os, 1)) {
		txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
		(void) dmu_objset_evict_dbufs(os, 0);
	}

	/*
	 * Finally close the objset
	 */
	dmu_objset_close(os);

	/*
	 * We can now safely destroy the '.zfs' directory node.
	 */
	if (zfsvfs->z_ctldir != NULL)
		zfsctl_destroy(zfsvfs);

}
Exemple #24
0
void
zfs_rmnode(znode_t *zp)
{
	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
	objset_t	*os = zfsvfs->z_os;
	znode_t		*xzp = NULL;
	char		obj_name[17];
	dmu_tx_t	*tx;
	uint64_t	acl_obj;
	int		error;

#ifndef __APPLE__
	ASSERT(ZTOV(zp)->v_count == 0);
#endif /*!__APPLE__*/
	ASSERT(zp->z_phys->zp_links == 0);

#ifdef ZFS_DEBUG
	znode_stalker(zp, N_zfs_rmnode);
#endif

	/*
	 * If this is an attribute directory, purge its contents.
	 */
#ifdef __APPLE__
	if (S_ISDIR(zp->z_phys->zp_mode) && (zp->z_phys->zp_flags & ZFS_XATTR)) 
#else
	if (ZTOV(zp)->v_type == VDIR && (zp->z_phys->zp_flags & ZFS_XATTR)) 
#endif
	{
		if (zfs_purgedir(zp) != 0) {
			/*
			 * Not enough space to delete some xattrs.
			 * Leave it on the unlinked set.
			 */
			return;
		}
	}

	/*
	 * If the file has extended attributes, we're going to unlink
	 * the xattr dir.
	 */
	if (zp->z_phys->zp_xattr) {
		error = zfs_zget(zfsvfs, zp->z_phys->zp_xattr, &xzp);
		ASSERT(error == 0);
	}

	acl_obj = zp->z_phys->zp_acl.z_acl_extern_obj;

	/*
	 * Set up the transaction.
	 */
	tx = dmu_tx_create(os);
	dmu_tx_hold_free(tx, zp->z_id, 0, DMU_OBJECT_END);
	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
	if (xzp) {
		dmu_tx_hold_bonus(tx, xzp->z_id);
		dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, TRUE, NULL);
	}
	if (acl_obj)
		dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END);
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		/*
		 * Not enough space to delete the file.  Leave it in the
		 * unlinked set, leaking it until the fs is remounted (at
		 * which point we'll call zfs_unlinked_drain() to process it).
		 */
		dmu_tx_abort(tx);
#ifdef __APPLE__
		/*XXX NOEL: double check this path logic. see radar 5182217.
		 * This may be disturbing some of the evict logic
		 * and hence causing the NULL ptr drefs seen every great while 
		 * in some of the test cases*/
		zp->z_dbuf_held = 0;
		ZFS_OBJ_HOLD_EXIT(zfsvfs, zp->z_id);
		dmu_buf_rele(zp->z_dbuf, NULL);
#endif /* __APPLE__ */
		return;
	}

	if (xzp) {
		dmu_buf_will_dirty(xzp->z_dbuf, tx);
		mutex_enter(&xzp->z_lock);
		xzp->z_unlinked = B_TRUE;	/* mark xzp for deletion */
		xzp->z_phys->zp_links = 0;	/* no more links to it */
		mutex_exit(&xzp->z_lock);
		zfs_unlinked_add(xzp, tx);
	}

	/* Remove this znode from the unlinked set */
	error = zap_remove(os, zfsvfs->z_unlinkedobj,
	    zfs_unlinked_hexname(obj_name, zp->z_id), tx);
	ASSERT3U(error, ==, 0);

	zfs_znode_delete(zp, tx);

	dmu_tx_commit(tx);

	if (xzp)
		VN_RELE(ZTOV(xzp));
}
Exemple #25
0
/*ARGSUSED*/
static void
spa_history_log_sync(void *arg, dmu_tx_t *tx)
{
	nvlist_t	*nvl = arg;
	spa_t		*spa = dmu_tx_pool(tx)->dp_spa;
	objset_t	*mos = spa->spa_meta_objset;
	dmu_buf_t	*dbp;
	spa_history_phys_t *shpp;
	size_t		reclen;
	uint64_t	le_len;
	char		*record_packed = NULL;
	int		ret;

	/*
	 * If we have an older pool that doesn't have a command
	 * history object, create it now.
	 */
	mutex_enter(&spa->spa_history_lock);
	if (!spa->spa_history)
		spa_history_create_obj(spa, tx);
	mutex_exit(&spa->spa_history_lock);

	/*
	 * Get the offset of where we need to write via the bonus buffer.
	 * Update the offset when the write completes.
	 */
	VERIFY0(dmu_bonus_hold(mos, spa->spa_history, FTAG, &dbp));
	shpp = dbp->db_data;

	dmu_buf_will_dirty(dbp, tx);

#ifdef ZFS_DEBUG
	{
		dmu_object_info_t doi;
		dmu_object_info_from_db(dbp, &doi);
		ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_SPA_HISTORY_OFFSETS);
	}
#endif

	fnvlist_add_uint64(nvl, ZPOOL_HIST_TIME, gethrestime_sec());
#ifdef _KERNEL
	fnvlist_add_string(nvl, ZPOOL_HIST_HOST, utsname.nodename);
#endif
	if (nvlist_exists(nvl, ZPOOL_HIST_CMD)) {
		zfs_dbgmsg("command: %s",
		    fnvlist_lookup_string(nvl, ZPOOL_HIST_CMD));
	} else if (nvlist_exists(nvl, ZPOOL_HIST_INT_NAME)) {
		if (nvlist_exists(nvl, ZPOOL_HIST_DSNAME)) {
			zfs_dbgmsg("txg %lld %s %s (id %llu) %s",
			    fnvlist_lookup_uint64(nvl, ZPOOL_HIST_TXG),
			    fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_NAME),
			    fnvlist_lookup_string(nvl, ZPOOL_HIST_DSNAME),
			    fnvlist_lookup_uint64(nvl, ZPOOL_HIST_DSID),
			    fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_STR));
		} else {
			zfs_dbgmsg("txg %lld %s %s",
			    fnvlist_lookup_uint64(nvl, ZPOOL_HIST_TXG),
			    fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_NAME),
			    fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_STR));
		}
	} else if (nvlist_exists(nvl, ZPOOL_HIST_IOCTL)) {
		zfs_dbgmsg("ioctl %s",
		    fnvlist_lookup_string(nvl, ZPOOL_HIST_IOCTL));
	}

	record_packed = fnvlist_pack(nvl, &reclen);

	mutex_enter(&spa->spa_history_lock);

	/* write out the packed length as little endian */
	le_len = LE_64((uint64_t)reclen);
	ret = spa_history_write(spa, &le_len, sizeof (le_len), shpp, tx);
	if (!ret)
		ret = spa_history_write(spa, record_packed, reclen, shpp, tx);

	/* The first command is the create, which we keep forever */
	if (ret == 0 && shpp->sh_pool_create_len == 0 &&
	    nvlist_exists(nvl, ZPOOL_HIST_CMD)) {
		shpp->sh_pool_create_len = shpp->sh_bof = shpp->sh_eof;
	}

	mutex_exit(&spa->spa_history_lock);
	fnvlist_pack_free(record_packed, reclen);
	dmu_buf_rele(dbp, FTAG);
	fnvlist_free(nvl);
}
Exemple #26
0
static int
zap_table_grow(zap_t *zap, zap_table_phys_t *tbl,
    void (*transfer_func)(const uint64_t *src, uint64_t *dst, int n),
    dmu_tx_t *tx)
{
	uint64_t b, newblk;
	dmu_buf_t *db_old, *db_new;
	int err;
	int bs = FZAP_BLOCK_SHIFT(zap);
	int hepb = 1<<(bs-4);
	/* hepb = half the number of entries in a block */

	ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
	ASSERT(tbl->zt_blk != 0);
	ASSERT(tbl->zt_numblks > 0);

	if (tbl->zt_nextblk != 0) {
		newblk = tbl->zt_nextblk;
	} else {
		newblk = zap_allocate_blocks(zap, tbl->zt_numblks * 2);
		tbl->zt_nextblk = newblk;
		ASSERT0(tbl->zt_blks_copied);
		dmu_prefetch(zap->zap_objset, zap->zap_object,
		    tbl->zt_blk << bs, tbl->zt_numblks << bs);
	}

	/*
	 * Copy the ptrtbl from the old to new location.
	 */

	b = tbl->zt_blks_copied;
	err = dmu_buf_hold(zap->zap_objset, zap->zap_object,
	    (tbl->zt_blk + b) << bs, FTAG, &db_old, DMU_READ_NO_PREFETCH);
	if (err)
		return (err);

	/* first half of entries in old[b] go to new[2*b+0] */
	VERIFY(0 == dmu_buf_hold(zap->zap_objset, zap->zap_object,
	    (newblk + 2*b+0) << bs, FTAG, &db_new, DMU_READ_NO_PREFETCH));
	dmu_buf_will_dirty(db_new, tx);
	transfer_func(db_old->db_data, db_new->db_data, hepb);
	dmu_buf_rele(db_new, FTAG);

	/* second half of entries in old[b] go to new[2*b+1] */
	VERIFY(0 == dmu_buf_hold(zap->zap_objset, zap->zap_object,
	    (newblk + 2*b+1) << bs, FTAG, &db_new, DMU_READ_NO_PREFETCH));
	dmu_buf_will_dirty(db_new, tx);
	transfer_func((uint64_t *)db_old->db_data + hepb,
	    db_new->db_data, hepb);
	dmu_buf_rele(db_new, FTAG);

	dmu_buf_rele(db_old, FTAG);

	tbl->zt_blks_copied++;

	dprintf("copied block %llu of %llu\n",
	    tbl->zt_blks_copied, tbl->zt_numblks);

	if (tbl->zt_blks_copied == tbl->zt_numblks) {
		(void) dmu_free_range(zap->zap_objset, zap->zap_object,
		    tbl->zt_blk << bs, tbl->zt_numblks << bs, tx);

		tbl->zt_blk = newblk;
		tbl->zt_numblks *= 2;
		tbl->zt_shift++;
		tbl->zt_nextblk = 0;
		tbl->zt_blks_copied = 0;

		dprintf("finished; numblocks now %llu (%lluk entries)\n",
		    tbl->zt_numblks, 1<<(tbl->zt_shift-10));
	}

	return (0);
}
Exemple #27
0
void
sa_buf_rele(dmu_buf_t *db, void *tag)
{
	dmu_buf_rele(db, tag);
}
Exemple #28
0
static
#endif
int
zfs_fuid_find_by_domain(zfsvfs_t *zfsvfs, const char *domain, char **retdomain,
    dmu_tx_t *tx)
{
	fuid_domain_t searchnode, *findnode;
	avl_index_t loc;

	/*
	 * If the dummy "nobody" domain then return an index of 0
	 * to cause the created FUID to be a standard POSIX id
	 * for the user nobody.
	 */
	if (domain[0] == '\0') {
		*retdomain = "";
		return (0);
	}

	searchnode.f_ksid = ksid_lookupdomain(domain);
	if (retdomain) {
		*retdomain = searchnode.f_ksid->kd_name;
	}
	if (!zfsvfs->z_fuid_loaded)
		zfs_fuid_init(zfsvfs, tx);

	rw_enter(&zfsvfs->z_fuid_lock, RW_READER);
	findnode = avl_find(&zfsvfs->z_fuid_domain, &searchnode, &loc);
	rw_exit(&zfsvfs->z_fuid_lock);

	if (findnode) {
		ksiddomain_rele(searchnode.f_ksid);
		return (findnode->f_idx);
	} else {
		fuid_domain_t *domnode;
		nvlist_t *nvp;
		nvlist_t **fuids;
		uint64_t retidx;
		size_t nvsize = 0;
		char *packed;
		dmu_buf_t *db;
		int i = 0;

		domnode = kmem_alloc(sizeof (fuid_domain_t), KM_SLEEP);
		domnode->f_ksid = searchnode.f_ksid;

		rw_enter(&zfsvfs->z_fuid_lock, RW_WRITER);
		retidx = domnode->f_idx = avl_numnodes(&zfsvfs->z_fuid_idx) + 1;

		avl_add(&zfsvfs->z_fuid_domain, domnode);
		avl_add(&zfsvfs->z_fuid_idx, domnode);
		/*
		 * Now resync the on-disk nvlist.
		 */
		VERIFY(nvlist_alloc(&nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0);

		domnode = avl_first(&zfsvfs->z_fuid_domain);
		fuids = kmem_alloc(retidx * sizeof (void *), KM_SLEEP);
		while (domnode) {
			VERIFY(nvlist_alloc(&fuids[i],
			    NV_UNIQUE_NAME, KM_SLEEP) == 0);
			VERIFY(nvlist_add_uint64(fuids[i], FUID_IDX,
			    domnode->f_idx) == 0);
			VERIFY(nvlist_add_uint64(fuids[i],
			    FUID_OFFSET, 0) == 0);
			VERIFY(nvlist_add_string(fuids[i++], FUID_DOMAIN,
			    domnode->f_ksid->kd_name) == 0);
			domnode = AVL_NEXT(&zfsvfs->z_fuid_domain, domnode);
		}
		VERIFY(nvlist_add_nvlist_array(nvp, FUID_NVP_ARRAY,
		    fuids, retidx) == 0);
		for (i = 0; i != retidx; i++)
			nvlist_free(fuids[i]);
		kmem_free(fuids, retidx * sizeof (void *));
		VERIFY(nvlist_size(nvp, &nvsize, NV_ENCODE_XDR) == 0);
		packed = kmem_alloc(nvsize, KM_SLEEP);
		VERIFY(nvlist_pack(nvp, &packed, &nvsize,
		    NV_ENCODE_XDR, KM_SLEEP) == 0);
		nvlist_free(nvp);
		zfsvfs->z_fuid_size = nvsize;
		dmu_write(zfsvfs->z_os, zfsvfs->z_fuid_obj, 0,
		    zfsvfs->z_fuid_size, packed, tx);
		kmem_free(packed, zfsvfs->z_fuid_size);
		VERIFY(0 == dmu_bonus_hold(zfsvfs->z_os, zfsvfs->z_fuid_obj,
		    FTAG, &db));
		dmu_buf_will_dirty(db, tx);
		*(uint64_t *)db->db_data = zfsvfs->z_fuid_size;
		dmu_buf_rele(db, FTAG);

		rw_exit(&zfsvfs->z_fuid_lock);
		return (retidx);
	}
}
Exemple #29
0
/*
 * sync out AVL trees to persistent storage.
 */
void
zfs_fuid_sync(zfsvfs_t *zfsvfs, dmu_tx_t *tx)
{
#ifdef HAVE_ZPL
	nvlist_t *nvp;
	nvlist_t **fuids;
	size_t nvsize = 0;
	char *packed;
	dmu_buf_t *db;
	fuid_domain_t *domnode;
	int numnodes;
	int i;

	if (!zfsvfs->z_fuid_dirty) {
		return;
	}

	rw_enter(&zfsvfs->z_fuid_lock, RW_WRITER);

	/*
	 * First see if table needs to be created?
	 */
	if (zfsvfs->z_fuid_obj == 0) {
		zfsvfs->z_fuid_obj = dmu_object_alloc(zfsvfs->z_os,
		    DMU_OT_FUID, 1 << 14, DMU_OT_FUID_SIZE,
		    sizeof (uint64_t), tx);
		VERIFY(zap_add(zfsvfs->z_os, MASTER_NODE_OBJ,
		    ZFS_FUID_TABLES, sizeof (uint64_t), 1,
		    &zfsvfs->z_fuid_obj, tx) == 0);
	}

	VERIFY(nvlist_alloc(&nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0);

	numnodes = avl_numnodes(&zfsvfs->z_fuid_idx);
	fuids = kmem_alloc(numnodes * sizeof (void *), KM_SLEEP);
	for (i = 0, domnode = avl_first(&zfsvfs->z_fuid_domain); domnode; i++,
	    domnode = AVL_NEXT(&zfsvfs->z_fuid_domain, domnode)) {
		VERIFY(nvlist_alloc(&fuids[i], NV_UNIQUE_NAME, KM_SLEEP) == 0);
		VERIFY(nvlist_add_uint64(fuids[i], FUID_IDX,
		    domnode->f_idx) == 0);
		VERIFY(nvlist_add_uint64(fuids[i], FUID_OFFSET, 0) == 0);
		VERIFY(nvlist_add_string(fuids[i], FUID_DOMAIN,
		    domnode->f_ksid->kd_name) == 0);
	}
	VERIFY(nvlist_add_nvlist_array(nvp, FUID_NVP_ARRAY,
	    fuids, numnodes) == 0);
	for (i = 0; i != numnodes; i++)
		nvlist_free(fuids[i]);
	kmem_free(fuids, numnodes * sizeof (void *));
	VERIFY(nvlist_size(nvp, &nvsize, NV_ENCODE_XDR) == 0);
	packed = kmem_alloc(nvsize, KM_SLEEP);
	VERIFY(nvlist_pack(nvp, &packed, &nvsize,
	    NV_ENCODE_XDR, KM_SLEEP) == 0);
	nvlist_free(nvp);
	zfsvfs->z_fuid_size = nvsize;
	dmu_write(zfsvfs->z_os, zfsvfs->z_fuid_obj, 0,
	    zfsvfs->z_fuid_size, packed, tx);
	kmem_free(packed, zfsvfs->z_fuid_size);
	VERIFY(0 == dmu_bonus_hold(zfsvfs->z_os, zfsvfs->z_fuid_obj,
	    FTAG, &db));
	dmu_buf_will_dirty(db, tx);
	*(uint64_t *)db->db_data = zfsvfs->z_fuid_size;
	dmu_buf_rele(db, FTAG);

	zfsvfs->z_fuid_dirty = B_FALSE;
	rw_exit(&zfsvfs->z_fuid_lock);
#endif /* HAVE_ZPL */
}