예제 #1
0
/*
 * The transaction passed to this routine must have
 * dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, ...) called and then assigned
 * to a transaction group.
 *
 * Using ZAP_FLAG_HASH64 will force the ZAP to always be a FAT ZAP.
 * This is fine for directories today, because storing the FID in the dirent
 * will also require a FAT ZAP.  If there is a new type of micro ZAP created
 * then we might need to re-evaluate the use of this flag and instead do
 * a conversion from the different internal ZAP hash formats being used. */
int __osd_zap_create(const struct lu_env *env, udmu_objset_t *uos,
		     dmu_buf_t **zap_dbp, dmu_tx_t *tx,
		     struct lu_attr *la, void *tag, zap_flags_t flags)
{
	uint64_t oid;
	int	 rc;

	LASSERT(tag);

	spin_lock(&uos->lock);
	uos->objects++;
	spin_unlock(&uos->lock);

	/* Assert that the transaction has been assigned to a
	   transaction group. */
	LASSERT(tx->tx_txg != 0);

	oid = zap_create_flags(uos->os, 0, flags | ZAP_FLAG_HASH64,
			       DMU_OT_DIRECTORY_CONTENTS, 12, 12,
			       DMU_OT_SA, DN_MAX_BONUSLEN, tx);

	rc = -sa_buf_hold(uos->os, oid, tag, zap_dbp);
	if (rc)
		return rc;

	LASSERT(la->la_valid & LA_MODE);
	la->la_size = 2;
	la->la_nlink = 1;

	return __osd_attr_init(env, uos, oid, tx, la);
}
예제 #2
0
int __osd_obj2dbuf(const struct lu_env *env, objset_t *os,
		   uint64_t oid, dmu_buf_t **dbp, void *tag)
{
	dmu_object_info_t *doi = &osd_oti_get(env)->oti_doi;
	int rc;

	LASSERT(tag);

	rc = -sa_buf_hold(os, oid, tag, dbp);
	if (rc)
		return rc;

	dmu_object_info_from_db(*dbp, doi);
	if (unlikely (oid != DMU_USERUSED_OBJECT &&
	    oid != DMU_GROUPUSED_OBJECT && doi->doi_bonus_type != DMU_OT_SA)) {
		sa_buf_rele(*dbp, tag);
		*dbp = NULL;
		return -EINVAL;
	}

	LASSERT(*dbp);
	LASSERT((*dbp)->db_object == oid);
	LASSERT((*dbp)->db_offset == -1);
	LASSERT((*dbp)->db_data != NULL);

	return 0;
}
예제 #3
0
static int
zfs_grab_sa_handle(objset_t *osp, uint64_t obj, sa_handle_t **hdlp,
    dmu_buf_t **db, void *tag)
{
	dmu_object_info_t doi;
	int error;

	if ((error = sa_buf_hold(osp, obj, tag, db)) != 0)
		return (error);

	dmu_object_info_from_db(*db, &doi);
	if ((doi.doi_bonus_type != DMU_OT_SA &&
	    doi.doi_bonus_type != DMU_OT_ZNODE) ||
	    (doi.doi_bonus_type == DMU_OT_ZNODE &&
	    doi.doi_bonus_size < sizeof (znode_phys_t))) {
		sa_buf_rele(*db, tag);
		return (SET_ERROR(ENOTSUP));
	}

	error = sa_handle_get(osp, obj, NULL, SA_HDL_PRIVATE, hdlp);
	if (error != 0) {
		sa_buf_rele(*db, tag);
		return (error);
	}

	return (0);
}
예제 #4
0
/*
 * The transaction passed to this routine must have
 * dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT) called and then assigned
 * to a transaction group.
 */
int __osd_object_create(const struct lu_env *env, udmu_objset_t *uos,
			dmu_buf_t **dbp, dmu_tx_t *tx,
			struct lu_attr *la, void *tag)
{
	uint64_t oid;
	int	 rc;

	LASSERT(tag);
	spin_lock(&uos->lock);
	uos->objects++;
	spin_unlock(&uos->lock);

	/* Assert that the transaction has been assigned to a
	   transaction group. */
	LASSERT(tx->tx_txg != 0);

	/* Create a new DMU object. */
	oid = dmu_object_alloc(uos->os, DMU_OT_PLAIN_FILE_CONTENTS, 0,
			       DMU_OT_SA, DN_MAX_BONUSLEN, tx);
	rc = -sa_buf_hold(uos->os, oid, tag, dbp);
	if (rc)
		return rc;

	LASSERT(la->la_valid & LA_MODE);
	la->la_size = 0;
	la->la_nlink = 1;

	return __osd_attr_init(env, uos, oid, tx, la);
}
예제 #5
0
/*
 * The transaction passed to this routine must have
 * dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, ...) called and then assigned
 * to a transaction group.
 *
 * Using ZAP_FLAG_HASH64 will force the ZAP to always be a FAT ZAP.
 * This is fine for directories today, because storing the FID in the dirent
 * will also require a FAT ZAP.  If there is a new type of micro ZAP created
 * then we might need to re-evaluate the use of this flag and instead do
 * a conversion from the different internal ZAP hash formats being used. */
int __osd_zap_create(const struct lu_env *env, struct osd_device *osd,
		     dmu_buf_t **zap_dbp, dmu_tx_t *tx,
		     struct lu_attr *la, zap_flags_t flags)
{
	uint64_t oid;
	int	 rc;

	/* Assert that the transaction has been assigned to a
	   transaction group. */
	LASSERT(tx->tx_txg != 0);

	oid = osd_zap_create_flags(osd->od_os, 0, flags | ZAP_FLAG_HASH64,
				   DMU_OT_DIRECTORY_CONTENTS,
				   14, /* == ZFS fzap_default_blockshift */
				   DN_MAX_INDBLKSHIFT, /* indirect blockshift */
				   0, tx);

	rc = -sa_buf_hold(osd->od_os, oid, osd_obj_tag, zap_dbp);
	if (rc)
		return rc;

	la->la_size = 2;
	la->la_nlink = 1;

	return 0;
}
예제 #6
0
/*
 * The transaction passed to this routine must have
 * dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT) called and then assigned
 * to a transaction group.
 */
int __osd_object_create(const struct lu_env *env, struct osd_object *obj,
			dmu_buf_t **dbp, dmu_tx_t *tx, struct lu_attr *la)
{
	uint64_t	     oid;
	int		     rc;
	struct osd_device   *osd = osd_obj2dev(obj);
	const struct lu_fid *fid = lu_object_fid(&obj->oo_dt.do_lu);
	dmu_object_type_t    type = DMU_OT_PLAIN_FILE_CONTENTS;

	/* Use DMU_OTN_UINT8_METADATA for local objects so their data blocks
	 * would get an additional ditto copy */
	if (unlikely(S_ISREG(la->la_mode) &&
		     fid_seq_is_local_file(fid_seq(fid))))
		type = DMU_OTN_UINT8_METADATA;

	/* Create a new DMU object using the default dnode size. */
	oid = osd_dmu_object_alloc(osd->od_os, type, 0, 0, tx);
	rc = -sa_buf_hold(osd->od_os, oid, osd_obj_tag, dbp);
	LASSERTF(rc == 0, "sa_buf_hold %llu failed: %d\n", oid, rc);

	LASSERT(la->la_valid & LA_MODE);
	la->la_size = 0;
	la->la_nlink = 1;

	return 0;
}
예제 #7
0
/*
 * The transaction passed to this routine must have
 * dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT) called and then assigned
 * to a transaction group.
 */
int __osd_object_create(const struct lu_env *env, struct osd_object *obj,
			dmu_buf_t **dbp, dmu_tx_t *tx, struct lu_attr *la,
			uint64_t parent)
{
	uint64_t	     oid;
	int		     rc;
	struct osd_device   *osd = osd_obj2dev(obj);
	const struct lu_fid *fid = lu_object_fid(&obj->oo_dt.do_lu);
	dmu_object_type_t    type = DMU_OT_PLAIN_FILE_CONTENTS;

	/* Assert that the transaction has been assigned to a
	   transaction group. */
	LASSERT(tx->tx_txg != 0);

	/* Use DMU_OTN_UINT8_METADATA for local objects so their data blocks
	 * would get an additional ditto copy */
	if (unlikely(S_ISREG(la->la_mode) &&
		     fid_seq_is_local_file(fid_seq(fid))))
		type = DMU_OTN_UINT8_METADATA;

	/* Create a new DMU object. */
	oid = dmu_object_alloc(osd->od_os, type, 0,
			       DMU_OT_SA, DN_MAX_BONUSLEN, tx);
	rc = -sa_buf_hold(osd->od_os, oid, osd_obj_tag, dbp);
	LASSERTF(rc == 0, "sa_buf_hold "LPU64" failed: %d\n", oid, rc);

	LASSERT(la->la_valid & LA_MODE);
	la->la_size = 0;
	la->la_nlink = 1;

	rc = __osd_attr_init(env, osd, oid, tx, la, parent);
	if (rc != 0) {
		sa_buf_rele(*dbp, osd_obj_tag);
		*dbp = NULL;
		dmu_object_free(osd->od_os, oid, tx);
		return rc;
	}

	return 0;
}
예제 #8
0
int
zfs_zget(zfs_sb_t *zsb, uint64_t obj_num, znode_t **zpp)
{
	dmu_object_info_t doi;
	dmu_buf_t	*db;
	znode_t		*zp;
	int err;
	sa_handle_t	*hdl;

	*zpp = NULL;

again:
	ZFS_OBJ_HOLD_ENTER(zsb, obj_num);

	err = sa_buf_hold(zsb->z_os, obj_num, NULL, &db);
	if (err) {
		ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
		return (err);
	}

	dmu_object_info_from_db(db, &doi);
	if (doi.doi_bonus_type != DMU_OT_SA &&
	    (doi.doi_bonus_type != DMU_OT_ZNODE ||
	    (doi.doi_bonus_type == DMU_OT_ZNODE &&
	    doi.doi_bonus_size < sizeof (znode_phys_t)))) {
		sa_buf_rele(db, NULL);
		ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
		return (SET_ERROR(EINVAL));
	}

	hdl = dmu_buf_get_user(db);
	if (hdl != NULL) {
		zp = sa_get_userdata(hdl);


		/*
		 * Since "SA" does immediate eviction we
		 * should never find a sa handle that doesn't
		 * know about the znode.
		 */

		ASSERT3P(zp, !=, NULL);

		mutex_enter(&zp->z_lock);
		ASSERT3U(zp->z_id, ==, obj_num);
		if (zp->z_unlinked) {
			err = SET_ERROR(ENOENT);
		} else {
			/*
			 * If igrab() returns NULL the VFS has independently
			 * determined the inode should be evicted and has
			 * called iput_final() to start the eviction process.
			 * The SA handle is still valid but because the VFS
			 * requires that the eviction succeed we must drop
			 * our locks and references to allow the eviction to
			 * complete.  The zfs_zget() may then be retried.
			 *
			 * This unlikely case could be optimized by registering
			 * a sops->drop_inode() callback.  The callback would
			 * need to detect the active SA hold thereby informing
			 * the VFS that this inode should not be evicted.
			 */
			if (igrab(ZTOI(zp)) == NULL) {
				mutex_exit(&zp->z_lock);
				sa_buf_rele(db, NULL);
				ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
				goto again;
			}
			*zpp = zp;
			err = 0;
		}
		mutex_exit(&zp->z_lock);
		sa_buf_rele(db, NULL);
		ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
		return (err);
	}
예제 #9
0
/*
 * Create a new DMU object to hold a zfs znode.
 *
 *	IN:	dzp	- parent directory for new znode
 *		vap	- file attributes for new znode
 *		tx	- dmu transaction id for zap operations
 *		cr	- credentials of caller
 *		flag	- flags:
 *			  IS_ROOT_NODE	- new object will be root
 *			  IS_XATTR	- new object is an attribute
 *		bonuslen - length of bonus buffer
 *		setaclp  - File/Dir initial ACL
 *		fuidp	 - Tracks fuid allocation.
 *
 *	OUT:	zpp	- allocated znode
 *
 */
void
zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
    uint_t flag, znode_t **zpp, zfs_acl_ids_t *acl_ids)
{
	uint64_t	crtime[2], atime[2], mtime[2], ctime[2];
	uint64_t	mode, size, links, parent, pflags;
	uint64_t	dzp_pflags = 0;
	uint64_t	rdev = 0;
	zfs_sb_t	*zsb = ZTOZSB(dzp);
	dmu_buf_t	*db;
	timestruc_t	now;
	uint64_t	gen, obj;
	int		bonuslen;
	sa_handle_t	*sa_hdl;
	dmu_object_type_t obj_type;
	sa_bulk_attr_t	*sa_attrs;
	int		cnt = 0;
	zfs_acl_locator_cb_t locate = { 0 };

	if (zsb->z_replay) {
		obj = vap->va_nodeid;
		now = vap->va_ctime;		/* see zfs_replay_create() */
		gen = vap->va_nblocks;		/* ditto */
	} else {
		obj = 0;
		gethrestime(&now);
		gen = dmu_tx_get_txg(tx);
	}

	obj_type = zsb->z_use_sa ? DMU_OT_SA : DMU_OT_ZNODE;
	bonuslen = (obj_type == DMU_OT_SA) ?
	    DN_MAX_BONUSLEN : ZFS_OLD_ZNODE_PHYS_SIZE;

	/*
	 * Create a new DMU object.
	 */
	/*
	 * There's currently no mechanism for pre-reading the blocks that will
	 * be needed to allocate a new object, so we accept the small chance
	 * that there will be an i/o error and we will fail one of the
	 * assertions below.
	 */
	if (S_ISDIR(vap->va_mode)) {
		if (zsb->z_replay) {
			VERIFY0(zap_create_claim_norm(zsb->z_os, obj,
			    zsb->z_norm, DMU_OT_DIRECTORY_CONTENTS,
			    obj_type, bonuslen, tx));
		} else {
			obj = zap_create_norm(zsb->z_os,
			    zsb->z_norm, DMU_OT_DIRECTORY_CONTENTS,
			    obj_type, bonuslen, tx);
		}
	} else {
		if (zsb->z_replay) {
			VERIFY0(dmu_object_claim(zsb->z_os, obj,
			    DMU_OT_PLAIN_FILE_CONTENTS, 0,
			    obj_type, bonuslen, tx));
		} else {
			obj = dmu_object_alloc(zsb->z_os,
			    DMU_OT_PLAIN_FILE_CONTENTS, 0,
			    obj_type, bonuslen, tx);
		}
	}

	ZFS_OBJ_HOLD_ENTER(zsb, obj);
	VERIFY(0 == sa_buf_hold(zsb->z_os, obj, NULL, &db));

	/*
	 * If this is the root, fix up the half-initialized parent pointer
	 * to reference the just-allocated physical data area.
	 */
	if (flag & IS_ROOT_NODE) {
		dzp->z_id = obj;
	} else {
		dzp_pflags = dzp->z_pflags;
	}

	/*
	 * If parent is an xattr, so am I.
	 */
	if (dzp_pflags & ZFS_XATTR) {
		flag |= IS_XATTR;
	}

	if (zsb->z_use_fuids)
		pflags = ZFS_ARCHIVE | ZFS_AV_MODIFIED;
	else
		pflags = 0;

	if (S_ISDIR(vap->va_mode)) {
		size = 2;		/* contents ("." and "..") */
		links = (flag & (IS_ROOT_NODE | IS_XATTR)) ? 2 : 1;
	} else {
		size = links = 0;
	}

	if (S_ISBLK(vap->va_mode) || S_ISCHR(vap->va_mode))
		rdev = vap->va_rdev;

	parent = dzp->z_id;
	mode = acl_ids->z_mode;
	if (flag & IS_XATTR)
		pflags |= ZFS_XATTR;

	/*
	 * No execs denied will be deterimed when zfs_mode_compute() is called.
	 */
	pflags |= acl_ids->z_aclp->z_hints &
	    (ZFS_ACL_TRIVIAL|ZFS_INHERIT_ACE|ZFS_ACL_AUTO_INHERIT|
	    ZFS_ACL_DEFAULTED|ZFS_ACL_PROTECTED);

	ZFS_TIME_ENCODE(&now, crtime);
	ZFS_TIME_ENCODE(&now, ctime);

	if (vap->va_mask & ATTR_ATIME) {
		ZFS_TIME_ENCODE(&vap->va_atime, atime);
	} else {
		ZFS_TIME_ENCODE(&now, atime);
	}

	if (vap->va_mask & ATTR_MTIME) {
		ZFS_TIME_ENCODE(&vap->va_mtime, mtime);
	} else {
		ZFS_TIME_ENCODE(&now, mtime);
	}

	/* Now add in all of the "SA" attributes */
	VERIFY(0 == sa_handle_get_from_db(zsb->z_os, db, NULL, SA_HDL_SHARED,
	    &sa_hdl));

	/*
	 * Setup the array of attributes to be replaced/set on the new file
	 *
	 * order for  DMU_OT_ZNODE is critical since it needs to be constructed
	 * in the old znode_phys_t format.  Don't change this ordering
	 */
	sa_attrs = kmem_alloc(sizeof (sa_bulk_attr_t) * ZPL_END, KM_SLEEP);

	if (obj_type == DMU_OT_ZNODE) {
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zsb),
		    NULL, &atime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zsb),
		    NULL, &mtime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zsb),
		    NULL, &ctime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zsb),
		    NULL, &crtime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zsb),
		    NULL, &gen, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zsb),
		    NULL, &mode, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zsb),
		    NULL, &size, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zsb),
		    NULL, &parent, 8);
	} else {
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zsb),
		    NULL, &mode, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zsb),
		    NULL, &size, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zsb),
		    NULL, &gen, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zsb),
		    NULL, &acl_ids->z_fuid, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zsb),
		    NULL, &acl_ids->z_fgid, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zsb),
		    NULL, &parent, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zsb),
		    NULL, &pflags, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zsb),
		    NULL, &atime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zsb),
		    NULL, &mtime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zsb),
		    NULL, &ctime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zsb),
		    NULL, &crtime, 16);
	}

	SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_LINKS(zsb), NULL, &links, 8);

	if (obj_type == DMU_OT_ZNODE) {
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_XATTR(zsb), NULL,
		    &empty_xattr, 8);
	}
	if (obj_type == DMU_OT_ZNODE ||
	    (S_ISBLK(vap->va_mode) || S_ISCHR(vap->va_mode))) {
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_RDEV(zsb),
		    NULL, &rdev, 8);
	}
	if (obj_type == DMU_OT_ZNODE) {
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zsb),
		    NULL, &pflags, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zsb), NULL,
		    &acl_ids->z_fuid, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zsb), NULL,
		    &acl_ids->z_fgid, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PAD(zsb), NULL, pad,
		    sizeof (uint64_t) * 4);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ZNODE_ACL(zsb), NULL,
		    &acl_phys, sizeof (zfs_acl_phys_t));
	} else if (acl_ids->z_aclp->z_version >= ZFS_ACL_VERSION_FUID) {
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_COUNT(zsb), NULL,
		    &acl_ids->z_aclp->z_acl_count, 8);
		locate.cb_aclp = acl_ids->z_aclp;
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_ACES(zsb),
		    zfs_acl_data_locator, &locate,
		    acl_ids->z_aclp->z_acl_bytes);
		mode = zfs_mode_compute(mode, acl_ids->z_aclp, &pflags,
		    acl_ids->z_fuid, acl_ids->z_fgid);
	}

	VERIFY(sa_replace_all_by_template(sa_hdl, sa_attrs, cnt, tx) == 0);

	if (!(flag & IS_ROOT_NODE)) {
		*zpp = zfs_znode_alloc(zsb, db, 0, obj_type, obj, sa_hdl,
		    ZTOI(dzp));
		VERIFY(*zpp != NULL);
		VERIFY(dzp != NULL);
	} else {
		/*
		 * If we are creating the root node, the "parent" we
		 * passed in is the znode for the root.
		 */
		*zpp = dzp;

		(*zpp)->z_sa_hdl = sa_hdl;
	}

	(*zpp)->z_pflags = pflags;
	(*zpp)->z_mode = mode;

	if (obj_type == DMU_OT_ZNODE ||
	    acl_ids->z_aclp->z_version < ZFS_ACL_VERSION_FUID) {
		VERIFY0(zfs_aclset_common(*zpp, acl_ids->z_aclp, cr, tx));
	}
	kmem_free(sa_attrs, sizeof (sa_bulk_attr_t) * ZPL_END);
	ZFS_OBJ_HOLD_EXIT(zsb, obj);
}
예제 #10
0
int
zfs_rezget(znode_t *zp)
{
	zfs_sb_t *zsb = ZTOZSB(zp);
	dmu_object_info_t doi;
	dmu_buf_t *db;
	uint64_t obj_num = zp->z_id;
	uint64_t mode;
	sa_bulk_attr_t bulk[8];
	int err;
	int count = 0;
	uint64_t gen;

	ZFS_OBJ_HOLD_ENTER(zsb, obj_num);

	mutex_enter(&zp->z_acl_lock);
	if (zp->z_acl_cached) {
		zfs_acl_free(zp->z_acl_cached);
		zp->z_acl_cached = NULL;
	}
	mutex_exit(&zp->z_acl_lock);

	rw_enter(&zp->z_xattr_lock, RW_WRITER);
	if (zp->z_xattr_cached) {
		nvlist_free(zp->z_xattr_cached);
		zp->z_xattr_cached = NULL;
	}

	if (zp->z_xattr_parent) {
		iput(ZTOI(zp->z_xattr_parent));
		zp->z_xattr_parent = NULL;
	}
	rw_exit(&zp->z_xattr_lock);

	ASSERT(zp->z_sa_hdl == NULL);
	err = sa_buf_hold(zsb->z_os, obj_num, NULL, &db);
	if (err) {
		ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
		return (err);
	}

	dmu_object_info_from_db(db, &doi);
	if (doi.doi_bonus_type != DMU_OT_SA &&
	    (doi.doi_bonus_type != DMU_OT_ZNODE ||
	    (doi.doi_bonus_type == DMU_OT_ZNODE &&
	    doi.doi_bonus_size < sizeof (znode_phys_t)))) {
		sa_buf_rele(db, NULL);
		ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
		return (SET_ERROR(EINVAL));
	}

	zfs_znode_sa_init(zsb, zp, db, doi.doi_bonus_type, NULL);

	/* reload cached values */
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zsb), NULL,
	    &gen, sizeof (gen));
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zsb), NULL,
	    &zp->z_size, sizeof (zp->z_size));
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zsb), NULL,
	    &zp->z_links, sizeof (zp->z_links));
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb), NULL,
	    &zp->z_pflags, sizeof (zp->z_pflags));
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zsb), NULL,
	    &zp->z_atime, sizeof (zp->z_atime));
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zsb), NULL,
	    &zp->z_uid, sizeof (zp->z_uid));
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zsb), NULL,
	    &zp->z_gid, sizeof (zp->z_gid));
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zsb), NULL,
	    &mode, sizeof (mode));

	if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) {
		zfs_znode_dmu_fini(zp);
		ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
		return (SET_ERROR(EIO));
	}

	zp->z_mode = mode;

	if (gen != zp->z_gen) {
		zfs_znode_dmu_fini(zp);
		ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
		return (SET_ERROR(EIO));
	}

	zp->z_unlinked = (zp->z_links == 0);
	zp->z_blksz = doi.doi_data_block_size;
	zfs_inode_update(zp);

	ZFS_OBJ_HOLD_EXIT(zsb, obj_num);

	return (0);
}
예제 #11
0
int
zfs_zget(zfs_sb_t *zsb, uint64_t obj_num, znode_t **zpp)
{
	dmu_object_info_t doi;
	dmu_buf_t	*db;
	znode_t		*zp;
	int err;
	sa_handle_t	*hdl;
	struct inode	*ip;

	*zpp = NULL;

again:
	ip = ilookup(zsb->z_sb, obj_num);

	ZFS_OBJ_HOLD_ENTER(zsb, obj_num);

	err = sa_buf_hold(zsb->z_os, obj_num, NULL, &db);
	if (err) {
		ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
		iput(ip);
		return (err);
	}

	dmu_object_info_from_db(db, &doi);
	if (doi.doi_bonus_type != DMU_OT_SA &&
	    (doi.doi_bonus_type != DMU_OT_ZNODE ||
	    (doi.doi_bonus_type == DMU_OT_ZNODE &&
	    doi.doi_bonus_size < sizeof (znode_phys_t)))) {
		sa_buf_rele(db, NULL);
		ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
		iput(ip);
		return (SET_ERROR(EINVAL));
	}

	hdl = dmu_buf_get_user(db);
	if (hdl != NULL) {
		if (ip == NULL) {
			/*
			 * ilookup returned NULL, which means
			 * the znode is dying - but the SA handle isn't
			 * quite dead yet, we need to drop any locks
			 * we're holding, re-schedule the task and try again.
			 */
			sa_buf_rele(db, NULL);
			ZFS_OBJ_HOLD_EXIT(zsb, obj_num);

			schedule();
			goto again;
		}

		zp = sa_get_userdata(hdl);

		/*
		 * Since "SA" does immediate eviction we
		 * should never find a sa handle that doesn't
		 * know about the znode.
		 */

		ASSERT3P(zp, !=, NULL);

		mutex_enter(&zp->z_lock);
		ASSERT3U(zp->z_id, ==, obj_num);
		if (zp->z_unlinked) {
			err = SET_ERROR(ENOENT);
		} else {
			igrab(ZTOI(zp));
			*zpp = zp;
			err = 0;
		}
		sa_buf_rele(db, NULL);
		mutex_exit(&zp->z_lock);
		ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
		iput(ip);
		return (err);
	}

	ASSERT3P(ip, ==, NULL);

	/*
	 * Not found create new znode/vnode but only if file exists.
	 *
	 * There is a small window where zfs_vget() could
	 * find this object while a file create is still in
	 * progress.  This is checked for in zfs_znode_alloc()
	 *
	 * if zfs_znode_alloc() fails it will drop the hold on the
	 * bonus buffer.
	 */
	zp = zfs_znode_alloc(zsb, db, doi.doi_data_block_size,
	    doi.doi_bonus_type, obj_num, NULL, NULL);
	if (zp == NULL) {
		err = SET_ERROR(ENOENT);
	} else {
		*zpp = zp;
	}
	ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
	return (err);
}