Example #1
0
static int
zfs_grab_sa_handle(objset_t *osp, uint64_t obj, sa_handle_t **hdlp,
    dmu_buf_t **db, void *tag)
{
	dmu_object_info_t doi;
	int error;

	if ((error = sa_buf_hold(osp, obj, tag, db)) != 0)
		return (error);

	dmu_object_info_from_db(*db, &doi);
	if ((doi.doi_bonus_type != DMU_OT_SA &&
	    doi.doi_bonus_type != DMU_OT_ZNODE) ||
	    (doi.doi_bonus_type == DMU_OT_ZNODE &&
	    doi.doi_bonus_size < sizeof (znode_phys_t))) {
		sa_buf_rele(*db, tag);
		return (SET_ERROR(ENOTSUP));
	}

	error = sa_handle_get(osp, obj, NULL, SA_HDL_PRIVATE, hdlp);
	if (error != 0) {
		sa_buf_rele(*db, tag);
		return (error);
	}

	return (0);
}
Example #2
0
int __osd_obj2dbuf(const struct lu_env *env, objset_t *os,
		   uint64_t oid, dmu_buf_t **dbp, void *tag)
{
	dmu_object_info_t *doi = &osd_oti_get(env)->oti_doi;
	int rc;

	LASSERT(tag);

	rc = -sa_buf_hold(os, oid, tag, dbp);
	if (rc)
		return rc;

	dmu_object_info_from_db(*dbp, doi);
	if (unlikely (oid != DMU_USERUSED_OBJECT &&
	    oid != DMU_GROUPUSED_OBJECT && doi->doi_bonus_type != DMU_OT_SA)) {
		sa_buf_rele(*dbp, tag);
		*dbp = NULL;
		return -EINVAL;
	}

	LASSERT(*dbp);
	LASSERT((*dbp)->db_object == oid);
	LASSERT((*dbp)->db_offset == -1);
	LASSERT((*dbp)->db_data != NULL);

	return 0;
}
Example #3
0
static void osd_object_delete(const struct lu_env *env, struct lu_object *l)
{
	struct osd_object *obj = osd_obj(l);

	if (obj->oo_db != NULL) {
		osd_object_sa_fini(obj);
		if (obj->oo_sa_xattr) {
			nvlist_free(obj->oo_sa_xattr);
			obj->oo_sa_xattr = NULL;
		}
		sa_buf_rele(obj->oo_db, osd_obj_tag);
		cfs_list_del(&obj->oo_sa_linkage);
		obj->oo_db = NULL;
	}
}
Example #4
0
/*
 * The transaction passed to this routine must have
 * dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT) called and then assigned
 * to a transaction group.
 */
int __osd_object_create(const struct lu_env *env, struct osd_object *obj,
			dmu_buf_t **dbp, dmu_tx_t *tx, struct lu_attr *la,
			uint64_t parent)
{
	uint64_t	     oid;
	int		     rc;
	struct osd_device   *osd = osd_obj2dev(obj);
	const struct lu_fid *fid = lu_object_fid(&obj->oo_dt.do_lu);
	dmu_object_type_t    type = DMU_OT_PLAIN_FILE_CONTENTS;

	/* Assert that the transaction has been assigned to a
	   transaction group. */
	LASSERT(tx->tx_txg != 0);

	/* Use DMU_OTN_UINT8_METADATA for local objects so their data blocks
	 * would get an additional ditto copy */
	if (unlikely(S_ISREG(la->la_mode) &&
		     fid_seq_is_local_file(fid_seq(fid))))
		type = DMU_OTN_UINT8_METADATA;

	/* Create a new DMU object. */
	oid = dmu_object_alloc(osd->od_os, type, 0,
			       DMU_OT_SA, DN_MAX_BONUSLEN, tx);
	rc = -sa_buf_hold(osd->od_os, oid, osd_obj_tag, dbp);
	LASSERTF(rc == 0, "sa_buf_hold "LPU64" failed: %d\n", oid, rc);

	LASSERT(la->la_valid & LA_MODE);
	la->la_size = 0;
	la->la_nlink = 1;

	rc = __osd_attr_init(env, osd, oid, tx, la, parent);
	if (rc != 0) {
		sa_buf_rele(*dbp, osd_obj_tag);
		*dbp = NULL;
		dmu_object_free(osd->od_os, oid, tx);
		return rc;
	}

	return 0;
}
Example #5
0
int
zfs_zget(zfs_sb_t *zsb, uint64_t obj_num, znode_t **zpp)
{
	dmu_object_info_t doi;
	dmu_buf_t	*db;
	znode_t		*zp;
	int err;
	sa_handle_t	*hdl;

	*zpp = NULL;

again:
	ZFS_OBJ_HOLD_ENTER(zsb, obj_num);

	err = sa_buf_hold(zsb->z_os, obj_num, NULL, &db);
	if (err) {
		ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
		return (err);
	}

	dmu_object_info_from_db(db, &doi);
	if (doi.doi_bonus_type != DMU_OT_SA &&
	    (doi.doi_bonus_type != DMU_OT_ZNODE ||
	    (doi.doi_bonus_type == DMU_OT_ZNODE &&
	    doi.doi_bonus_size < sizeof (znode_phys_t)))) {
		sa_buf_rele(db, NULL);
		ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
		return (SET_ERROR(EINVAL));
	}

	hdl = dmu_buf_get_user(db);
	if (hdl != NULL) {
		zp = sa_get_userdata(hdl);


		/*
		 * Since "SA" does immediate eviction we
		 * should never find a sa handle that doesn't
		 * know about the znode.
		 */

		ASSERT3P(zp, !=, NULL);

		mutex_enter(&zp->z_lock);
		ASSERT3U(zp->z_id, ==, obj_num);
		if (zp->z_unlinked) {
			err = SET_ERROR(ENOENT);
		} else {
			/*
			 * If igrab() returns NULL the VFS has independently
			 * determined the inode should be evicted and has
			 * called iput_final() to start the eviction process.
			 * The SA handle is still valid but because the VFS
			 * requires that the eviction succeed we must drop
			 * our locks and references to allow the eviction to
			 * complete.  The zfs_zget() may then be retried.
			 *
			 * This unlikely case could be optimized by registering
			 * a sops->drop_inode() callback.  The callback would
			 * need to detect the active SA hold thereby informing
			 * the VFS that this inode should not be evicted.
			 */
			if (igrab(ZTOI(zp)) == NULL) {
				mutex_exit(&zp->z_lock);
				sa_buf_rele(db, NULL);
				ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
				goto again;
			}
			*zpp = zp;
			err = 0;
		}
		mutex_exit(&zp->z_lock);
		sa_buf_rele(db, NULL);
		ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
		return (err);
	}
Example #6
0
int
zfs_rezget(znode_t *zp)
{
	zfs_sb_t *zsb = ZTOZSB(zp);
	dmu_object_info_t doi;
	dmu_buf_t *db;
	uint64_t obj_num = zp->z_id;
	uint64_t mode;
	sa_bulk_attr_t bulk[8];
	int err;
	int count = 0;
	uint64_t gen;

	ZFS_OBJ_HOLD_ENTER(zsb, obj_num);

	mutex_enter(&zp->z_acl_lock);
	if (zp->z_acl_cached) {
		zfs_acl_free(zp->z_acl_cached);
		zp->z_acl_cached = NULL;
	}
	mutex_exit(&zp->z_acl_lock);

	rw_enter(&zp->z_xattr_lock, RW_WRITER);
	if (zp->z_xattr_cached) {
		nvlist_free(zp->z_xattr_cached);
		zp->z_xattr_cached = NULL;
	}

	if (zp->z_xattr_parent) {
		iput(ZTOI(zp->z_xattr_parent));
		zp->z_xattr_parent = NULL;
	}
	rw_exit(&zp->z_xattr_lock);

	ASSERT(zp->z_sa_hdl == NULL);
	err = sa_buf_hold(zsb->z_os, obj_num, NULL, &db);
	if (err) {
		ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
		return (err);
	}

	dmu_object_info_from_db(db, &doi);
	if (doi.doi_bonus_type != DMU_OT_SA &&
	    (doi.doi_bonus_type != DMU_OT_ZNODE ||
	    (doi.doi_bonus_type == DMU_OT_ZNODE &&
	    doi.doi_bonus_size < sizeof (znode_phys_t)))) {
		sa_buf_rele(db, NULL);
		ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
		return (SET_ERROR(EINVAL));
	}

	zfs_znode_sa_init(zsb, zp, db, doi.doi_bonus_type, NULL);

	/* reload cached values */
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zsb), NULL,
	    &gen, sizeof (gen));
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zsb), NULL,
	    &zp->z_size, sizeof (zp->z_size));
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zsb), NULL,
	    &zp->z_links, sizeof (zp->z_links));
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb), NULL,
	    &zp->z_pflags, sizeof (zp->z_pflags));
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zsb), NULL,
	    &zp->z_atime, sizeof (zp->z_atime));
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zsb), NULL,
	    &zp->z_uid, sizeof (zp->z_uid));
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zsb), NULL,
	    &zp->z_gid, sizeof (zp->z_gid));
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zsb), NULL,
	    &mode, sizeof (mode));

	if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) {
		zfs_znode_dmu_fini(zp);
		ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
		return (SET_ERROR(EIO));
	}

	zp->z_mode = mode;

	if (gen != zp->z_gen) {
		zfs_znode_dmu_fini(zp);
		ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
		return (SET_ERROR(EIO));
	}

	zp->z_unlinked = (zp->z_links == 0);
	zp->z_blksz = doi.doi_data_block_size;
	zfs_inode_update(zp);

	ZFS_OBJ_HOLD_EXIT(zsb, obj_num);

	return (0);
}
Example #7
0
int
zfs_zget(zfs_sb_t *zsb, uint64_t obj_num, znode_t **zpp)
{
	dmu_object_info_t doi;
	dmu_buf_t	*db;
	znode_t		*zp;
	int err;
	sa_handle_t	*hdl;
	struct inode	*ip;

	*zpp = NULL;

again:
	ip = ilookup(zsb->z_sb, obj_num);

	ZFS_OBJ_HOLD_ENTER(zsb, obj_num);

	err = sa_buf_hold(zsb->z_os, obj_num, NULL, &db);
	if (err) {
		ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
		iput(ip);
		return (err);
	}

	dmu_object_info_from_db(db, &doi);
	if (doi.doi_bonus_type != DMU_OT_SA &&
	    (doi.doi_bonus_type != DMU_OT_ZNODE ||
	    (doi.doi_bonus_type == DMU_OT_ZNODE &&
	    doi.doi_bonus_size < sizeof (znode_phys_t)))) {
		sa_buf_rele(db, NULL);
		ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
		iput(ip);
		return (SET_ERROR(EINVAL));
	}

	hdl = dmu_buf_get_user(db);
	if (hdl != NULL) {
		if (ip == NULL) {
			/*
			 * ilookup returned NULL, which means
			 * the znode is dying - but the SA handle isn't
			 * quite dead yet, we need to drop any locks
			 * we're holding, re-schedule the task and try again.
			 */
			sa_buf_rele(db, NULL);
			ZFS_OBJ_HOLD_EXIT(zsb, obj_num);

			schedule();
			goto again;
		}

		zp = sa_get_userdata(hdl);

		/*
		 * Since "SA" does immediate eviction we
		 * should never find a sa handle that doesn't
		 * know about the znode.
		 */

		ASSERT3P(zp, !=, NULL);

		mutex_enter(&zp->z_lock);
		ASSERT3U(zp->z_id, ==, obj_num);
		if (zp->z_unlinked) {
			err = SET_ERROR(ENOENT);
		} else {
			igrab(ZTOI(zp));
			*zpp = zp;
			err = 0;
		}
		sa_buf_rele(db, NULL);
		mutex_exit(&zp->z_lock);
		ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
		iput(ip);
		return (err);
	}

	ASSERT3P(ip, ==, NULL);

	/*
	 * Not found create new znode/vnode but only if file exists.
	 *
	 * There is a small window where zfs_vget() could
	 * find this object while a file create is still in
	 * progress.  This is checked for in zfs_znode_alloc()
	 *
	 * if zfs_znode_alloc() fails it will drop the hold on the
	 * bonus buffer.
	 */
	zp = zfs_znode_alloc(zsb, db, doi.doi_data_block_size,
	    doi.doi_bonus_type, obj_num, NULL, NULL);
	if (zp == NULL) {
		err = SET_ERROR(ENOENT);
	} else {
		*zpp = zp;
	}
	ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
	return (err);
}
Example #8
0
void
zfs_release_sa_handle(sa_handle_t *hdl, dmu_buf_t *db, void *tag)
{
	sa_handle_destroy(hdl);
	sa_buf_rele(db, tag);
}
Example #9
0
/*
 * Concurrency: @dt is write locked.
 */
static int osd_object_create(const struct lu_env *env, struct dt_object *dt,
			     struct lu_attr *attr,
			     struct dt_allocation_hint *hint,
			     struct dt_object_format *dof,
			     struct thandle *th)
{
	struct osd_thread_info	*info = osd_oti_get(env);
	struct lustre_mdt_attrs	*lma = &info->oti_mdt_attrs;
	struct zpl_direntry	*zde = &info->oti_zde.lzd_reg;
	const struct lu_fid	*fid = lu_object_fid(&dt->do_lu);
	struct osd_object	*obj = osd_dt_obj(dt);
	struct osd_device	*osd = osd_obj2dev(obj);
	char			*buf = info->oti_str;
	struct osd_thandle	*oh;
	dmu_buf_t		*db = NULL;
	uint64_t		 zapid, parent = 0;
	int			 rc;

	ENTRY;

	/* concurrent create declarations should not see
	 * the object inconsistent (db, attr, etc).
	 * in regular cases acquisition should be cheap */
	down_write(&obj->oo_guard);

	if (unlikely(dt_object_exists(dt)))
		GOTO(out, rc = -EEXIST);

	LASSERT(osd_invariant(obj));
	LASSERT(dof != NULL);

	LASSERT(th != NULL);
	oh = container_of0(th, struct osd_thandle, ot_super);

	/*
	 * XXX missing: Quote handling.
	 */

	LASSERT(obj->oo_db == NULL);

	/* to follow ZFS on-disk format we need
	 * to initialize parent dnode properly */
	if (hint != NULL && hint->dah_parent != NULL &&
	    !dt_object_remote(hint->dah_parent))
		parent = osd_dt_obj(hint->dah_parent)->oo_db->db_object;

	/* we may fix some attributes, better do not change the source */
	obj->oo_attr = *attr;
	obj->oo_attr.la_valid |= LA_SIZE | LA_NLINK | LA_TYPE;

	db = osd_create_type_f(dof->dof_type)(env, obj, &obj->oo_attr, oh);
	if (IS_ERR(db)) {
		rc = PTR_ERR(db);
		db = NULL;
		GOTO(out, rc);
	}

	zde->zde_pad = 0;
	zde->zde_dnode = db->db_object;
	zde->zde_type = IFTODT(attr->la_mode & S_IFMT);

	zapid = osd_get_name_n_idx(env, osd, fid, buf, sizeof(info->oti_str));

	rc = -zap_add(osd->od_os, zapid, buf, 8, 1, zde, oh->ot_tx);
	if (rc)
		GOTO(out, rc);

	/* Now add in all of the "SA" attributes */
	rc = -sa_handle_get(osd->od_os, db->db_object, NULL,
			    SA_HDL_PRIVATE, &obj->oo_sa_hdl);
	if (rc)
		GOTO(out, rc);

	/* configure new osd object */
	obj->oo_db = db;
	parent = parent != 0 ? parent : zapid;
	rc = __osd_attr_init(env, osd, obj->oo_sa_hdl, oh->ot_tx,
			     &obj->oo_attr, parent);
	if (rc)
		GOTO(out, rc);

	/* XXX: oo_lma_flags */
	obj->oo_dt.do_lu.lo_header->loh_attr |= obj->oo_attr.la_mode & S_IFMT;
	smp_mb();
	obj->oo_dt.do_lu.lo_header->loh_attr |= LOHA_EXISTS;
	if (likely(!fid_is_acct(lu_object_fid(&obj->oo_dt.do_lu))))
		/* no body operations for accounting objects */
		obj->oo_dt.do_body_ops = &osd_body_ops;

	rc = -nvlist_alloc(&obj->oo_sa_xattr, NV_UNIQUE_NAME, KM_SLEEP);
	if (rc)
		GOTO(out, rc);

	/* initialize LMA */
	lustre_lma_init(lma, lu_object_fid(&obj->oo_dt.do_lu), 0, 0);
	lustre_lma_swab(lma);
	rc = -nvlist_add_byte_array(obj->oo_sa_xattr, XATTR_NAME_LMA,
				    (uchar_t *)lma, sizeof(*lma));
	if (rc)
		GOTO(out, rc);
	rc = __osd_sa_xattr_update(env, obj, oh);
	if (rc)
		GOTO(out, rc);

	/* Add new object to inode accounting.
	 * Errors are not considered as fatal */
	rc = -zap_increment_int(osd->od_os, osd->od_iusr_oid,
				(attr->la_valid & LA_UID) ? attr->la_uid : 0, 1,
				oh->ot_tx);
	if (rc)
		CERROR("%s: failed to add "DFID" to accounting ZAP for usr %d "
			"(%d)\n", osd->od_svname, PFID(fid), attr->la_uid, rc);
	rc = -zap_increment_int(osd->od_os, osd->od_igrp_oid,
				(attr->la_valid & LA_GID) ? attr->la_gid : 0, 1,
				oh->ot_tx);
	if (rc)
		CERROR("%s: failed to add "DFID" to accounting ZAP for grp %d "
			"(%d)\n", osd->od_svname, PFID(fid), attr->la_gid, rc);

out:
	if (unlikely(rc && db)) {
		dmu_object_free(osd->od_os, db->db_object, oh->ot_tx);
		sa_buf_rele(db, osd_obj_tag);
		obj->oo_db = NULL;
	}
	up_write(&obj->oo_guard);
	RETURN(rc);
}