/*
 * Hold the bonus buffer of object `obj` in `osp` under `tag` and obtain a
 * private SA handle for it.
 *
 * Returns 0 with *db held and *hdlp set.  On any failure the buffer hold
 * taken here has already been dropped and a non-zero error is returned:
 * the error from sa_buf_hold() or sa_handle_get(), or ENOTSUP when the
 * bonus buffer is neither DMU_OT_SA nor a DMU_OT_ZNODE that is at least
 * sizeof (znode_phys_t) bytes.
 */
static int
zfs_grab_sa_handle(objset_t *osp, uint64_t obj, sa_handle_t **hdlp,
    dmu_buf_t **db, void *tag)
{
	dmu_object_info_t info;
	int rc;

	rc = sa_buf_hold(osp, obj, tag, db);
	if (rc != 0)
		return (rc);

	dmu_object_info_from_db(*db, &info);

	if (info.doi_bonus_type == DMU_OT_ZNODE) {
		/* A znode bonus must be able to contain a znode_phys_t. */
		if (info.doi_bonus_size < sizeof (znode_phys_t))
			goto unsupported;
	} else if (info.doi_bonus_type != DMU_OT_SA) {
		goto unsupported;
	}

	rc = sa_handle_get(osp, obj, NULL, SA_HDL_PRIVATE, hdlp);
	if (rc == 0)
		return (0);

	/* No handle was produced; give the buffer hold back. */
	sa_buf_rele(*db, tag);
	return (rc);

unsupported:
	sa_buf_rele(*db, tag);
	return (SET_ERROR(ENOTSUP));
}
/*
 * Hold the bonus buffer of DMU object `oid` in objset `os` under `tag`
 * and return it through *dbp.
 *
 * Objects other than DMU_USERUSED_OBJECT / DMU_GROUPUSED_OBJECT must have
 * a DMU_OT_SA bonus; otherwise the hold is dropped, *dbp is reset to NULL
 * and -EINVAL is returned.  A failure of sa_buf_hold() is returned as a
 * negated error code.  Returns 0 on success.
 *
 * The object info is read into the per-thread scratch area obtained from
 * osd_oti_get(env).
 */
int
__osd_obj2dbuf(const struct lu_env *env, objset_t *os, uint64_t oid,
	       dmu_buf_t **dbp, void *tag)
{
	dmu_object_info_t *info = &osd_oti_get(env)->oti_doi;
	int is_acct_obj;
	int rc;

	LASSERT(tag);

	rc = -sa_buf_hold(os, oid, tag, dbp);
	if (rc != 0)
		return rc;

	dmu_object_info_from_db(*dbp, info);

	is_acct_obj = (oid == DMU_USERUSED_OBJECT ||
		       oid == DMU_GROUPUSED_OBJECT);
	if (unlikely(!is_acct_obj && info->doi_bonus_type != DMU_OT_SA)) {
		sa_buf_rele(*dbp, tag);
		*dbp = NULL;
		return -EINVAL;
	}

	/* Sanity-check the buffer that sa_buf_hold() handed back. */
	LASSERT(*dbp);
	LASSERT((*dbp)->db_object == oid);
	LASSERT((*dbp)->db_offset == -1);
	LASSERT((*dbp)->db_data != NULL);

	return 0;
}
/*
 * Tear down the DMU-side state attached to the osd object behind `l`.
 *
 * Does nothing when the object has no bonus buffer (oo_db == NULL).
 * Otherwise finalizes the SA state, frees the cached xattr nvlist if
 * there is one, drops the osd_obj_tag hold on the bonus buffer, unlinks
 * the object from its oo_sa_linkage list and clears oo_db.
 */
static void
osd_object_delete(const struct lu_env *env, struct lu_object *l)
{
	struct osd_object *obj = osd_obj(l);

	if (obj->oo_db == NULL)
		return;

	osd_object_sa_fini(obj);

	if (obj->oo_sa_xattr != NULL) {
		nvlist_free(obj->oo_sa_xattr);
		obj->oo_sa_xattr = NULL;
	}

	sa_buf_rele(obj->oo_db, osd_obj_tag);
	cfs_list_del(&obj->oo_sa_linkage);
	obj->oo_db = NULL;
}
/*
 * Allocate a new DMU object with a DMU_OT_SA bonus buffer, hold that
 * buffer under osd_obj_tag (returned via *dbp) and initialize its
 * attributes from `la`.
 *
 * The transaction passed to this routine must have
 * dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT) called and then assigned
 * to a transaction group.
 *
 * `la` is modified: la_size is forced to 0 and la_nlink to 1.
 *
 * Returns 0 on success.  If __osd_attr_init() fails, the buffer hold is
 * dropped, *dbp is set to NULL, the new object is freed in `tx` and the
 * error is returned.
 */
int
__osd_object_create(const struct lu_env *env, struct osd_object *obj,
		    dmu_buf_t **dbp, dmu_tx_t *tx, struct lu_attr *la,
		    uint64_t parent)
{
	struct osd_device *dev = osd_obj2dev(obj);
	const struct lu_fid *obj_fid = lu_object_fid(&obj->oo_dt.do_lu);
	dmu_object_type_t dmu_type;
	uint64_t new_oid;
	int rc;

	/* Assert that the transaction has been assigned to a
	 * transaction group. */
	LASSERT(tx->tx_txg != 0);

	/* Use DMU_OTN_UINT8_METADATA for local objects so their data blocks
	 * would get an additional ditto copy */
	if (unlikely(S_ISREG(la->la_mode) &&
		     fid_seq_is_local_file(fid_seq(obj_fid))))
		dmu_type = DMU_OTN_UINT8_METADATA;
	else
		dmu_type = DMU_OT_PLAIN_FILE_CONTENTS;

	/* Create a new DMU object. */
	new_oid = dmu_object_alloc(dev->od_os, dmu_type, 0, DMU_OT_SA,
				   DN_MAX_BONUSLEN, tx);

	rc = -sa_buf_hold(dev->od_os, new_oid, osd_obj_tag, dbp);
	LASSERTF(rc == 0, "sa_buf_hold "LPU64" failed: %d\n", new_oid, rc);

	LASSERT(la->la_valid & LA_MODE);
	la->la_size = 0;
	la->la_nlink = 1;

	rc = __osd_attr_init(env, dev, new_oid, tx, la, parent);
	if (rc == 0)
		return 0;

	/* Attribute setup failed: undo the hold and the allocation. */
	sa_buf_rele(*dbp, osd_obj_tag);
	*dbp = NULL;
	dmu_object_free(dev->od_os, new_oid, tx);

	return rc;
}
/*
 * Look up the znode for object `obj_num` in `zsb` and, when one is already
 * attached to the object's SA handle, return it through *zpp with an inode
 * reference taken via igrab().
 *
 * *zpp is set to NULL on entry.  Visible return values:
 *   - the error from sa_buf_hold() if the bonus buffer cannot be held;
 *   - EINVAL if the bonus buffer is neither DMU_OT_SA nor a DMU_OT_ZNODE
 *     of at least sizeof (znode_phys_t) bytes;
 *   - ENOENT if the attached znode is marked unlinked;
 *   - 0 with *zpp set otherwise.
 *
 * The whole lookup runs under ZFS_OBJ_HOLD_ENTER/EXIT for `obj_num`; the
 * bonus buffer hold is dropped again before every return shown here.
 *
 * NOTE(review): this definition is cut off in this excerpt -- it ends at
 * the brace closing `if (hdl != NULL)`; the handling of hdl == NULL and
 * the function's closing brace are not present.
 */
int
zfs_zget(zfs_sb_t *zsb, uint64_t obj_num, znode_t **zpp)
{
	dmu_object_info_t doi;
	dmu_buf_t *db;
	znode_t *zp;
	int err;
	sa_handle_t *hdl;

	*zpp = NULL;

again:
	ZFS_OBJ_HOLD_ENTER(zsb, obj_num);

	err = sa_buf_hold(zsb->z_os, obj_num, NULL, &db);
	if (err) {
		ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
		return (err);
	}

	/* Reject objects whose bonus buffer cannot describe a znode. */
	dmu_object_info_from_db(db, &doi);
	if (doi.doi_bonus_type != DMU_OT_SA &&
	    (doi.doi_bonus_type != DMU_OT_ZNODE ||
	    (doi.doi_bonus_type == DMU_OT_ZNODE &&
	    doi.doi_bonus_size < sizeof (znode_phys_t)))) {
		sa_buf_rele(db, NULL);
		ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
		return (SET_ERROR(EINVAL));
	}

	/* A non-NULL buffer user is the SA handle of an existing znode. */
	hdl = dmu_buf_get_user(db);
	if (hdl != NULL) {
		zp = sa_get_userdata(hdl);

		/*
		 * Since "SA" does immediate eviction we
		 * should never find a sa handle that doesn't
		 * know about the znode.
		 */
		ASSERT3P(zp, !=, NULL);

		mutex_enter(&zp->z_lock);
		ASSERT3U(zp->z_id, ==, obj_num);
		if (zp->z_unlinked) {
			err = SET_ERROR(ENOENT);
		} else {
			/*
			 * If igrab() returns NULL the VFS has independently
			 * determined the inode should be evicted and has
			 * called iput_final() to start the eviction process.
			 * The SA handle is still valid but because the VFS
			 * requires that the eviction succeed we must drop
			 * our locks and references to allow the eviction to
			 * complete.  The zfs_zget() may then be retried.
			 *
			 * This unlikely case could be optimized by registering
			 * a sops->drop_inode() callback.  The callback would
			 * need to detect the active SA hold thereby informing
			 * the VFS that this inode should not be evicted.
			 */
			if (igrab(ZTOI(zp)) == NULL) {
				mutex_exit(&zp->z_lock);
				sa_buf_rele(db, NULL);
				ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
				goto again;
			}
			*zpp = zp;
			err = 0;
		}
		mutex_exit(&zp->z_lock);
		sa_buf_rele(db, NULL);
		ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
		return (err);
	}
	/* NOTE(review): excerpt ends here; remainder of the function is missing. */
/*
 * Re-attach znode `zp` to its on-disk object and reload the attribute
 * values cached in the znode.
 *
 * Under ZFS_OBJ_HOLD_ENTER/EXIT for the znode's object id this:
 *   1. drops the cached ACL, the cached xattr nvlist and the reference on
 *      the xattr parent inode, each under its own lock;
 *   2. holds the object's bonus buffer and validates its bonus type;
 *   3. sets up the znode's SA state and bulk-reads gen, size, links,
 *      flags, atime, uid, gid and mode;
 *   4. refreshes z_mode, z_unlinked and z_blksz and updates the inode.
 *
 * Returns 0 on success; the sa_buf_hold() error if the buffer cannot be
 * held; EINVAL for an unsupported bonus buffer; EIO if the bulk lookup
 * fails or the on-disk generation differs from zp->z_gen (in both EIO
 * cases the znode's DMU state is torn down again).
 */
int
zfs_rezget(znode_t *zp)
{
	zfs_sb_t *zsb = ZTOZSB(zp);
	uint64_t obj_num = zp->z_id;
	dmu_object_info_t info;
	sa_bulk_attr_t attrs[8];
	dmu_buf_t *bonus;
	uint64_t disk_mode;
	uint64_t disk_gen;
	int nattrs = 0;
	int err;

	ZFS_OBJ_HOLD_ENTER(zsb, obj_num);

	mutex_enter(&zp->z_acl_lock);
	if (zp->z_acl_cached != NULL) {
		zfs_acl_free(zp->z_acl_cached);
		zp->z_acl_cached = NULL;
	}
	mutex_exit(&zp->z_acl_lock);

	rw_enter(&zp->z_xattr_lock, RW_WRITER);
	if (zp->z_xattr_cached != NULL) {
		nvlist_free(zp->z_xattr_cached);
		zp->z_xattr_cached = NULL;
	}
	if (zp->z_xattr_parent != NULL) {
		iput(ZTOI(zp->z_xattr_parent));
		zp->z_xattr_parent = NULL;
	}
	rw_exit(&zp->z_xattr_lock);

	ASSERT(zp->z_sa_hdl == NULL);

	err = sa_buf_hold(zsb->z_os, obj_num, NULL, &bonus);
	if (err != 0)
		goto out;

	/* Only SA bonus buffers or full-size znode bonus buffers are valid. */
	dmu_object_info_from_db(bonus, &info);
	if (info.doi_bonus_type != DMU_OT_SA &&
	    (info.doi_bonus_type != DMU_OT_ZNODE ||
	    info.doi_bonus_size < sizeof (znode_phys_t))) {
		sa_buf_rele(bonus, NULL);
		err = SET_ERROR(EINVAL);
		goto out;
	}

	zfs_znode_sa_init(zsb, zp, bonus, info.doi_bonus_type, NULL);

	/* reload cached values */
	SA_ADD_BULK_ATTR(attrs, nattrs, SA_ZPL_GEN(zsb), NULL,
	    &disk_gen, sizeof (disk_gen));
	SA_ADD_BULK_ATTR(attrs, nattrs, SA_ZPL_SIZE(zsb), NULL,
	    &zp->z_size, sizeof (zp->z_size));
	SA_ADD_BULK_ATTR(attrs, nattrs, SA_ZPL_LINKS(zsb), NULL,
	    &zp->z_links, sizeof (zp->z_links));
	SA_ADD_BULK_ATTR(attrs, nattrs, SA_ZPL_FLAGS(zsb), NULL,
	    &zp->z_pflags, sizeof (zp->z_pflags));
	SA_ADD_BULK_ATTR(attrs, nattrs, SA_ZPL_ATIME(zsb), NULL,
	    &zp->z_atime, sizeof (zp->z_atime));
	SA_ADD_BULK_ATTR(attrs, nattrs, SA_ZPL_UID(zsb), NULL,
	    &zp->z_uid, sizeof (zp->z_uid));
	SA_ADD_BULK_ATTR(attrs, nattrs, SA_ZPL_GID(zsb), NULL,
	    &zp->z_gid, sizeof (zp->z_gid));
	SA_ADD_BULK_ATTR(attrs, nattrs, SA_ZPL_MODE(zsb), NULL,
	    &disk_mode, sizeof (disk_mode));

	if (sa_bulk_lookup(zp->z_sa_hdl, attrs, nattrs) != 0) {
		zfs_znode_dmu_fini(zp);
		err = SET_ERROR(EIO);
		goto out;
	}

	zp->z_mode = disk_mode;

	/* A different generation means the object is not the one zp cached. */
	if (disk_gen != zp->z_gen) {
		zfs_znode_dmu_fini(zp);
		err = SET_ERROR(EIO);
		goto out;
	}

	zp->z_unlinked = (zp->z_links == 0);
	zp->z_blksz = info.doi_data_block_size;
	zfs_inode_update(zp);

	/* err is still 0 from the successful sa_buf_hold() above. */
out:
	ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
	return (err);
}
/*
 * Return, through *zpp, the znode for object `obj_num` in `zsb`.
 *
 * An inode reference is first looked up with ilookup(); the rest runs
 * under ZFS_OBJ_HOLD_ENTER/EXIT for `obj_num`.
 *
 *   - If the object's bonus buffer already has an SA handle attached, the
 *     znode stored in that handle is returned with an extra inode
 *     reference (igrab), or ENOENT is returned if it is marked unlinked.
 *     If ilookup() found no inode in this case, all holds are dropped,
 *     the task calls schedule() and the lookup is retried.
 *   - Otherwise a new znode is allocated with zfs_znode_alloc(); ENOENT is
 *     returned if that yields NULL.
 *
 * Other errors: the sa_buf_hold() error, or EINVAL when the bonus buffer
 * is neither DMU_OT_SA nor a DMU_OT_ZNODE of at least
 * sizeof (znode_phys_t) bytes.  *zpp is NULL on every error return.
 */
int
zfs_zget(zfs_sb_t *zsb, uint64_t obj_num, znode_t **zpp)
{
	dmu_object_info_t info;
	dmu_buf_t *bonus;
	sa_handle_t *sa;
	struct inode *ip;
	znode_t *zp;
	int err;

	*zpp = NULL;

again:
	ip = ilookup(zsb->z_sb, obj_num);

	ZFS_OBJ_HOLD_ENTER(zsb, obj_num);

	err = sa_buf_hold(zsb->z_os, obj_num, NULL, &bonus);
	if (err != 0)
		goto out_unhold;

	dmu_object_info_from_db(bonus, &info);
	if (info.doi_bonus_type != DMU_OT_SA &&
	    (info.doi_bonus_type != DMU_OT_ZNODE ||
	    info.doi_bonus_size < sizeof (znode_phys_t))) {
		sa_buf_rele(bonus, NULL);
		err = SET_ERROR(EINVAL);
		goto out_unhold;
	}

	sa = dmu_buf_get_user(bonus);
	if (sa == NULL) {
		ASSERT3P(ip, ==, NULL);

		/*
		 * Not found create new znode/vnode but only if file exists.
		 *
		 * There is a small window where zfs_vget() could
		 * find this object while a file create is still in
		 * progress.  This is checked for in zfs_znode_alloc()
		 *
		 * if zfs_znode_alloc() fails it will drop the hold on the
		 * bonus buffer.
		 */
		zp = zfs_znode_alloc(zsb, bonus, info.doi_data_block_size,
		    info.doi_bonus_type, obj_num, NULL, NULL);
		if (zp == NULL)
			err = SET_ERROR(ENOENT);
		else
			*zpp = zp;

		ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
		return (err);
	}

	if (ip == NULL) {
		/*
		 * ilookup returned NULL, which means
		 * the znode is dying - but the SA handle isn't
		 * quite dead yet, we need to drop any locks
		 * we're holding, re-schedule the task and try again.
		 */
		sa_buf_rele(bonus, NULL);
		ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
		schedule();
		goto again;
	}

	zp = sa_get_userdata(sa);

	/*
	 * Since "SA" does immediate eviction we
	 * should never find a sa handle that doesn't
	 * know about the znode.
	 */
	ASSERT3P(zp, !=, NULL);

	mutex_enter(&zp->z_lock);
	ASSERT3U(zp->z_id, ==, obj_num);
	if (zp->z_unlinked) {
		err = SET_ERROR(ENOENT);
	} else {
		igrab(ZTOI(zp));
		*zpp = zp;
		err = 0;
	}
	sa_buf_rele(bonus, NULL);
	mutex_exit(&zp->z_lock);

out_unhold:
	ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
	/* Drop the reference taken by ilookup(); ip may be NULL here. */
	iput(ip);
	return (err);
}
/*
 * Destroy SA handle `hdl`, then release buffer `db` that was held
 * under `tag`.
 */
void
zfs_release_sa_handle(sa_handle_t *hdl, dmu_buf_t *db, void *tag)
{
	/* The handle goes first; the buffer hold is dropped afterwards. */
	sa_handle_destroy(hdl);
	sa_buf_rele(db, tag);
}
/* * Concurrency: @dt is write locked. */ static int osd_object_create(const struct lu_env *env, struct dt_object *dt, struct lu_attr *attr, struct dt_allocation_hint *hint, struct dt_object_format *dof, struct thandle *th) { struct osd_thread_info *info = osd_oti_get(env); struct lustre_mdt_attrs *lma = &info->oti_mdt_attrs; struct zpl_direntry *zde = &info->oti_zde.lzd_reg; const struct lu_fid *fid = lu_object_fid(&dt->do_lu); struct osd_object *obj = osd_dt_obj(dt); struct osd_device *osd = osd_obj2dev(obj); char *buf = info->oti_str; struct osd_thandle *oh; dmu_buf_t *db = NULL; uint64_t zapid, parent = 0; int rc; ENTRY; /* concurrent create declarations should not see * the object inconsistent (db, attr, etc). * in regular cases acquisition should be cheap */ down_write(&obj->oo_guard); if (unlikely(dt_object_exists(dt))) GOTO(out, rc = -EEXIST); LASSERT(osd_invariant(obj)); LASSERT(dof != NULL); LASSERT(th != NULL); oh = container_of0(th, struct osd_thandle, ot_super); /* * XXX missing: Quote handling. 
*/ LASSERT(obj->oo_db == NULL); /* to follow ZFS on-disk format we need * to initialize parent dnode properly */ if (hint != NULL && hint->dah_parent != NULL && !dt_object_remote(hint->dah_parent)) parent = osd_dt_obj(hint->dah_parent)->oo_db->db_object; /* we may fix some attributes, better do not change the source */ obj->oo_attr = *attr; obj->oo_attr.la_valid |= LA_SIZE | LA_NLINK | LA_TYPE; db = osd_create_type_f(dof->dof_type)(env, obj, &obj->oo_attr, oh); if (IS_ERR(db)) { rc = PTR_ERR(db); db = NULL; GOTO(out, rc); } zde->zde_pad = 0; zde->zde_dnode = db->db_object; zde->zde_type = IFTODT(attr->la_mode & S_IFMT); zapid = osd_get_name_n_idx(env, osd, fid, buf, sizeof(info->oti_str)); rc = -zap_add(osd->od_os, zapid, buf, 8, 1, zde, oh->ot_tx); if (rc) GOTO(out, rc); /* Now add in all of the "SA" attributes */ rc = -sa_handle_get(osd->od_os, db->db_object, NULL, SA_HDL_PRIVATE, &obj->oo_sa_hdl); if (rc) GOTO(out, rc); /* configure new osd object */ obj->oo_db = db; parent = parent != 0 ? parent : zapid; rc = __osd_attr_init(env, osd, obj->oo_sa_hdl, oh->ot_tx, &obj->oo_attr, parent); if (rc) GOTO(out, rc); /* XXX: oo_lma_flags */ obj->oo_dt.do_lu.lo_header->loh_attr |= obj->oo_attr.la_mode & S_IFMT; smp_mb(); obj->oo_dt.do_lu.lo_header->loh_attr |= LOHA_EXISTS; if (likely(!fid_is_acct(lu_object_fid(&obj->oo_dt.do_lu)))) /* no body operations for accounting objects */ obj->oo_dt.do_body_ops = &osd_body_ops; rc = -nvlist_alloc(&obj->oo_sa_xattr, NV_UNIQUE_NAME, KM_SLEEP); if (rc) GOTO(out, rc); /* initialize LMA */ lustre_lma_init(lma, lu_object_fid(&obj->oo_dt.do_lu), 0, 0); lustre_lma_swab(lma); rc = -nvlist_add_byte_array(obj->oo_sa_xattr, XATTR_NAME_LMA, (uchar_t *)lma, sizeof(*lma)); if (rc) GOTO(out, rc); rc = __osd_sa_xattr_update(env, obj, oh); if (rc) GOTO(out, rc); /* Add new object to inode accounting. * Errors are not considered as fatal */ rc = -zap_increment_int(osd->od_os, osd->od_iusr_oid, (attr->la_valid & LA_UID) ? 
attr->la_uid : 0, 1, oh->ot_tx); if (rc) CERROR("%s: failed to add "DFID" to accounting ZAP for usr %d " "(%d)\n", osd->od_svname, PFID(fid), attr->la_uid, rc); rc = -zap_increment_int(osd->od_os, osd->od_igrp_oid, (attr->la_valid & LA_GID) ? attr->la_gid : 0, 1, oh->ot_tx); if (rc) CERROR("%s: failed to add "DFID" to accounting ZAP for grp %d " "(%d)\n", osd->od_svname, PFID(fid), attr->la_gid, rc); out: if (unlikely(rc && db)) { dmu_object_free(osd->od_os, db->db_object, oh->ot_tx); sa_buf_rele(db, osd_obj_tag); obj->oo_db = NULL; } up_write(&obj->oo_guard); RETURN(rc); }