void
zfs_rmnode(znode_t *zp)
{
    zfsvfs_t *zfsvfs = zp->z_zfsvfs;
    objset_t *os = zfsvfs->z_os;
    znode_t *xzp = NULL;
    dmu_tx_t *tx;
    uint64_t acl_obj;
    uint64_t xattr_obj;
    int error;

    ASSERT(zp->z_links == 0);
    ASSERT_VOP_ELOCKED(ZTOV(zp), __func__);

    /*
     * If this is an attribute directory, purge its contents.
     */
    if (ZTOV(zp) != NULL && ZTOV(zp)->v_type == VDIR &&
        (zp->z_pflags & ZFS_XATTR)) {
        if (zfs_purgedir(zp) != 0) {
            /*
             * Not enough space to delete some xattrs.
             * Leave it in the unlinked set.
             */
            zfs_znode_dmu_fini(zp);
            zfs_znode_free(zp);
            return;
        }
    } else {
        /*
         * Free up all the data in the file. We don't do this for
         * XATTR directories because we need truncate and remove to be
         * in the same tx, like in zfs_znode_delete(). Otherwise, if
         * we crash here we'll end up with an inconsistent truncated
         * zap object in the delete queue. Note a truncated file is
         * harmless since it only contains user data.
         */
        error = dmu_free_long_range(os, zp->z_id, 0, DMU_OBJECT_END);
        if (error) {
            /*
             * Not enough space. Leave the file in the unlinked
             * set.
             */
            zfs_znode_dmu_fini(zp);
            zfs_znode_free(zp);
            return;
        }
    }

    /*
     * If the file has extended attributes, we're going to unlink
     * the xattr dir.
     */
    error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
        &xattr_obj, sizeof (xattr_obj));
    if (error == 0 && xattr_obj) {
        error = zfs_zget(zfsvfs, xattr_obj, &xzp);
        ASSERT3S(error, ==, 0);
        vn_lock(ZTOV(xzp), LK_EXCLUSIVE | LK_RETRY);
    }
void
getfinderinfo(znode_t *zp, cred_t *cr, finderinfo_t *fip)
{
    vnode_t *xdvp = NULLVP;
    vnode_t *xvp = NULLVP;
    struct uio *auio = NULL;
    struct componentname cn;
    int error;
    uint64_t xattr = 0;

    if (sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zp->z_zfsvfs),
        &xattr, sizeof (xattr)) || (xattr == 0)) {
        goto nodata;
    }

    auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);
    if (auio == NULL) {
        goto nodata;
    }
    uio_addiov(auio, CAST_USER_ADDR_T(fip), sizeof (finderinfo_t));

    /*
     * Grab the hidden attribute directory vnode.
     *
     * XXX - switch to embedded Finder Info when it becomes available
     */
    if ((error = zfs_get_xattrdir(zp, &xdvp, cr, 0))) {
        goto out;
    }

    bzero(&cn, sizeof (cn));
    cn.cn_nameiop = LOOKUP;
    cn.cn_flags = ISLASTCN;
    cn.cn_nameptr = XATTR_FINDERINFO_NAME;
    cn.cn_namelen = strlen(cn.cn_nameptr);

    if ((error = zfs_dirlook(VTOZ(xdvp), cn.cn_nameptr, &xvp, 0, NULL,
        &cn))) {
        goto out;
    }

    error = dmu_read_uio(zp->z_zfsvfs->z_os, VTOZ(xvp)->z_id, auio,
        sizeof (finderinfo_t));
out:
    if (auio)
        uio_free(auio);
    if (xvp)
        vnode_put(xvp);
    if (xdvp)
        vnode_put(xdvp);
    if (error == 0)
        return;
nodata:
    bzero(fip, sizeof (finderinfo_t));
}
static int
osd_object_sa_init(struct osd_object *obj, udmu_objset_t *uos)
{
    int rc;

    LASSERT(obj->oo_sa_hdl == NULL);
    LASSERT(obj->oo_db != NULL);

    rc = -sa_handle_get(uos->os, obj->oo_db->db_object, obj,
        SA_HDL_PRIVATE, &obj->oo_sa_hdl);
    if (rc)
        return rc;

    /* Cache the xattr object id, valid for the life of the object */
    rc = -sa_lookup(obj->oo_sa_hdl, SA_ZPL_XATTR(uos), &obj->oo_xattr, 8);
    if (rc == -ENOENT) {
        obj->oo_xattr = ZFS_NO_OBJECT;
        rc = 0;
    } else if (rc) {
        osd_object_sa_fini(obj);
    }

    return rc;
}
/*
 * Create the extended attribute directory for zp and record its object
 * number in zp's SA_ZPL_XATTR attribute.
 */
int
zfs_make_xattrdir(znode_t *zp, vattr_t *vap, vnode_t **xvpp, cred_t *cr)
{
    zfsvfs_t *zfsvfs = zp->z_zfsvfs;
    znode_t *xzp;
    dmu_tx_t *tx;
    int error;
    zfs_acl_ids_t acl_ids;
    boolean_t fuid_dirtied;
    uint64_t parent;

    *xvpp = NULL;

    if ((error = zfs_zaccess(zp, ACE_WRITE_NAMED_ATTRS, 0, B_FALSE, cr)))
        return (error);

    if ((error = zfs_acl_ids_create(zp, IS_XATTR, vap, cr, NULL,
        &acl_ids)) != 0)
        return (error);
    if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) {
        zfs_acl_ids_free(&acl_ids);
        return (EDQUOT);
    }

top:
    tx = dmu_tx_create(zfsvfs->z_os);
    dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
        ZFS_SA_BASE_ATTR_SIZE);
    dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
    dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
    fuid_dirtied = zfsvfs->z_fuid_dirty;
    if (fuid_dirtied)
        zfs_fuid_txhold(zfsvfs, tx);
    error = dmu_tx_assign(tx, TXG_NOWAIT);
    if (error) {
        if (error == ERESTART) {
            dmu_tx_wait(tx);
            dmu_tx_abort(tx);
            goto top;
        }
        zfs_acl_ids_free(&acl_ids);
        dmu_tx_abort(tx);
        return (error);
    }
    zfs_mknode(zp, vap, tx, cr, IS_XATTR, &xzp, &acl_ids);

#ifdef HAVE_ZPL
    if (fuid_dirtied)
        zfs_fuid_sync(zfsvfs, tx);
#endif /* HAVE_ZPL */

#ifdef DEBUG
    error = sa_lookup(xzp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
        &parent, sizeof (parent));
    ASSERT(error == 0 && parent == zp->z_id);
#endif

    VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &xzp->z_id,
        sizeof (xzp->z_id), tx));

    (void) zfs_log_create(zfsvfs->z_log, tx, TX_MKXATTR, zp, xzp, "",
        NULL, acl_ids.z_fuidp, vap);

    zfs_acl_ids_free(&acl_ids);
    dmu_tx_commit(tx);

    *xvpp = ZTOV(xzp);

    return (0);
}
int
__osd_xattr_set(const struct lu_env *env, struct osd_object *obj,
    const struct lu_buf *buf, const char *name, int fl,
    struct osd_thandle *oh)
{
    struct osd_device *osd = osd_obj2dev(obj);
    dmu_buf_t *xa_zap_db = NULL;
    dmu_buf_t *xa_data_db = NULL;
    uint64_t xa_data_obj;
    sa_handle_t *sa_hdl = NULL;
    dmu_tx_t *tx = oh->ot_tx;
    uint64_t size;
    int rc;

    LASSERT(obj->oo_sa_hdl);

    if (obj->oo_xattr == ZFS_NO_OBJECT) {
        struct lu_attr *la = &osd_oti_get(env)->oti_la;

        la->la_valid = LA_MODE;
        la->la_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO;
        rc = __osd_zap_create(env, osd, &xa_zap_db, tx, la,
            obj->oo_db->db_object, 0);
        if (rc)
            return rc;

        obj->oo_xattr = xa_zap_db->db_object;
        rc = osd_object_sa_update(obj, SA_ZPL_XATTR(osd),
            &obj->oo_xattr, 8, oh);
        if (rc)
            goto out;
    }

    rc = -zap_lookup(osd->od_os, obj->oo_xattr, name, sizeof(uint64_t), 1,
        &xa_data_obj);
    if (rc == 0) {
        if (fl & LU_XATTR_CREATE) {
            rc = -EEXIST;
            goto out;
        }
        /*
         * Entry already exists.
         * We'll truncate the existing object.
         */
        rc = __osd_obj2dbuf(env, osd->od_os, xa_data_obj, &xa_data_db);
        if (rc)
            goto out;
        rc = -sa_handle_get(osd->od_os, xa_data_obj, NULL,
            SA_HDL_PRIVATE, &sa_hdl);
        if (rc)
            goto out;
        rc = -sa_lookup(sa_hdl, SA_ZPL_SIZE(osd), &size, 8);
        if (rc)
            goto out_sa;
        rc = -dmu_free_range(osd->od_os, xa_data_db->db_object,
            0, DMU_OBJECT_END, tx);
        if (rc)
            goto out_sa;
    } else if (rc == -ENOENT) {
        struct lu_attr *la = &osd_oti_get(env)->oti_la;

        /*
         * Entry doesn't exist, we need to create a new one and a new
         * object to store the value.
         */
        if (fl & LU_XATTR_REPLACE) {
            /* should be ENOATTR according to the
             * man, but that is undefined here */
            rc = -ENODATA;
            goto out;
        }
        la->la_valid = LA_MODE;
        la->la_mode = S_IFREG | S_IRUGO | S_IWUSR;
        rc = __osd_object_create(env, obj, &xa_data_db, tx, la,
            obj->oo_xattr);
        if (rc)
            goto out;
        xa_data_obj = xa_data_db->db_object;
        rc = -sa_handle_get(osd->od_os, xa_data_obj, NULL,
            SA_HDL_PRIVATE, &sa_hdl);
        if (rc)
            goto out;
        rc = -zap_add(osd->od_os, obj->oo_xattr, name,
            sizeof(uint64_t), 1, &xa_data_obj, tx);
        if (rc)
            goto out_sa;
    } else {
        /* There was an error looking up the xattr name */
        goto out;
    }

    /* Finally write the xattr value */
    dmu_write(osd->od_os, xa_data_obj, 0, buf->lb_len, buf->lb_buf, tx);

    size = buf->lb_len;
    rc = -sa_update(sa_hdl, SA_ZPL_SIZE(osd), &size, 8, tx);

out_sa:
    sa_handle_destroy(sa_hdl);
out:
    if (xa_data_db != NULL)
        dmu_buf_rele(xa_data_db, FTAG);
    if (xa_zap_db != NULL)
        dmu_buf_rele(xa_zap_db, FTAG);

    return rc;
}
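The read side of this name-to-object scheme is only implied above: the per-file xattr ZAP maps an attribute name to a plain data object, and that object's SA_ZPL_SIZE bounds the value. A minimal sketch of such a reader, assuming the same helpers used above; the function name osd_xattr_read_sketch, the error choices, and the flags-taking dmu_read() variant are assumptions, not Lustre's real __osd_xattr_get().

/* Hypothetical sketch: read back an xattr stored by __osd_xattr_set(). */
static int
osd_xattr_read_sketch(struct osd_object *obj, const char *name,
    void *val, uint64_t val_len)
{
    struct osd_device *osd = osd_obj2dev(obj);
    uint64_t xa_data_obj, size;
    sa_handle_t *sa_hdl;
    int rc;

    if (obj->oo_xattr == ZFS_NO_OBJECT)
        return -ENODATA;        /* no xattr ZAP at all */

    /* name -> value object, via the cached xattr ZAP */
    rc = -zap_lookup(osd->od_os, obj->oo_xattr, name,
        sizeof(uint64_t), 1, &xa_data_obj);
    if (rc)
        return rc;              /* -ENOENT if the name is absent */

    /* the value's length is the data object's SA_ZPL_SIZE */
    rc = -sa_handle_get(osd->od_os, xa_data_obj, NULL,
        SA_HDL_PRIVATE, &sa_hdl);
    if (rc)
        return rc;
    rc = -sa_lookup(sa_hdl, SA_ZPL_SIZE(osd), &size, 8);
    sa_handle_destroy(sa_hdl);
    if (rc)
        return rc;
    if (size > val_len)
        return -ERANGE;

    /* assumes the dmu_read() variant that takes a flags argument */
    return -dmu_read(osd->od_os, xa_data_obj, 0, size, val,
        DMU_READ_PREFETCH);
}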
void
zfs_sa_upgrade(sa_handle_t *hdl, dmu_tx_t *tx)
{
    dmu_buf_t *db = sa_get_db(hdl);
    znode_t *zp = sa_get_userdata(hdl);
    zfsvfs_t *zfsvfs = zp->z_zfsvfs;
    sa_bulk_attr_t bulk[20];
    int count = 0;
    sa_bulk_attr_t sa_attrs[20] = { { 0 } };
    zfs_acl_locator_cb_t locate = { 0 };
    uint64_t uid, gid, mode, rdev, xattr, parent;
    uint64_t crtime[2], mtime[2], ctime[2];
    zfs_acl_phys_t znode_acl;
    char scanstamp[AV_SCANSTAMP_SZ];
    boolean_t drop_lock = B_FALSE;

    /*
     * No upgrade if the ACL isn't cached, since we won't know which
     * locks are held and reading the ACL would require special
     * "locked" interfaces that would be messy.
     */
    if (zp->z_acl_cached == NULL || vnode_islnk(ZTOV(zp)))
        return;

    /*
     * If the z_lock is held and we aren't the owner, then just return,
     * since we don't want to deadlock trying to update the status of
     * z_is_sa. This file can then be upgraded at a later time.
     *
     * Otherwise, we know we are doing the sa_update() that caused
     * us to enter this function.
     */
    if (mutex_owner(&zp->z_lock) != curthread) {
        if (mutex_tryenter(&zp->z_lock) == 0)
            return;
        else
            drop_lock = B_TRUE;
    }

    /* First do a bulk query of the attributes that aren't cached */
    SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
    SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
    SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CRTIME(zfsvfs), NULL, &crtime, 16);
    SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, &mode, 8);
    SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL, &parent, 8);
    SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_XATTR(zfsvfs), NULL, &xattr, 8);
    SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_RDEV(zfsvfs), NULL, &rdev, 8);
    SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, &uid, 8);
    SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL, &gid, 8);
    SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ZNODE_ACL(zfsvfs), NULL,
        &znode_acl, 88);

    if (sa_bulk_lookup_locked(hdl, bulk, count) != 0)
        goto done;

    /*
     * While the order here doesn't matter, it's best to try to organize
     * it in such a way as to pick up an already existing layout number.
     */
    count = 0;
    SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_MODE(zfsvfs), NULL, &mode, 8);
    SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_SIZE(zfsvfs), NULL,
        &zp->z_size, 8);
    SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_GEN(zfsvfs),
        NULL, &zp->z_gen, 8);
    SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_UID(zfsvfs), NULL, &uid, 8);
    SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_GID(zfsvfs), NULL, &gid, 8);
    SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_PARENT(zfsvfs),
        NULL, &parent, 8);
    SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_FLAGS(zfsvfs), NULL,
        &zp->z_pflags, 8);
    SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_ATIME(zfsvfs), NULL,
        zp->z_atime, 16);
    SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_MTIME(zfsvfs), NULL,
        &mtime, 16);
    SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_CTIME(zfsvfs), NULL,
        &ctime, 16);
    SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_CRTIME(zfsvfs), NULL,
        &crtime, 16);
    SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_LINKS(zfsvfs), NULL,
        &zp->z_links, 8);
    if (vnode_isblk(zp->z_vnode) || vnode_islnk(zp->z_vnode))
        SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_RDEV(zfsvfs), NULL,
            &rdev, 8);
    SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_DACL_COUNT(zfsvfs), NULL,
        &zp->z_acl_cached->z_acl_count, 8);

    if (zp->z_acl_cached->z_version < ZFS_ACL_VERSION_FUID)
        zfs_acl_xform(zp, zp->z_acl_cached, CRED());

    locate.cb_aclp = zp->z_acl_cached;
    SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_DACL_ACES(zfsvfs),
        zfs_acl_data_locator, &locate, zp->z_acl_cached->z_acl_bytes);

    if (xattr)
        SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_XATTR(zfsvfs),
            NULL, &xattr, 8);

    /* if scanstamp then add scanstamp */
    if (zp->z_pflags & ZFS_BONUS_SCANSTAMP) {
        bcopy((caddr_t)db->db_data + ZFS_OLD_ZNODE_PHYS_SIZE,
            scanstamp, AV_SCANSTAMP_SZ);
        SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_SCANSTAMP(zfsvfs),
            NULL, scanstamp, AV_SCANSTAMP_SZ);
        zp->z_pflags &= ~ZFS_BONUS_SCANSTAMP;
    }

    VERIFY(dmu_set_bonustype(db, DMU_OT_SA, tx) == 0);
    VERIFY(sa_replace_all_by_template_locked(hdl, sa_attrs,
        count, tx) == 0);
    if (znode_acl.z_acl_extern_obj)
        VERIFY(0 == dmu_object_free(zfsvfs->z_os,
            znode_acl.z_acl_extern_obj, tx));

    zp->z_is_sa = B_TRUE;
done:
    if (drop_lock)
        mutex_exit(&zp->z_lock);
}
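zfs_sa_upgrade() assumes the caller's transaction already carries the holds the conversion may need; the zfs_rmnode() variants below obtain them with zfs_sa_upgrade_txholds(). A minimal caller-side sketch of that pattern, assuming a hypothetical helper name (set_size_attr_sketch) around the sa_update() that can trigger the upgrade:

/* Hypothetical sketch: arm a tx so a plain SA update can upgrade the znode. */
static int
set_size_attr_sketch(znode_t *zp, uint64_t new_size)
{
    zfsvfs_t *zfsvfs = zp->z_zfsvfs;
    dmu_tx_t *tx;
    int error;

    tx = dmu_tx_create(zfsvfs->z_os);
    dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
    /* extra holds in case the SA layer converts the bonus to DMU_OT_SA */
    zfs_sa_upgrade_txholds(tx, zp);

    error = dmu_tx_assign(tx, TXG_WAIT);
    if (error) {
        dmu_tx_abort(tx);
        return (error);
    }

    /* the SA update callback may run zfs_sa_upgrade() under this tx */
    error = sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs), &new_size,
        sizeof (new_size), tx);

    dmu_tx_commit(tx);
    return (error);
}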
void
zfs_rmnode(znode_t *zp)
{
    zfsvfs_t *zfsvfs = ZTOZSB(zp);
    objset_t *os = zfsvfs->z_os;
    znode_t *xzp = NULL;
    dmu_tx_t *tx;
    uint64_t acl_obj;
    uint64_t xattr_obj;
    uint64_t links;
    int error;

    ASSERT(ZTOI(zp)->i_nlink == 0);
    ASSERT(atomic_read(&ZTOI(zp)->i_count) == 0);

    /*
     * If this is an attribute directory, purge its contents.
     */
    if (S_ISDIR(ZTOI(zp)->i_mode) && (zp->z_pflags & ZFS_XATTR)) {
        if (zfs_purgedir(zp) != 0) {
            /*
             * Not enough space to delete some xattrs.
             * Leave it in the unlinked set.
             */
            zfs_znode_dmu_fini(zp);
            return;
        }
    }

    /*
     * Free up all the data in the file. We don't do this for directories
     * because we need truncate and remove to be in the same tx, like in
     * zfs_znode_delete(). Otherwise, if we crash here we'll end up with
     * an inconsistent truncated zap object in the delete queue. Note a
     * truncated file is harmless since it only contains user data.
     */
    if (S_ISREG(ZTOI(zp)->i_mode)) {
        error = dmu_free_long_range(os, zp->z_id, 0, DMU_OBJECT_END);
        if (error) {
            /*
             * Not enough space or we were interrupted by unmount.
             * Leave the file in the unlinked set.
             */
            zfs_znode_dmu_fini(zp);
            return;
        }
    }

    /*
     * If the file has extended attributes, we're going to unlink
     * the xattr dir.
     */
    error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
        &xattr_obj, sizeof (xattr_obj));
    if (error == 0 && xattr_obj) {
        error = zfs_zget(zfsvfs, xattr_obj, &xzp);
        ASSERT(error == 0);
    }

    acl_obj = zfs_external_acl(zp);

    /*
     * Set up the final transaction.
     */
    tx = dmu_tx_create(os);
    dmu_tx_hold_free(tx, zp->z_id, 0, DMU_OBJECT_END);
    dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
    if (xzp) {
        dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, TRUE, NULL);
        dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
    }
    if (acl_obj)
        dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END);

    zfs_sa_upgrade_txholds(tx, zp);
    error = dmu_tx_assign(tx, TXG_WAIT);
    if (error) {
        /*
         * Not enough space to delete the file. Leave it in the
         * unlinked set, leaking it until the fs is remounted (at
         * which point we'll call zfs_unlinked_drain() to process it).
         */
        dmu_tx_abort(tx);
        zfs_znode_dmu_fini(zp);
        goto out;
    }

    if (xzp) {
        ASSERT(error == 0);
        mutex_enter(&xzp->z_lock);
        xzp->z_unlinked = B_TRUE;   /* mark xzp for deletion */
        clear_nlink(ZTOI(xzp));     /* no more links to it */
        links = 0;
        VERIFY(0 == sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs),
            &links, sizeof (links), tx));
        mutex_exit(&xzp->z_lock);
        zfs_unlinked_add(xzp, tx);
    }

    /* Remove this znode from the unlinked set */
    VERIFY3U(0, ==, zap_remove_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj,
        zp->z_id, tx));

    zfs_znode_delete(zp, tx);

    dmu_tx_commit(tx);
out:
    if (xzp)
        zfs_iput_async(ZTOI(xzp));
}
int
zfs_make_xattrdir(znode_t *zp, vattr_t *vap, struct inode **xipp, cred_t *cr)
{
    zfs_sb_t *zsb = ZTOZSB(zp);
    znode_t *xzp;
    dmu_tx_t *tx;
    int error;
    zfs_acl_ids_t acl_ids;
    boolean_t fuid_dirtied;
#ifdef DEBUG
    uint64_t parent;
#endif

    *xipp = NULL;

    if ((error = zfs_zaccess(zp, ACE_WRITE_NAMED_ATTRS, 0, B_FALSE, cr)))
        return (error);

    if ((error = zfs_acl_ids_create(zp, IS_XATTR, vap, cr, NULL,
        &acl_ids)) != 0)
        return (error);
    if (zfs_acl_ids_overquota(zsb, &acl_ids)) {
        zfs_acl_ids_free(&acl_ids);
        return (SET_ERROR(EDQUOT));
    }

    tx = dmu_tx_create(zsb->z_os);
    dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
        ZFS_SA_BASE_ATTR_SIZE);
    dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
    dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
    fuid_dirtied = zsb->z_fuid_dirty;
    if (fuid_dirtied)
        zfs_fuid_txhold(zsb, tx);
    error = dmu_tx_assign(tx, TXG_WAIT);
    if (error) {
        zfs_acl_ids_free(&acl_ids);
        dmu_tx_abort(tx);
        return (error);
    }
    zfs_mknode(zp, vap, tx, cr, IS_XATTR, &xzp, &acl_ids);

    if (fuid_dirtied)
        zfs_fuid_sync(zsb, tx);

#ifdef DEBUG
    error = sa_lookup(xzp->z_sa_hdl, SA_ZPL_PARENT(zsb),
        &parent, sizeof (parent));
    ASSERT(error == 0 && parent == zp->z_id);
#endif

    VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_XATTR(zsb), &xzp->z_id,
        sizeof (xzp->z_id), tx));

    if (!zp->z_unlinked)
        (void) zfs_log_create(zsb->z_log, tx, TX_MKXATTR, zp,
            xzp, "", NULL, acl_ids.z_fuidp, vap);

    zfs_acl_ids_free(&acl_ids);
    dmu_tx_commit(tx);

    *xipp = ZTOI(xzp);

    return (0);
}
#ifdef __APPLE__
int
zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, struct componentname *cnp,
    znode_t **zpp, int flag)
#else
int
zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name,
    znode_t **zpp, int flag)
#endif
{
    zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
    zfs_dirlock_t *dl;
    uint64_t zoid;
    int error;
    vnode_t *vp;
#ifdef __APPLE__
    char *name;
    u_int8_t *nfc_name = NULL;  /* NFC form of name */
    int nfc_namesize = 0;
#endif

    *zpp = NULL;
    *dlpp = NULL;

#ifdef __APPLE__
    /* Note: cnp will be NULL for ZXATTR case */
    name = cnp ? cnp->cn_nameptr : "";
    if (cnp)
        ASSERT(name[cnp->cn_namelen] == '\0');
#endif

    /*
     * Verify that we are not trying to lock '.', '..', or '.zfs'
     */
    if (((name[0] == '.') &&
        ((name[1] == '\0') || ((name[1] == '.') && (name[2] == '\0')))) ||
        (zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0))
        return (EEXIST);

#ifdef __APPLE__
    /*
     * Mac OS X: store non-ascii names in UTF-8 NFC (pre-composed) on disk.
     *
     * The NFC name ptr is stored in dl->dl_name (allocated here)
     * and it's freed by zfs_dirent_unlock (since dl_namesize != 0).
     *
     * Since NFC size will not expand, we can allocate the same sized
     * buffer.
     */
    if (!is_ascii_str(name)) {
        size_t outlen;

        nfc_namesize = strlen(name) + 1;
        nfc_name = kmem_alloc(nfc_namesize, KM_SLEEP);

        if (utf8_normalizestr((const u_int8_t *)name, nfc_namesize,
            nfc_name, &outlen, nfc_namesize, UTF_PRECOMPOSED) == 0) {
            /* Normalization succeeded, switch to NFC name. */
            name = (char *)nfc_name;
        } else {
            /* Normalization failed, just use input name as-is. */
            kmem_free(nfc_name, nfc_namesize);
            nfc_name = NULL;
        }
    }
#endif

    /*
     * Wait until there are no locks on this name.
     */
    rw_enter(&dzp->z_name_lock, RW_READER);
    mutex_enter(&dzp->z_lock);
    for (;;) {
        if (dzp->z_unlinked) {
            mutex_exit(&dzp->z_lock);
            rw_exit(&dzp->z_name_lock);
#ifdef __APPLE__
            /* Release any unused NFC name before returning */
            if (nfc_name) {
                kmem_free(nfc_name, nfc_namesize);
            }
#endif
            return (ENOENT);
        }
        for (dl = dzp->z_dirlocks; dl != NULL; dl = dl->dl_next)
            if (strcmp(name, dl->dl_name) == 0)
                break;
        if (dl == NULL) {
            /*
             * Allocate a new dirlock and add it to the list.
             */
            dl = kmem_alloc(sizeof (zfs_dirlock_t), KM_SLEEP);
            cv_init(&dl->dl_cv, NULL, CV_DEFAULT, NULL);
            dl->dl_name = name;
            dl->dl_sharecnt = 0;
            dl->dl_namesize = 0;
            dl->dl_dzp = dzp;
            dl->dl_next = dzp->z_dirlocks;
            dzp->z_dirlocks = dl;
#ifdef __APPLE__
            /*
             * Keep the NFC name around in dir lock by tagging it
             * (setting nfc_namesize).
             */
            if (nfc_name) {
                dl->dl_namesize = nfc_namesize;
                nfc_name = NULL; /* it's now part of the dir lock */
            }
#endif
            break;
        }
        if ((flag & ZSHARED) && dl->dl_sharecnt != 0)
            break;
        cv_wait(&dl->dl_cv, &dzp->z_lock);
        dl = NULL;
    }
#ifdef __APPLE__
    /*
     * Release any unused NFC name (ie if we found a pre-existing
     * lock entry)
     */
    if (nfc_name) {
        kmem_free(nfc_name, nfc_namesize);
        nfc_name = NULL;
    }
#endif
    if ((flag & ZSHARED) && ++dl->dl_sharecnt > 1 && dl->dl_namesize == 0) {
        /*
         * We're the second shared reference to dl. Make a copy of
         * dl_name in case the first thread goes away before we do.
         * Note that we initialize the new name before storing its
         * pointer into dl_name, because the first thread may load
         * dl->dl_name at any time. He'll either see the old value,
         * which is his, or the new shared copy; either is OK.
         */
        dl->dl_namesize = strlen(dl->dl_name) + 1;
        name = kmem_alloc(dl->dl_namesize, KM_SLEEP);
        bcopy(dl->dl_name, name, dl->dl_namesize);
        dl->dl_name = name;
    }

    mutex_exit(&dzp->z_lock);

    /*
     * We have a dirlock on the name. (Note that it is the dirlock,
     * not the dzp's z_lock, that protects the name in the zap object.)
     * See if there's an object by this name; if so, put a hold on it.
     */
    if (flag & ZXATTR) {
        error = sa_lookup(dzp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
            &zoid, sizeof (zoid));
        if (error == 0)
            error = (zoid == 0 ? ENOENT : 0);
    } else {
#ifdef __APPLE__
        /*
         * Lookup an entry in the vnode name cache
         *
         * If the lookup succeeds, the vnode is returned in *vpp,
         * and a status of -1 is returned.
         *
         * If the lookup determines that the name does not exist
         * (negative caching), a status of ENOENT is returned.
         *
         * If the lookup fails, a status of zero is returned.
         */
        switch (cache_lookup(ZTOV(dzp), &vp, cnp)) {
        case -1:
            break;
        case ENOENT:
            vp = DNLC_NO_VNODE;
            break;
        default:
            vp = NULLVP;
        }
#else
        vp = dnlc_lookup(ZTOV(dzp), name);
#endif /* __APPLE__ */
        if (vp == DNLC_NO_VNODE) {
            VN_RELE(vp);
            error = ENOENT;
        } else if (vp) {
            if (flag & ZNEW) {
                zfs_dirent_unlock(dl);
                VN_RELE(vp);
                return (EEXIST);
            }
            *dlpp = dl;
            *zpp = VTOZ(vp);
            return (0);
        } else {
            error = zap_lookup(zfsvfs->z_os, dzp->z_id, name,
                8, 1, &zoid);
            zoid = ZFS_DIRENT_OBJ(zoid);
            if (error == ENOENT)
#ifdef __APPLE__
                /*
                 * Add a negative entry into the VFS name cache
                 */
                if ((flag & ZNEW) == 0 &&
                    (dzp->z_pflags & ZFS_XATTR) == 0 &&
                    (cnp) &&
                    (cnp->cn_flags & MAKEENTRY) &&
                    (cnp->cn_nameiop != CREATE) &&
                    (cnp->cn_nameiop != RENAME)) {
                    cache_enter(ZTOV(dzp), NULLVP, cnp);
                }
#else
                dnlc_update(ZTOV(dzp), name, DNLC_NO_VNODE);
#endif /* __APPLE__ */
        }
    }
    if (error) {
        if (error != ENOENT || (flag & ZEXISTS)) {
            zfs_dirent_unlock(dl);
            return (error);
        }
    } else {
        if (flag & ZNEW) {
            zfs_dirent_unlock(dl);
            return (EEXIST);
        }
        /* error = zfs_zget_sans_vnode(zfsvfs, zoid, zpp); */
        error = zfs_zget(zfsvfs, zoid, zpp);
        if (error) {
            zfs_dirent_unlock(dl);
            return (error);
        } else {
            /* Should this be here? */
            /* printf("zfs_dir attach 1\n"); */
            /* zfs_attach_vnode(*zpp); */
        }
        if (!(flag & ZXATTR))
#ifdef __APPLE__
            if (cnp && cnp->cn_flags & MAKEENTRY)
                cache_enter(ZTOV(dzp), ZTOV(*zpp), cnp);
#else
            dnlc_update(ZTOV(dzp), name, ZTOV(*zpp));
#endif /* __APPLE__ */
    }

    *dlpp = dl;

    return (0);
}
int
zfs_make_xattrdir(znode_t *zp, vattr_t *vap, vnode_t **xvpp, cred_t *cr)
{
    zfsvfs_t *zfsvfs = zp->z_zfsvfs;
    znode_t *xzp;
    dmu_tx_t *tx;
    uint64_t xoid;
    int error;
    zfs_acl_ids_t acl_ids;
    boolean_t fuid_dirtied;

    *xvpp = NULL;

#ifndef __APPLE__
    /* In Mac OS X access preflighting is done above the file system. */
    if ((error = zfs_zaccess(zp, ACE_WRITE_NAMED_ATTRS, cr)))
        return (error);
#endif /* !__APPLE__ */

    if ((error = zfs_acl_ids_create(zp, IS_XATTR, vap, cr, NULL,
        &acl_ids)) != 0)
        return (error);
    if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) {
        zfs_acl_ids_free(&acl_ids);
        return (EDQUOT);
    }

top:
    tx = dmu_tx_create(zfsvfs->z_os);
    dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
        ZFS_SA_BASE_ATTR_SIZE);
    dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
    dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
    fuid_dirtied = zfsvfs->z_fuid_dirty;
    if (fuid_dirtied)
        zfs_fuid_txhold(zfsvfs, tx);
    error = dmu_tx_assign(tx, TXG_NOWAIT);
    if (error) {
        if (error == ERESTART) {
            dmu_tx_wait(tx);
            dmu_tx_abort(tx);
            goto top;
        }
        zfs_acl_ids_free(&acl_ids);
        dmu_tx_abort(tx);
        return (error);
    }
    zfs_mknode(zp, vap, tx, cr, IS_XATTR, &xzp, &acl_ids);

    if (fuid_dirtied)
        zfs_fuid_sync(zfsvfs, tx);

    VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &xzp->z_id,
        sizeof (xzp->z_id), tx));

    (void) zfs_log_create(zfsvfs->z_log, tx, TX_MKXATTR, zp, xzp, "",
        NULL, acl_ids.z_fuidp, vap);

    zfs_acl_ids_free(&acl_ids);
    dmu_tx_commit(tx);

    /* Cleanup any znode we consumed during zfs_mknode() */
    printf("ZFS_POSTPROCESS_ZP(xzp);\n");
    printf("zfs_dir attach 2\n");
    zfs_attach_vnode(xzp);

    *xvpp = ZTOV(xzp);

    return (0);
}
void
zfs_rmnode(znode_t *zp)
{
    zfs_sb_t *zsb = ZTOZSB(zp);
    objset_t *os = zsb->z_os;
    znode_t *xzp = NULL;
    dmu_tx_t *tx;
    uint64_t acl_obj;
    uint64_t xattr_obj;
    uint64_t count;
    int error;

    ASSERT(zp->z_links == 0);
    ASSERT(atomic_read(&ZTOI(zp)->i_count) == 0);

    /*
     * If this is an attribute directory, purge its contents.
     */
    if (S_ISDIR(ZTOI(zp)->i_mode) && (zp->z_pflags & ZFS_XATTR)) {
        error = zap_count(os, zp->z_id, &count);
        if (error) {
            zfs_znode_dmu_fini(zp);
            return;
        }

        if (count > 0) {
            taskq_t *taskq;

            /*
             * There are still directory entries in this xattr
             * directory. Let zfs_unlinked_drain() deal with
             * them to avoid deadlocking this process in the
             * zfs_purgedir()->zfs_zget()->ilookup() callpath
             * on the xattr inode's I_FREEING bit.
             */
            taskq = dsl_pool_iput_taskq(dmu_objset_pool(os));
            taskq_dispatch(taskq, (task_func_t *)
                zfs_unlinked_drain, zsb, TQ_SLEEP);

            zfs_znode_dmu_fini(zp);
            return;
        }
    }

    /*
     * Free up all the data in the file.
     */
    error = dmu_free_long_range(os, zp->z_id, 0, DMU_OBJECT_END);
    if (error) {
        /*
         * Not enough space. Leave the file in the unlinked set.
         */
        zfs_znode_dmu_fini(zp);
        return;
    }

    /*
     * If the file has extended attributes, we're going to unlink
     * the xattr dir.
     */
    error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zsb),
        &xattr_obj, sizeof (xattr_obj));
    if (error == 0 && xattr_obj) {
        error = zfs_zget(zsb, xattr_obj, &xzp);
        ASSERT(error == 0);
    }

    acl_obj = zfs_external_acl(zp);

    /*
     * Set up the final transaction.
     */
    tx = dmu_tx_create(os);
    dmu_tx_hold_free(tx, zp->z_id, 0, DMU_OBJECT_END);
    dmu_tx_hold_zap(tx, zsb->z_unlinkedobj, FALSE, NULL);
    if (xzp) {
        dmu_tx_hold_zap(tx, zsb->z_unlinkedobj, TRUE, NULL);
        dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
    }
    if (acl_obj)
        dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END);

    zfs_sa_upgrade_txholds(tx, zp);
    error = dmu_tx_assign(tx, TXG_WAIT);
    if (error) {
        /*
         * Not enough space to delete the file. Leave it in the
         * unlinked set, leaking it until the fs is remounted (at
         * which point we'll call zfs_unlinked_drain() to process it).
         */
        dmu_tx_abort(tx);
        zfs_znode_dmu_fini(zp);
        goto out;
    }

    if (xzp) {
        ASSERT(error == 0);
        mutex_enter(&xzp->z_lock);
        xzp->z_unlinked = B_TRUE;   /* mark xzp for deletion */
        xzp->z_links = 0;           /* no more links to it */
        VERIFY(0 == sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zsb),
            &xzp->z_links, sizeof (xzp->z_links), tx));
        mutex_exit(&xzp->z_lock);
        zfs_unlinked_add(xzp, tx);
    }

    /* Remove this znode from the unlinked set */
    VERIFY3U(0, ==, zap_remove_int(zsb->z_os, zsb->z_unlinkedobj,
        zp->z_id, tx));

    zfs_znode_delete(zp, tx);

    dmu_tx_commit(tx);
out:
    if (xzp)
        iput(ZTOI(xzp));
}
int
zfs_make_xattrdir(znode_t *zp, vattr_t *vap, vnode_t **xvpp, cred_t *cr)
{
    zfsvfs_t *zfsvfs = zp->z_zfsvfs;
    znode_t *xzp;
    dmu_tx_t *tx;
    int error;
    zfs_acl_ids_t acl_ids;
    boolean_t fuid_dirtied;
    uint64_t parent;

    *xvpp = NULL;

    /*
     * In FreeBSD, access checking for creating an EA is being done
     * in zfs_setextattr().
     */
#ifndef __FreeBSD_kernel__
    if ((error = zfs_zaccess(zp, ACE_WRITE_NAMED_ATTRS, 0, B_FALSE, cr)))
        return (error);
#endif

    if ((error = zfs_acl_ids_create(zp, IS_XATTR, vap, cr, NULL,
        &acl_ids)) != 0)
        return (error);
    if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) {
        zfs_acl_ids_free(&acl_ids);
        return (SET_ERROR(EDQUOT));
    }

    getnewvnode_reserve(1);

    tx = dmu_tx_create(zfsvfs->z_os);
    dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
        ZFS_SA_BASE_ATTR_SIZE);
    dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
    dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
    fuid_dirtied = zfsvfs->z_fuid_dirty;
    if (fuid_dirtied)
        zfs_fuid_txhold(zfsvfs, tx);
    error = dmu_tx_assign(tx, TXG_WAIT);
    if (error) {
        zfs_acl_ids_free(&acl_ids);
        dmu_tx_abort(tx);
        return (error);
    }
    zfs_mknode(zp, vap, tx, cr, IS_XATTR, &xzp, &acl_ids);

    if (fuid_dirtied)
        zfs_fuid_sync(zfsvfs, tx);

#ifdef DEBUG
    error = sa_lookup(xzp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
        &parent, sizeof (parent));
    ASSERT(error == 0 && parent == zp->z_id);
#endif

    VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &xzp->z_id,
        sizeof (xzp->z_id), tx));

    (void) zfs_log_create(zfsvfs->z_log, tx, TX_MKXATTR, zp, xzp, "",
        NULL, acl_ids.z_fuidp, vap);

    zfs_acl_ids_free(&acl_ids);
    dmu_tx_commit(tx);

    getnewvnode_drop_reserve();

    *xvpp = ZTOV(xzp);

    return (0);
}
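For callers, zfs_make_xattrdir() is the create half of a get-or-create pair: SA_ZPL_XATTR either already names the xattr directory or the directory must be made. A simplified sketch of that pattern, assuming the FreeBSD signatures above (the helper name get_or_make_xattrdir is illustrative; the real zfs_get_xattrdir() additionally takes the dirent lock, honors CREATE_XATTR_DIR, and retries, all omitted here):

/* Hypothetical sketch: resolve or create zp's xattr directory vnode. */
static int
get_or_make_xattrdir(znode_t *zp, vattr_t *vap, vnode_t **xvpp, cred_t *cr)
{
    zfsvfs_t *zfsvfs = zp->z_zfsvfs;
    znode_t *xzp;
    uint64_t xattr_obj;
    int error;

    error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
        &xattr_obj, sizeof (xattr_obj));
    if (error == 0 && xattr_obj != 0) {
        /* xattr dir already exists; just take a hold on it */
        error = zfs_zget(zfsvfs, xattr_obj, &xzp);
        if (error == 0)
            *xvpp = ZTOV(xzp);
        return (error);
    }

    /* no xattr dir yet: create one and link it through SA_ZPL_XATTR */
    return (zfs_make_xattrdir(zp, vap, xvpp, cr));
}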
void
fileattrpack(attrinfo_t *aip, zfsvfs_t *zfsvfs, znode_t *zp)
{
    attrgroup_t fileattr = aip->ai_attrlist->fileattr;
    void *attrbufptr = *aip->ai_attrbufpp;
    void *varbufptr = *aip->ai_varbufpp;
    uint64_t allocsize = 0;
    cred_t *cr = (cred_t *)vfs_context_ucred(aip->ai_context);

    if ((ATTR_FILE_ALLOCSIZE | ATTR_FILE_DATAALLOCSIZE) & fileattr && zp) {
        uint32_t blksize;
        u_longlong_t nblks;

        sa_object_size(zp->z_sa_hdl, &blksize, &nblks);
        allocsize = (uint64_t)512LL * (uint64_t)nblks;
    }
    if (ATTR_FILE_LINKCOUNT & fileattr) {
        uint64_t val;

        VERIFY(sa_lookup(zp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs),
            &val, sizeof (val)) == 0);
        *((u_int32_t *)attrbufptr) = val;
        attrbufptr = ((u_int32_t *)attrbufptr) + 1;
    }
    if (ATTR_FILE_TOTALSIZE & fileattr) {
        uint64_t val;

        VERIFY(sa_lookup(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs),
            &val, sizeof (val)) == 0);
        *((off_t *)attrbufptr) = val;
        attrbufptr = ((off_t *)attrbufptr) + 1;
    }
    if (ATTR_FILE_ALLOCSIZE & fileattr) {
        *((off_t *)attrbufptr) = allocsize;
        attrbufptr = ((off_t *)attrbufptr) + 1;
    }
    if (ATTR_FILE_IOBLOCKSIZE & fileattr && zp) {
        *((u_int32_t *)attrbufptr) =
            zp->z_blksz ? zp->z_blksz : zfsvfs->z_max_blksz;
        attrbufptr = ((u_int32_t *)attrbufptr) + 1;
    }
    if (ATTR_FILE_DEVTYPE & fileattr) {
        uint64_t mode, val = 0;

        VERIFY(sa_lookup(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs),
            &mode, sizeof (mode)) == 0);
        sa_lookup(zp->z_sa_hdl, SA_ZPL_RDEV(zfsvfs),
            &val, sizeof (val));
        if (S_ISBLK(mode) || S_ISCHR(mode))
            *((u_int32_t *)attrbufptr) = (u_int32_t)val;
        else
            *((u_int32_t *)attrbufptr) = 0;
        attrbufptr = ((u_int32_t *)attrbufptr) + 1;
    }
    if (ATTR_FILE_DATALENGTH & fileattr) {
        uint64_t val;

        VERIFY(sa_lookup(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs),
            &val, sizeof (val)) == 0);
        *((off_t *)attrbufptr) = val;
        attrbufptr = ((off_t *)attrbufptr) + 1;
    }
    if (ATTR_FILE_DATAALLOCSIZE & fileattr) {
        *((off_t *)attrbufptr) = allocsize;
        attrbufptr = ((off_t *)attrbufptr) + 1;
    }
    if ((ATTR_FILE_RSRCLENGTH | ATTR_FILE_RSRCALLOCSIZE) & fileattr) {
        uint64_t rsrcsize = 0;
        uint64_t xattr;

        if (!sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
            &xattr, sizeof (xattr)) && xattr) {
            vnode_t *xdvp = NULLVP;
            vnode_t *xvp = NULLVP;
            struct componentname cn;

            bzero(&cn, sizeof (cn));
            cn.cn_nameiop = LOOKUP;
            cn.cn_flags = ISLASTCN;
            cn.cn_nameptr = XATTR_RESOURCEFORK_NAME;
            cn.cn_namelen = strlen(cn.cn_nameptr);

            /* Grab the hidden attribute directory vnode. */
            if (zfs_get_xattrdir(zp, &xdvp, cr, 0) == 0 &&
                zfs_dirlook(VTOZ(xdvp), cn.cn_nameptr, &xvp, 0,
                NULL, &cn) == 0) {
                rsrcsize = VTOZ(xvp)->z_size;
            }
            if (xvp)
                vnode_put(xvp);
            if (xdvp)
                vnode_put(xdvp);
        }
        if (ATTR_FILE_RSRCLENGTH & fileattr) {
            *((off_t *)attrbufptr) = rsrcsize;
            attrbufptr = ((off_t *)attrbufptr) + 1;
        }
        if (ATTR_FILE_RSRCALLOCSIZE & fileattr) {
            *((off_t *)attrbufptr) = roundup(rsrcsize, 512);
            attrbufptr = ((off_t *)attrbufptr) + 1;
        }
    }
    *aip->ai_attrbufpp = attrbufptr;
    *aip->ai_varbufpp = varbufptr;
}
/*
 * As we don't know the FID, we can't use an LU object, so this function
 * partially duplicates __osd_xattr_get(), which is built around an
 * LU object and uses it to cache data like the regular EA dnode, etc.
 */
static int
osd_find_parent_by_dnode(const struct lu_env *env, struct dt_object *o,
    struct lu_fid *fid)
{
    struct lustre_mdt_attrs *lma;
    udmu_objset_t *uos = &osd_obj2dev(osd_dt_obj(o))->od_objset;
    struct lu_buf buf;
    sa_handle_t *sa_hdl;
    nvlist_t *nvbuf = NULL;
    uchar_t *value;
    uint64_t dnode;
    int rc, size;
    ENTRY;

    /* first of all, get parent dnode from own attributes */
    LASSERT(osd_dt_obj(o)->oo_db);
    rc = -sa_handle_get(uos->os, osd_dt_obj(o)->oo_db->db_object,
        NULL, SA_HDL_PRIVATE, &sa_hdl);
    if (rc)
        RETURN(rc);

    dnode = ZFS_NO_OBJECT;
    rc = -sa_lookup(sa_hdl, SA_ZPL_PARENT(uos), &dnode, 8);
    sa_handle_destroy(sa_hdl);
    if (rc)
        RETURN(rc);

    /* now get EA buffer */
    rc = __osd_xattr_load(uos, dnode, &nvbuf);
    if (rc)
        GOTO(regular, rc);

    /* XXX: if we get that far.. should we cache the result? */

    /* try to find LMA attribute */
    LASSERT(nvbuf != NULL);
    rc = -nvlist_lookup_byte_array(nvbuf, XATTR_NAME_LMA, &value, &size);
    if (rc == 0 && size >= sizeof(*lma)) {
        lma = (struct lustre_mdt_attrs *)value;
        lustre_lma_swab(lma);
        *fid = lma->lma_self_fid;
        GOTO(out, rc = 0);
    }

regular:
    /* no LMA attribute in SA, let's try regular EA */

    /* first of all, get parent dnode storing regular EA */
    rc = -sa_handle_get(uos->os, dnode, NULL, SA_HDL_PRIVATE, &sa_hdl);
    if (rc)
        GOTO(out, rc);

    dnode = ZFS_NO_OBJECT;
    rc = -sa_lookup(sa_hdl, SA_ZPL_XATTR(uos), &dnode, 8);
    sa_handle_destroy(sa_hdl);
    if (rc)
        GOTO(out, rc);

    CLASSERT(sizeof(*lma) <= sizeof(osd_oti_get(env)->oti_buf));
    buf.lb_buf = osd_oti_get(env)->oti_buf;
    buf.lb_len = sizeof(osd_oti_get(env)->oti_buf);

    /* now try to find LMA */
    rc = __osd_xattr_get_large(env, uos, dnode, &buf,
        XATTR_NAME_LMA, &size);
    if (rc == 0 && size >= sizeof(*lma)) {
        lma = buf.lb_buf;
        lustre_lma_swab(lma);
        *fid = lma->lma_self_fid;
        GOTO(out, rc = 0);
    } else if (rc < 0) {
        GOTO(out, rc);
    } else {
        GOTO(out, rc = -EIO);
    }

out:
    if (nvbuf != NULL)
        nvlist_free(nvbuf);
    RETURN(rc);
}
/*
 * Lock a directory entry. A dirlock on <dzp, name> protects that name
 * in dzp's directory zap object. As long as you hold a dirlock, you can
 * assume two things: (1) dzp cannot be reaped, and (2) no other thread
 * can change the zap entry for (i.e. link or unlink) this name.
 *
 * Input arguments:
 *	dzp	- znode for directory
 *	name	- name of entry to lock
 *	flag	- ZNEW: if the entry already exists, fail with EEXIST.
 *		  ZEXISTS: if the entry does not exist, fail with ENOENT.
 *		  ZSHARED: allow concurrent access with other ZSHARED callers.
 *		  ZXATTR: we want dzp's xattr directory
 *		  ZCILOOK: On a mixed sensitivity file system,
 *			   this lookup should be case-insensitive.
 *		  ZCIEXACT: On a purely case-insensitive file system,
 *			    this lookup should be case-sensitive.
 *		  ZRENAMING: we are locking for renaming, force narrow locks
 *		  ZHAVELOCK: Don't grab the z_name_lock for this call. The
 *			     current thread already holds it.
 *
 * Output arguments:
 *	zpp	- pointer to the znode for the entry (NULL if there isn't one)
 *	dlpp	- pointer to the dirlock for this entry (NULL on error)
 *	direntflags - (case-insensitive lookup only)
 *		flags if multiple case-sensitive matches exist in directory
 *	realpnp	- (case-insensitive lookup only)
 *		actual name matched within the directory
 *
 * Return value: 0 on success or errno on failure.
 *
 * NOTE: Always checks for, and rejects, '.' and '..'.
 * NOTE: For case-insensitive file systems we take wide locks (see below),
 *	 but return znode pointers to a single match.
 */
int
zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp,
    int flag, int *direntflags, pathname_t *realpnp)
{
    zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
    zfs_dirlock_t *dl;
    boolean_t update;
    boolean_t exact;
    uint64_t zoid;
    vnode_t *vp = NULL;
    int error = 0;
    int cmpflags;

    *zpp = NULL;
    *dlpp = NULL;

    /*
     * Verify that we are not trying to lock '.', '..', or '.zfs'
     */
    if ((name[0] == '.' &&
        (name[1] == '\0' || (name[1] == '.' && name[2] == '\0'))) ||
        (zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0))
        return (EEXIST);

    /*
     * Case sensitivity and normalization preferences are set when
     * the file system is created. These are stored in the
     * zfsvfs->z_case and zfsvfs->z_norm fields. These choices
     * affect what vnodes can be cached in the DNLC, how we
     * perform zap lookups, and the "width" of our dirlocks.
     *
     * A normal dirlock locks a single name. Note that with
     * normalization a name can be composed multiple ways, but
     * when normalized, these names all compare equal. A wide
     * dirlock locks multiple names. We need these when the file
     * system is supporting mixed-mode access. It is sometimes
     * necessary to lock all case permutations of file name at
     * once so that simultaneous case-insensitive/case-sensitive
     * behaves as rationally as possible.
     */

    /*
     * Decide if exact matches should be requested when performing
     * a zap lookup on file systems supporting case-insensitive
     * access.
     */
    exact =
        ((zfsvfs->z_case == ZFS_CASE_INSENSITIVE) && (flag & ZCIEXACT)) ||
        ((zfsvfs->z_case == ZFS_CASE_MIXED) && !(flag & ZCILOOK));

    /*
     * Only look in or update the DNLC if we are looking for the
     * name on a file system that does not require normalization
     * or case folding. We can also look there if we happen to be
     * on a non-normalizing, mixed sensitivity file system IF we
     * are looking for the exact name.
     *
     * Maybe can add TO-UPPERed version of name to dnlc in ci-only
     * case for performance improvement?
     */
    update = !zfsvfs->z_norm ||
        ((zfsvfs->z_case == ZFS_CASE_MIXED) &&
        !(zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER) && !(flag & ZCILOOK));

    /*
     * ZRENAMING indicates we are in a situation where we should
     * take narrow locks regardless of the file system's
     * preferences for normalizing and case folding. This will
     * prevent us from deadlocking trying to grab the same wide lock
     * twice if the two names happen to be case-insensitive matches.
     */
    if (flag & ZRENAMING)
        cmpflags = 0;
    else
        cmpflags = zfsvfs->z_norm;

    /*
     * Wait until there are no locks on this name.
     *
     * Don't grab the lock if it is already held. However, cannot
     * have both ZSHARED and ZHAVELOCK together.
     */
    ASSERT(!(flag & ZSHARED) || !(flag & ZHAVELOCK));
    if (!(flag & ZHAVELOCK))
        rw_enter(&dzp->z_name_lock, RW_READER);

    mutex_enter(&dzp->z_lock);
    for (;;) {
        if (dzp->z_unlinked) {
            mutex_exit(&dzp->z_lock);
            if (!(flag & ZHAVELOCK))
                rw_exit(&dzp->z_name_lock);
            return (ENOENT);
        }
        for (dl = dzp->z_dirlocks; dl != NULL; dl = dl->dl_next) {
            if ((u8_strcmp(name, dl->dl_name, 0, cmpflags,
                U8_UNICODE_LATEST, &error) == 0) || error != 0)
                break;
        }
        if (error != 0) {
            mutex_exit(&dzp->z_lock);
            if (!(flag & ZHAVELOCK))
                rw_exit(&dzp->z_name_lock);
            return (ENOENT);
        }
        if (dl == NULL) {
            /*
             * Allocate a new dirlock and add it to the list.
             */
            dl = kmem_alloc(sizeof (zfs_dirlock_t), KM_SLEEP);
            cv_init(&dl->dl_cv, NULL, CV_DEFAULT, NULL);
            dl->dl_name = name;
            dl->dl_sharecnt = 0;
            dl->dl_namelock = 0;
            dl->dl_namesize = 0;
            dl->dl_dzp = dzp;
            dl->dl_next = dzp->z_dirlocks;
            dzp->z_dirlocks = dl;
            break;
        }
        if ((flag & ZSHARED) && dl->dl_sharecnt != 0)
            break;
        cv_wait(&dl->dl_cv, &dzp->z_lock);
    }

    /*
     * If the z_name_lock was NOT held for this dirlock record it.
     */
    if (flag & ZHAVELOCK)
        dl->dl_namelock = 1;

    if ((flag & ZSHARED) && ++dl->dl_sharecnt > 1 && dl->dl_namesize == 0) {
        /*
         * We're the second shared reference to dl. Make a copy of
         * dl_name in case the first thread goes away before we do.
         * Note that we initialize the new name before storing its
         * pointer into dl_name, because the first thread may load
         * dl->dl_name at any time. He'll either see the old value,
         * which is his, or the new shared copy; either is OK.
         */
        dl->dl_namesize = strlen(dl->dl_name) + 1;
        name = kmem_alloc(dl->dl_namesize, KM_SLEEP);
        bcopy(dl->dl_name, name, dl->dl_namesize);
        dl->dl_name = name;
    }

    mutex_exit(&dzp->z_lock);

    /*
     * We have a dirlock on the name. (Note that it is the dirlock,
     * not the dzp's z_lock, that protects the name in the zap object.)
     * See if there's an object by this name; if so, put a hold on it.
     */
    if (flag & ZXATTR) {
        error = sa_lookup(dzp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
            &zoid, sizeof (zoid));
        if (error == 0)
            error = (zoid == 0 ? ENOENT : 0);
    } else {
        if (update)
            vp = dnlc_lookup(ZTOV(dzp), name);
        if (vp == DNLC_NO_VNODE) {
            VN_RELE(vp);
            error = ENOENT;
        } else if (vp) {
            if (flag & ZNEW) {
                zfs_dirent_unlock(dl);
                VN_RELE(vp);
                return (EEXIST);
            }
            *dlpp = dl;
            *zpp = VTOZ(vp);
            return (0);
        } else {
            error = zfs_match_find(zfsvfs, dzp, name, exact,
                update, direntflags, realpnp, &zoid);
        }
    }
    if (error) {
        if (error != ENOENT || (flag & ZEXISTS)) {
            zfs_dirent_unlock(dl);
            return (error);
        }
    } else {
        if (flag & ZNEW) {
            zfs_dirent_unlock(dl);
            return (EEXIST);
        }
        error = zfs_zget(zfsvfs, zoid, zpp);
        if (error) {
            zfs_dirent_unlock(dl);
            return (error);
        }
        if (!(flag & ZXATTR) && update)
            dnlc_update(ZTOV(dzp), name, ZTOV(*zpp));
    }

    *dlpp = dl;

    return (0);
}
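A minimal usage sketch of the lock/lookup/unlock discipline described in the header comment above; the helper name and flag choice are illustrative, not from the source:

/* Hypothetical sketch: look up an existing entry under a dirlock. */
static int
lookup_and_release(znode_t *dzp, char *name)
{
    zfs_dirlock_t *dl;
    znode_t *zp;
    int error;

    error = zfs_dirent_lock(&dl, dzp, name, &zp, ZEXISTS, NULL, NULL);
    if (error)
        return (error);     /* e.g. ENOENT if the name is absent */

    /* ... operate on zp while the name cannot be linked/unlinked ... */

    zfs_dirent_unlock(dl);
    VN_RELE(ZTOV(zp));
    return (0);
}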
/* * Create a new DMU object to hold a zfs znode. * * IN: dzp - parent directory for new znode * vap - file attributes for new znode * tx - dmu transaction id for zap operations * cr - credentials of caller * flag - flags: * IS_ROOT_NODE - new object will be root * IS_XATTR - new object is an attribute * bonuslen - length of bonus buffer * setaclp - File/Dir initial ACL * fuidp - Tracks fuid allocation. * * OUT: zpp - allocated znode * */ void zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr, uint_t flag, znode_t **zpp, zfs_acl_ids_t *acl_ids) { uint64_t crtime[2], atime[2], mtime[2], ctime[2]; uint64_t mode, size, links, parent, pflags; uint64_t dzp_pflags = 0; uint64_t rdev = 0; zfs_sb_t *zsb = ZTOZSB(dzp); dmu_buf_t *db; timestruc_t now; uint64_t gen, obj; int bonuslen; sa_handle_t *sa_hdl; dmu_object_type_t obj_type; sa_bulk_attr_t *sa_attrs; int cnt = 0; zfs_acl_locator_cb_t locate = { 0 }; if (zsb->z_replay) { obj = vap->va_nodeid; now = vap->va_ctime; /* see zfs_replay_create() */ gen = vap->va_nblocks; /* ditto */ } else { obj = 0; gethrestime(&now); gen = dmu_tx_get_txg(tx); } obj_type = zsb->z_use_sa ? DMU_OT_SA : DMU_OT_ZNODE; bonuslen = (obj_type == DMU_OT_SA) ? DN_MAX_BONUSLEN : ZFS_OLD_ZNODE_PHYS_SIZE; /* * Create a new DMU object. */ /* * There's currently no mechanism for pre-reading the blocks that will * be needed to allocate a new object, so we accept the small chance * that there will be an i/o error and we will fail one of the * assertions below. */ if (S_ISDIR(vap->va_mode)) { if (zsb->z_replay) { VERIFY0(zap_create_claim_norm(zsb->z_os, obj, zsb->z_norm, DMU_OT_DIRECTORY_CONTENTS, obj_type, bonuslen, tx)); } else { obj = zap_create_norm(zsb->z_os, zsb->z_norm, DMU_OT_DIRECTORY_CONTENTS, obj_type, bonuslen, tx); } } else { if (zsb->z_replay) { VERIFY0(dmu_object_claim(zsb->z_os, obj, DMU_OT_PLAIN_FILE_CONTENTS, 0, obj_type, bonuslen, tx)); } else { obj = dmu_object_alloc(zsb->z_os, DMU_OT_PLAIN_FILE_CONTENTS, 0, obj_type, bonuslen, tx); } } ZFS_OBJ_HOLD_ENTER(zsb, obj); VERIFY(0 == sa_buf_hold(zsb->z_os, obj, NULL, &db)); /* * If this is the root, fix up the half-initialized parent pointer * to reference the just-allocated physical data area. */ if (flag & IS_ROOT_NODE) { dzp->z_id = obj; } else { dzp_pflags = dzp->z_pflags; } /* * If parent is an xattr, so am I. */ if (dzp_pflags & ZFS_XATTR) { flag |= IS_XATTR; } if (zsb->z_use_fuids) pflags = ZFS_ARCHIVE | ZFS_AV_MODIFIED; else pflags = 0; if (S_ISDIR(vap->va_mode)) { size = 2; /* contents ("." and "..") */ links = (flag & (IS_ROOT_NODE | IS_XATTR)) ? 2 : 1; } else { size = links = 0; } if (S_ISBLK(vap->va_mode) || S_ISCHR(vap->va_mode)) rdev = vap->va_rdev; parent = dzp->z_id; mode = acl_ids->z_mode; if (flag & IS_XATTR) pflags |= ZFS_XATTR; /* * No execs denied will be deterimed when zfs_mode_compute() is called. 
*/ pflags |= acl_ids->z_aclp->z_hints & (ZFS_ACL_TRIVIAL|ZFS_INHERIT_ACE|ZFS_ACL_AUTO_INHERIT| ZFS_ACL_DEFAULTED|ZFS_ACL_PROTECTED); ZFS_TIME_ENCODE(&now, crtime); ZFS_TIME_ENCODE(&now, ctime); if (vap->va_mask & ATTR_ATIME) { ZFS_TIME_ENCODE(&vap->va_atime, atime); } else { ZFS_TIME_ENCODE(&now, atime); } if (vap->va_mask & ATTR_MTIME) { ZFS_TIME_ENCODE(&vap->va_mtime, mtime); } else { ZFS_TIME_ENCODE(&now, mtime); } /* Now add in all of the "SA" attributes */ VERIFY(0 == sa_handle_get_from_db(zsb->z_os, db, NULL, SA_HDL_SHARED, &sa_hdl)); /* * Setup the array of attributes to be replaced/set on the new file * * order for DMU_OT_ZNODE is critical since it needs to be constructed * in the old znode_phys_t format. Don't change this ordering */ sa_attrs = kmem_alloc(sizeof (sa_bulk_attr_t) * ZPL_END, KM_SLEEP); if (obj_type == DMU_OT_ZNODE) { SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zsb), NULL, &atime, 16); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zsb), NULL, &mtime, 16); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zsb), NULL, &ctime, 16); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zsb), NULL, &crtime, 16); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zsb), NULL, &gen, 8); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zsb), NULL, &mode, 8); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zsb), NULL, &size, 8); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zsb), NULL, &parent, 8); } else { SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zsb), NULL, &mode, 8); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zsb), NULL, &size, 8); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zsb), NULL, &gen, 8); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zsb), NULL, &acl_ids->z_fuid, 8); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zsb), NULL, &acl_ids->z_fgid, 8); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zsb), NULL, &parent, 8); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zsb), NULL, &pflags, 8); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zsb), NULL, &atime, 16); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zsb), NULL, &mtime, 16); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zsb), NULL, &ctime, 16); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zsb), NULL, &crtime, 16); } SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_LINKS(zsb), NULL, &links, 8); if (obj_type == DMU_OT_ZNODE) { SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_XATTR(zsb), NULL, &empty_xattr, 8); } if (obj_type == DMU_OT_ZNODE || (S_ISBLK(vap->va_mode) || S_ISCHR(vap->va_mode))) { SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_RDEV(zsb), NULL, &rdev, 8); } if (obj_type == DMU_OT_ZNODE) { SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zsb), NULL, &pflags, 8); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zsb), NULL, &acl_ids->z_fuid, 8); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zsb), NULL, &acl_ids->z_fgid, 8); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PAD(zsb), NULL, pad, sizeof (uint64_t) * 4); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ZNODE_ACL(zsb), NULL, &acl_phys, sizeof (zfs_acl_phys_t)); } else if (acl_ids->z_aclp->z_version >= ZFS_ACL_VERSION_FUID) { SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_COUNT(zsb), NULL, &acl_ids->z_aclp->z_acl_count, 8); locate.cb_aclp = acl_ids->z_aclp; SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_ACES(zsb), zfs_acl_data_locator, &locate, acl_ids->z_aclp->z_acl_bytes); mode = zfs_mode_compute(mode, acl_ids->z_aclp, &pflags, acl_ids->z_fuid, acl_ids->z_fgid); } VERIFY(sa_replace_all_by_template(sa_hdl, sa_attrs, cnt, tx) == 0); if (!(flag & IS_ROOT_NODE)) { *zpp = zfs_znode_alloc(zsb, db, 0, obj_type, obj, sa_hdl, ZTOI(dzp)); VERIFY(*zpp != 
NULL); VERIFY(dzp != NULL); } else { /* * If we are creating the root node, the "parent" we * passed in is the znode for the root. */ *zpp = dzp; (*zpp)->z_sa_hdl = sa_hdl; } (*zpp)->z_pflags = pflags; (*zpp)->z_mode = mode; if (obj_type == DMU_OT_ZNODE || acl_ids->z_aclp->z_version < ZFS_ACL_VERSION_FUID) { VERIFY0(zfs_aclset_common(*zpp, acl_ids->z_aclp, cr, tx)); } kmem_free(sa_attrs, sizeof (sa_bulk_attr_t) * ZPL_END); ZFS_OBJ_HOLD_EXIT(zsb, obj); }
void
zfs_rmnode(znode_t *zp)
{
    zfsvfs_t *zfsvfs = zp->z_zfsvfs;
    objset_t *os = zfsvfs->z_os;
    znode_t *xzp = NULL;
    dmu_tx_t *tx;
    uint64_t acl_obj;
    uint64_t xattr_obj;
    int error;

    ASSERT(zp->z_links == 0);
    ASSERT(ZTOV(zp)->v_count == 0);

    /*
     * If this is an attribute directory, purge its contents.
     */
    if (ZTOV(zp)->v_type == VDIR && (zp->z_pflags & ZFS_XATTR)) {
        if (zfs_purgedir(zp) != 0) {
            /*
             * Not enough space to delete some xattrs.
             * Leave it in the unlinked set.
             */
            zfs_znode_dmu_fini(zp);
            zfs_znode_free(zp);
            return;
        }
    }

    /*
     * Free up all the data in the file.
     */
    error = dmu_free_long_range(os, zp->z_id, 0, DMU_OBJECT_END);
    if (error) {
        /*
         * Not enough space. Leave the file in the unlinked set.
         */
        zfs_znode_dmu_fini(zp);
        zfs_znode_free(zp);
        return;
    }

    /*
     * If the file has extended attributes, we're going to unlink
     * the xattr dir.
     */
    error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
        &xattr_obj, sizeof (xattr_obj));
    if (error == 0 && xattr_obj) {
        error = zfs_zget(zfsvfs, xattr_obj, &xzp);
        ASSERT(error == 0);
    }

    acl_obj = zfs_external_acl(zp);

    /*
     * Set up the final transaction.
     */
    tx = dmu_tx_create(os);
    dmu_tx_hold_free(tx, zp->z_id, 0, DMU_OBJECT_END);
    dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
    if (xzp) {
        dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, TRUE, NULL);
        dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
    }
    if (acl_obj)
        dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END);

    zfs_sa_upgrade_txholds(tx, zp);
    error = dmu_tx_assign(tx, TXG_WAIT);
    if (error) {
        /*
         * Not enough space to delete the file. Leave it in the
         * unlinked set, leaking it until the fs is remounted (at
         * which point we'll call zfs_unlinked_drain() to process it).
         */
        dmu_tx_abort(tx);
        zfs_znode_dmu_fini(zp);
        zfs_znode_free(zp);
        goto out;
    }

    if (xzp) {
        ASSERT(error == 0);
        mutex_enter(&xzp->z_lock);
        xzp->z_unlinked = B_TRUE;   /* mark xzp for deletion */
        xzp->z_links = 0;           /* no more links to it */
        VERIFY(0 == sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs),
            &xzp->z_links, sizeof (xzp->z_links), tx));
        mutex_exit(&xzp->z_lock);
        zfs_unlinked_add(xzp, tx);
    }

    /* Remove this znode from the unlinked set */
    VERIFY3U(0, ==, zap_remove_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj,
        zp->z_id, tx));

    zfs_znode_delete(zp, tx);

    dmu_tx_commit(tx);
out:
    if (xzp)
        VN_RELE(ZTOV(xzp));
}
/*
 * Look up a directory entry under a locked vnode.
 * dvp being locked gives us a guarantee that there are no concurrent
 * modifications of the directory and, thus, if a node can be found in
 * the directory, then it must not be unlinked.
 *
 * Input arguments:
 *	dzp	- znode for directory
 *	name	- name of entry to lock
 *	flag	- ZNEW: if the entry already exists, fail with EEXIST.
 *		  ZEXISTS: if the entry does not exist, fail with ENOENT.
 *		  ZXATTR: we want dzp's xattr directory
 *
 * Output arguments:
 *	zpp	- pointer to the znode for the entry (NULL if there isn't one)
 *
 * Return value: 0 on success or errno on failure.
 *
 * NOTE: Always checks for, and rejects, '.' and '..'.
 */
int
zfs_dirent_lookup(znode_t *dzp, const char *name, znode_t **zpp, int flag)
{
    zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
    boolean_t exact;
    uint64_t zoid;
    vnode_t *vp = NULL;
    int error = 0;

    ASSERT_VOP_LOCKED(ZTOV(dzp), __func__);

    *zpp = NULL;

    /*
     * Verify that we are not trying to lock '.', '..', or '.zfs'
     */
    if ((name[0] == '.' &&
        (name[1] == '\0' || (name[1] == '.' && name[2] == '\0'))) ||
        (zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0))
        return (SET_ERROR(EEXIST));

    /*
     * Case sensitivity and normalization preferences are set when
     * the file system is created. These are stored in the
     * zfsvfs->z_case and zfsvfs->z_norm fields. These choices
     * affect how we perform zap lookups.
     *
     * Decide if exact matches should be requested when performing
     * a zap lookup on file systems supporting case-insensitive
     * access.
     *
     * NB: we do not need to worry about this flag for ZFS_CASE_SENSITIVE
     * because in that case MT_EXACT and MT_FIRST should produce exactly
     * the same result.
     */
    exact = zfsvfs->z_case == ZFS_CASE_MIXED;

    if (dzp->z_unlinked && !(flag & ZXATTR))
        return (ENOENT);
    if (flag & ZXATTR) {
        error = sa_lookup(dzp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
            &zoid, sizeof (zoid));
        if (error == 0)
            error = (zoid == 0 ? ENOENT : 0);
    } else {
        error = zfs_match_find(zfsvfs, dzp, name, exact, &zoid);
    }
    if (error) {
        if (error != ENOENT || (flag & ZEXISTS)) {
            return (error);
        }
    } else {
        if (flag & ZNEW) {
            return (SET_ERROR(EEXIST));
        }
        error = zfs_zget(zfsvfs, zoid, zpp);
        if (error)
            return (error);
        ASSERT(!(*zpp)->z_unlinked);
    }

    return (0);
}