/*
 * Obtain the inode of zp's extended attribute directory.
 *
 * ** The directory is created when it is missing and the caller asked
 * ** for creation via CREATE_XATTR_DIR.
 *
 *	IN:	zp	- znode whose attribute directory is wanted
 *		cr	- credentials of caller
 *		flags	- flags from the VOP_LOOKUP call
 *
 *	OUT:	xipp	- inode of the extended attribute directory
 *
 *	RETURN:	0 on success
 *		ENOENT if the directory is missing and CREATE_XATTR_DIR
 *		    is not set
 *		EROFS if the directory is missing and the filesystem is
 *		    read-only
 *		any other error number reported by the helpers
 */
int
zfs_get_xattrdir(znode_t *zp, struct inode **xipp, cred_t *cr, int flags)
{
	zfsvfs_t	*fs = ZTOZSB(zp);
	znode_t		*xattr_zp;
	zfs_dirlock_t	*dirlock;
	vattr_t		attrs;
	int		err;

	/* Retry the whole sequence for as long as creation asks for it. */
	do {
		err = zfs_dirent_lock(&dirlock, zp, "", &xattr_zp, ZXATTR,
		    NULL, NULL);
		if (err != 0)
			return (err);

		/* The directory already exists: hand it back. */
		if (xattr_zp != NULL) {
			*xipp = ZTOI(xattr_zp);
			zfs_dirent_unlock(dirlock);
			return (0);
		}

		if ((flags & CREATE_XATTR_DIR) == 0) {
			zfs_dirent_unlock(dirlock);
			return (SET_ERROR(ENOENT));
		}

		if (zfs_is_readonly(fs)) {
			zfs_dirent_unlock(dirlock);
			return (SET_ERROR(EROFS));
		}

		/*
		 * Creating files in an attribute directory is governed by
		 * the write_xattr permission on the base file, and searching
		 * it by the read_xattr permission on the base file.
		 *
		 * Inside the directory, reading and writing attributes is
		 * controlled by the permissions of each attribute file.
		 */
		attrs.va_mask = ATTR_MODE | ATTR_UID | ATTR_GID;
		attrs.va_mode = S_IFDIR | S_ISVTX | 0777;
		zfs_fuid_map_ids(zp, cr, &attrs.va_uid, &attrs.va_gid);
		attrs.va_dentry = NULL;

		err = zfs_make_xattrdir(zp, &attrs, xipp, cr);
		zfs_dirent_unlock(dirlock);

		/* NB: dmu_tx_wait() was already done if it was necessary */
	} while (err == ERESTART);

	return (err);
}
/*
 * Obtain the vnode of zp's extended attribute directory.
 *
 * ** The directory is created when it does not exist yet **
 *
 *	IN:	zp	- znode whose attribute directory is wanted
 *		cr	- credentials of caller
 *
 *	OUT:	xvpp	- vnode of the extended attribute directory
 *
 *	RETURN:	0 on success
 *		EROFS if the directory is missing and the filesystem is
 *		    mounted read-only
 *		any other error number reported by the helpers
 */
int
zfs_get_xattrdir(znode_t *zp, vnode_t **xvpp, cred_t *cr)
{
	zfsvfs_t	*fs = zp->z_zfsvfs;
	znode_t		*xattr_zp;
	zfs_dirlock_t	*dirlock;
	vattr_t		attrs;
	int		err;

	do {
		err = zfs_dirent_lock(&dirlock, zp, "", &xattr_zp, ZXATTR);
		if (err != 0)
			return (err);

		/* The directory already exists: hand it back. */
		if (xattr_zp != NULL) {
			*xvpp = ZTOV(xattr_zp);
			zfs_dirent_unlock(dirlock);
			return (0);
		}

		ASSERT(zp->z_phys->zp_xattr == 0);

		if (fs->z_vfs->vfs_flag & VFS_RDONLY) {
			zfs_dirent_unlock(dirlock);
			return (EROFS);
		}

		/*
		 * Creating files in an attribute directory is governed by
		 * the write_xattr permission on the base file, and searching
		 * it by the read_xattr permission on the base file.
		 *
		 * Inside the directory, reading and writing attributes is
		 * controlled by the permissions of each attribute file.
		 */
		attrs.va_mask = AT_TYPE | AT_MODE | AT_UID | AT_GID;
		attrs.va_type = VDIR;
		attrs.va_mode = S_IFDIR | S_ISVTX | 0777;
		attrs.va_uid = (uid_t)zp->z_phys->zp_uid;
		attrs.va_gid = (gid_t)zp->z_phys->zp_gid;

		err = zfs_make_xattrdir(zp, &attrs, xvpp, cr);
		zfs_dirent_unlock(dirlock);

		/* NB: dmu_tx_wait() was already done if it was necessary */
	} while (err == ERESTART && fs->z_assign == TXG_NOWAIT);

	return (err);
}
/*
 * Resolve a name within directory dzp and return its inode through ipp.
 *
 * NOTE: "", '.' and '..' are special-cased because no directory entries
 * are stored for them.  When snapshot support is compiled in and dzp has
 * a control directory, '.zfs' is resolved as a pseudo-directory as well.
 *
 * For a case-insensitive request (FIGNORECASE) on one of the special
 * names, the name is copied into rpnp when rpnp is supplied.
 *
 * Returns 0 on success or an error number on failure.
 */
int
zfs_dirlook(znode_t *dzp, char *name, struct inode **ipp, int flags,
    int *deflg, pathname_t *rpnp)
{
	zfs_dirlock_t	*dirlock;
	znode_t		*target;
	uint64_t	parent_obj;
	int		err = 0;

	if (name[0] == '\0' || (name[0] == '.' && name[1] == '\0')) {
		/* The directory itself. */
		*ipp = ZTOI(dzp);
		igrab(*ipp);
	} else if (name[0] == '.' && name[1] == '.' && name[2] == '\0') {
		zfs_sb_t *zsb = ZTOZSB(dzp);

		err = sa_lookup(dzp->z_sa_hdl, SA_ZPL_PARENT(zsb),
		    &parent_obj, sizeof (parent_obj));
		if (err != 0)
			return (err);

#ifdef HAVE_SNAPSHOT
		/*
		 * A snapshot mounted under .zfs is its own parent; '..'
		 * then resolves to the snapshot directory.
		 */
		if (parent_obj == dzp->z_id && zsb->z_parent != zsb) {
			return (zfsctl_root_lookup(zsb->z_parent->z_ctldir,
			    "snapshot", ipp, NULL, 0, NULL, kcred,
			    NULL, NULL, NULL));
		}
#endif /* HAVE_SNAPSHOT */

		rw_enter(&dzp->z_parent_lock, RW_READER);
		err = zfs_zget(zsb, parent_obj, &target);
		if (err == 0)
			*ipp = ZTOI(target);
		rw_exit(&dzp->z_parent_lock);
#ifdef HAVE_SNAPSHOT
	} else if (zfs_has_ctldir(dzp) &&
	    strcmp(name, ZFS_CTLDIR_NAME) == 0) {
		*ipp = zfsctl_root(dzp);
#endif /* HAVE_SNAPSHOT */
	} else {
		int lockflags = ZEXISTS | ZSHARED;

		if (flags & FIGNORECASE)
			lockflags |= ZCILOOK;

		err = zfs_dirent_lock(&dirlock, dzp, name, &target,
		    lockflags, deflg, rpnp);
		if (err == 0) {
			*ipp = ZTOI(target);
			zfs_dirent_unlock(dirlock);
			dzp->z_zn_prefetch = B_TRUE; /* enable prefetching */
		}

		/* rpnp was passed to zfs_dirent_lock(); skip the copy below */
		rpnp = NULL;
	}

	if (err == 0 && rpnp != NULL && (flags & FIGNORECASE))
		(void) strlcpy(rpnp->pn_buf, name, rpnp->pn_bufsize);

	return (err);
}
/*
 * Resolve a name within directory dzp and return its vnode through vpp.
 *
 * NOTE: "", '.' and '..' are special-cased because no directory entries
 * are stored for them.  When dzp has a control directory, '.zfs' is
 * resolved as a pseudo-directory as well.
 *
 * For a case-insensitive request (FIGNORECASE) on one of the special
 * names, the name is copied into rpnp when rpnp is supplied.
 *
 * Returns 0 on success or an error number on failure.
 */
int
zfs_dirlook(znode_t *dzp, char *name, vnode_t **vpp, int flags,
    int *deflg, pathname_t *rpnp)
{
	zfs_dirlock_t	*dirlock;
	znode_t		*target;
	int		err = 0;

	if (name[0] == '\0' || (name[0] == '.' && name[1] == '\0')) {
		/* The directory itself. */
		*vpp = ZTOV(dzp);
		VN_HOLD(*vpp);
	} else if (name[0] == '.' && name[1] == '.' && name[2] == '\0') {
		zfsvfs_t *fs = dzp->z_zfsvfs;

		/*
		 * A snapshot mounted under .zfs is its own parent; '..'
		 * then resolves to the snapshot directory.
		 */
		if (dzp->z_phys->zp_parent == dzp->z_id &&
		    fs->z_parent != fs) {
			return (zfsctl_root_lookup(fs->z_parent->z_ctldir,
			    "snapshot", vpp, NULL, 0, NULL, kcred,
			    NULL, NULL, NULL));
		}

		rw_enter(&dzp->z_parent_lock, RW_READER);
		err = zfs_zget(fs, dzp->z_phys->zp_parent, &target);
		if (err == 0)
			*vpp = ZTOV(target);
		rw_exit(&dzp->z_parent_lock);
	} else if (zfs_has_ctldir(dzp) &&
	    strcmp(name, ZFS_CTLDIR_NAME) == 0) {
		*vpp = zfsctl_root(dzp);
	} else {
		int lockflags = ZEXISTS | ZSHARED;

		if (flags & FIGNORECASE)
			lockflags |= ZCILOOK;

		err = zfs_dirent_lock(&dirlock, dzp, name, &target,
		    lockflags, deflg, rpnp);
		if (err == 0) {
			*vpp = ZTOV(target);
			zfs_dirent_unlock(dirlock);
			dzp->z_zn_prefetch = B_TRUE; /* enable prefetching */
		}

		/* rpnp was passed to zfs_dirent_lock(); skip the copy below */
		rpnp = NULL;
	}

	if (err == 0 && rpnp != NULL && (flags & FIGNORECASE))
		(void) strlcpy(rpnp->pn_buf, name, rpnp->pn_bufsize);

	return (err);
}
zfs_dirlook(znode_t *dzp, char *name, vnode_t **vpp)
#endif
{
	/*
	 * Look up a name in directory dzp and return its vnode through vpp.
	 * Returns 0 on success or an error number on failure.
	 *
	 * NOTE(review): the start of this definition (the matching #ifdef
	 * and the __APPLE__ prototype that supplies 'cnp') is not visible
	 * here; on __APPLE__ the name is taken from cnp->cn_nameptr below.
	 */
	zfs_dirlock_t *dl;
	znode_t *zp;
	int error = 0;
	uint64_t parent;

#ifdef __APPLE__
	char *name = cnp->cn_nameptr;
#endif

	if (name[0] == 0 || (name[0] == '.' && name[1] == 0)) {
		/* "" and "." both resolve to the directory itself. */
		*vpp = ZTOV(dzp);
		VN_HOLD(*vpp);
	} else if (name[0] == '.' && name[1] == '.' && name[2] == 0) {
		zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
		/*
		 * If we are a snapshot mounted under .zfs, return
		 * the vp for the snapshot directory.
		 */
		if ((error = sa_lookup(dzp->z_sa_hdl,
		    SA_ZPL_PARENT(zfsvfs), &parent, sizeof (parent))) != 0)
			return (error);
		/* A directory that is its own parent in a child filesystem. */
		if (parent == dzp->z_id && zfsvfs->z_parent != zfsvfs) {
			error = zfsctl_root_lookup(zfsvfs->z_parent->z_ctldir,
			    "snapshot", vpp, NULL, 0, NULL, kcred /*, NULL, NULL, NULL*/);
			return (error);
		}
		/* Fetch the parent znode while holding z_parent_lock. */
		rw_enter(&dzp->z_parent_lock, RW_READER);
		error = zfs_zget(zfsvfs, parent, &zp);
		if (error == 0)
			*vpp = ZTOV(zp);
		rw_exit(&dzp->z_parent_lock);
	} else if (zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0) {
		/* The '.zfs' control directory. */
		*vpp = zfsctl_root(dzp);
	} else {
		/*
		 * Ordinary entry: take a shared dirlock that requires the
		 * entry to exist, and drop the lock again once the znode
		 * has been obtained.
		 */
#ifdef __APPLE__
		error = zfs_dirent_lock(&dl, dzp, cnp, &zp, ZEXISTS | ZSHARED);
#else
		error = zfs_dirent_lock(&dl, dzp, name, &zp, ZEXISTS | ZSHARED);
#endif
		if (error == 0) {
			*vpp = ZTOV(zp);
			zfs_dirent_unlock(dl);
			dzp->z_zn_prefetch = B_TRUE; /* enable prefetching */
		}
	}

	return (error);
}
/*
 * Resolve a name within directory dzp and return its vnode through vpp.
 *
 * NOTE: "", '.' and '..' are special-cased because no directory entries
 * are stored for them.  When dzp has a control directory, '.zfs' is
 * resolved as a pseudo-directory as well.
 *
 * Returns 0 on success or an error number on failure.
 */
int
zfs_dirlook(znode_t *dzp, char *name, vnode_t **vpp)
{
	zfs_dirlock_t	*dirlock;
	znode_t		*target;
	int		err;

	/* "" and "." : the directory itself. */
	if (name[0] == '\0' || (name[0] == '.' && name[1] == '\0')) {
		*vpp = ZTOV(dzp);
		VN_HOLD(*vpp);
		return (0);
	}

	/* ".." : the parent, or the snapshot directory. */
	if (name[0] == '.' && name[1] == '.' && name[2] == '\0') {
		zfsvfs_t *fs = dzp->z_zfsvfs;

		/*
		 * A snapshot mounted under .zfs is its own parent; '..'
		 * then resolves to the snapshot directory.
		 */
		if (dzp->z_phys->zp_parent == dzp->z_id &&
		    fs->z_parent != fs) {
			return (zfsctl_root_lookup(fs->z_parent->z_ctldir,
			    "snapshot", vpp, NULL, 0, NULL, kcred));
		}

		rw_enter(&dzp->z_parent_lock, RW_READER);
		err = zfs_zget(fs, dzp->z_phys->zp_parent, &target);
		if (err == 0)
			*vpp = ZTOV(target);
		rw_exit(&dzp->z_parent_lock);
		return (err);
	}

	/* ".zfs" : the control directory. */
	if (zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0) {
		*vpp = zfsctl_root(dzp);
		return (0);
	}

	/* Ordinary entry: it must exist, and a shared dirlock suffices. */
	err = zfs_dirent_lock(&dirlock, dzp, name, &target,
	    ZEXISTS | ZSHARED);
	if (err == 0) {
		*vpp = ZTOV(target);
		zfs_dirent_unlock(dirlock);
		dzp->z_zn_prefetch = B_TRUE;	/* enable prefetching */
	}

	return (err);
}
/*
 * Lock a directory entry.  A dirlock on <dzp, name> protects that name
 * in dzp's directory zap object.  As long as you hold a dirlock, you can
 * assume two things: (1) dzp cannot be reaped, and (2) no other thread
 * can change the zap entry for (i.e. link or unlink) this name.
 *
 * Input arguments:
 *	dzp	- znode for directory
 *	name	- name of entry to lock
 *	flag	- ZNEW: if the entry already exists, fail with EEXIST.
 *		  ZEXISTS: if the entry does not exist, fail with ENOENT.
 *		  ZSHARED: allow concurrent access with other ZSHARED callers.
 *		  ZXATTR: we want dzp's xattr directory
 *		  ZCILOOK: On a mixed sensitivity file system,
 *			   this lookup should be case-insensitive.
 *		  ZCIEXACT: On a purely case-insensitive file system,
 *			    this lookup should be case-sensitive.
 *		  ZRENAMING: we are locking for renaming, force narrow locks
 *		  ZHAVELOCK: Don't grab the z_name_lock for this call. The
 *			     current thread already holds it.
 *
 * Output arguments:
 *	zpp	- pointer to the znode for the entry (NULL if there isn't one)
 *	dlpp	- pointer to the dirlock for this entry (NULL on error)
 *	direntflags - (case-insensitive lookup only)
 *		flags if multiple case-sensitive matches exist in directory
 *	realpnp     - (case-insensitive lookup only)
 *		actual name matched within the directory
 *
 * Return value: 0 on success or errno on failure.
 *
 * NOTE: Always checks for, and rejects, '.' and '..'.
 * NOTE: For case-insensitive file systems we take wide locks (see below),
 *	but return znode pointers to a single match.
 * NOTE: A missing entry is not an error unless ZEXISTS is set; in that
 *	case 0 is returned with the dirlock held and *zpp left NULL.
 */
int
zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp,
    int flag, int *direntflags, pathname_t *realpnp)
{
	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
	zfs_dirlock_t	*dl;
	boolean_t	update;
	boolean_t	exact;
	uint64_t	zoid;
	vnode_t		*vp = NULL;
	int		error = 0;
	int		cmpflags;

	*zpp = NULL;
	*dlpp = NULL;

	/*
	 * Verify that we are not trying to lock '.', '..', or '.zfs'
	 */
	if ((name[0] == '.' &&
	    (name[1] == '\0' || (name[1] == '.' && name[2] == '\0'))) ||
	    (zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0))
		return (EEXIST);

	/*
	 * Case sensitivity and normalization preferences are set when
	 * the file system is created.  These are stored in the
	 * zfsvfs->z_case and zfsvfs->z_norm fields.  These choices
	 * affect what vnodes can be cached in the DNLC, how we
	 * perform zap lookups, and the "width" of our dirlocks.
	 *
	 * A normal dirlock locks a single name.  Note that with
	 * normalization a name can be composed multiple ways, but
	 * when normalized, these names all compare equal.  A wide
	 * dirlock locks multiple names.  We need these when the file
	 * system is supporting mixed-mode access.  It is sometimes
	 * necessary to lock all case permutations of file name at
	 * once so that simultaneous case-insensitive/case-sensitive
	 * behaves as rationally as possible.
	 */

	/*
	 * Decide if exact matches should be requested when performing
	 * a zap lookup on file systems supporting case-insensitive
	 * access.
	 */
	exact =
	    ((zfsvfs->z_case == ZFS_CASE_INSENSITIVE) && (flag & ZCIEXACT)) ||
	    ((zfsvfs->z_case == ZFS_CASE_MIXED) && !(flag & ZCILOOK));

	/*
	 * Only look in or update the DNLC if we are looking for the
	 * name on a file system that does not require normalization
	 * or case folding.  We can also look there if we happen to be
	 * on a non-normalizing, mixed sensitivity file system IF we
	 * are looking for the exact name.
	 *
	 * Maybe can add TO-UPPERed version of name to dnlc in ci-only
	 * case for performance improvement?
	 */
	update = !zfsvfs->z_norm ||
	    ((zfsvfs->z_case == ZFS_CASE_MIXED) &&
	    !(zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER) && !(flag & ZCILOOK));

	/*
	 * ZRENAMING indicates we are in a situation where we should
	 * take narrow locks regardless of the file system's
	 * preferences for normalizing and case folding.  This will
	 * prevent us deadlocking trying to grab the same wide lock
	 * twice if the two names happen to be case-insensitive
	 * matches.
	 */
	if (flag & ZRENAMING)
		cmpflags = 0;
	else
		cmpflags = zfsvfs->z_norm;

	/*
	 * Wait until there are no locks on this name.
	 *
	 * Don't grab the lock if it is already held. However, cannot
	 * have both ZSHARED and ZHAVELOCK together.
	 */
	ASSERT(!(flag & ZSHARED) || !(flag & ZHAVELOCK));
	if (!(flag & ZHAVELOCK))
		rw_enter(&dzp->z_name_lock, RW_READER);

	mutex_enter(&dzp->z_lock);
	for (;;) {
		/* An unlinked directory can no longer have entries locked. */
		if (dzp->z_unlinked) {
			mutex_exit(&dzp->z_lock);
			if (!(flag & ZHAVELOCK))
				rw_exit(&dzp->z_name_lock);
			return (ENOENT);
		}
		/*
		 * Search the existing dirlocks for a matching name.  The
		 * walk also stops when u8_strcmp() reports an error.
		 */
		for (dl = dzp->z_dirlocks; dl != NULL; dl = dl->dl_next) {
			if ((u8_strcmp(name, dl->dl_name, 0, cmpflags,
			    U8_UNICODE_LATEST, &error) == 0) || error != 0)
				break;
		}
		/* A name comparison failure is reported as ENOENT. */
		if (error != 0) {
			mutex_exit(&dzp->z_lock);
			if (!(flag & ZHAVELOCK))
				rw_exit(&dzp->z_name_lock);
			return (ENOENT);
		}
		if (dl == NULL) {
			/*
			 * Allocate a new dirlock and add it to the list.
			 */
			dl = kmem_alloc(sizeof (zfs_dirlock_t), KM_SLEEP);
			cv_init(&dl->dl_cv, NULL, CV_DEFAULT, NULL);
			dl->dl_name = name;
			dl->dl_sharecnt = 0;
			dl->dl_namelock = 0;
			dl->dl_namesize = 0;
			dl->dl_dzp = dzp;
			dl->dl_next = dzp->z_dirlocks;
			dzp->z_dirlocks = dl;
			break;
		}
		/* An existing shared lock can be joined by a ZSHARED caller. */
		if ((flag & ZSHARED) && dl->dl_sharecnt != 0)
			break;
		/* Otherwise wait on the lock and rescan from the top. */
		cv_wait(&dl->dl_cv, &dzp->z_lock);
	}

	/*
	 * If the caller already held z_name_lock (ZHAVELOCK), so that it
	 * was not taken above on behalf of this dirlock, record that in
	 * the dirlock.
	 */
	if (flag & ZHAVELOCK)
		dl->dl_namelock = 1;

	if ((flag & ZSHARED) && ++dl->dl_sharecnt > 1 && dl->dl_namesize == 0) {
		/*
		 * We're the second shared reference to dl.  Make a copy of
		 * dl_name in case the first thread goes away before we do.
		 * Note that we initialize the new name before storing its
		 * pointer into dl_name, because the first thread may load
		 * dl->dl_name at any time.  He'll either see the old value,
		 * which is his, or the new shared copy; either is OK.
		 */
		dl->dl_namesize = strlen(dl->dl_name) + 1;
		name = kmem_alloc(dl->dl_namesize, KM_SLEEP);
		bcopy(dl->dl_name, name, dl->dl_namesize);
		dl->dl_name = name;
	}

	mutex_exit(&dzp->z_lock);

	/*
	 * We have a dirlock on the name.  (Note that it is the dirlock,
	 * not the dzp's z_lock, that protects the name in the zap object.)
	 * See if there's an object by this name; if so, put a hold on it.
	 */
	if (flag & ZXATTR) {
		/* An xattr object number of 0 means there is no xattr dir. */
		error = sa_lookup(dzp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &zoid,
		    sizeof (zoid));
		if (error == 0)
			error = (zoid == 0 ? ENOENT : 0);
	} else {
		/* vp stays NULL when the DNLC may not be consulted. */
		if (update)
			vp = dnlc_lookup(ZTOV(dzp), name);
		if (vp == DNLC_NO_VNODE) {
			/* Negative DNLC entry: the name is known not to exist. */
			VN_RELE(vp);
			error = ENOENT;
		} else if (vp) {
			if (flag & ZNEW) {
				zfs_dirent_unlock(dl);
				VN_RELE(vp);
				return (EEXIST);
			}
			/*
			 * DNLC hit: return the cached vnode's znode.  The
			 * vnode is not released on this path; presumably the
			 * hold from dnlc_lookup() passes to the caller.
			 */
			*dlpp = dl;
			*zpp = VTOZ(vp);
			return (0);
		} else {
			error = zfs_match_find(zfsvfs, dzp, name, exact,
			    update, direntflags, realpnp, &zoid);
		}
	}
	if (error) {
		/*
		 * ENOENT is only a failure when the caller required the
		 * entry to exist; otherwise fall through and return the
		 * dirlock with *zpp still NULL.
		 */
		if (error != ENOENT || (flag & ZEXISTS)) {
			zfs_dirent_unlock(dl);
			return (error);
		}
	} else {
		if (flag & ZNEW) {
			zfs_dirent_unlock(dl);
			return (EEXIST);
		}
		error = zfs_zget(zfsvfs, zoid, zpp);
		if (error) {
			zfs_dirent_unlock(dl);
			return (error);
		}
		/* Cache the result, but never for the xattr directory. */
		if (!(flag & ZXATTR) && update)
			dnlc_update(ZTOV(dzp), name, ZTOV(*zpp));
	}

	*dlpp = dl;

	return (0);
}
/*
 * Obtain the vnode of zp's extended attribute directory.
 *
 * ** The directory is created when it is missing and the caller asked
 * ** for creation via CREATE_XATTR_DIR.
 *
 *	IN:	zp	- znode whose attribute directory is wanted
 *		cr	- credentials of caller
 *		flags	- flags from the VOP_LOOKUP call
 *
 *	OUT:	xvpp	- vnode of the extended attribute directory
 *
 *	RETURN:	0 on success
 *		ENOENT (illumos) or ENOATTR (otherwise) if the directory
 *		    is missing and CREATE_XATTR_DIR is not set
 *		EROFS if the directory is missing and the filesystem is
 *		    mounted read-only
 *		any other error number reported by the helpers
 */
int
zfs_get_xattrdir(znode_t *zp, vnode_t **xvpp, cred_t *cr, int flags)
{
	zfsvfs_t	*fs = zp->z_zfsvfs;
	znode_t		*xattr_zp;
	zfs_dirlock_t	*dirlock;
	vattr_t		attrs;
	int		err;

	do {
		err = zfs_dirent_lock(&dirlock, zp, "", &xattr_zp, ZXATTR,
		    NULL, NULL);
		if (err != 0)
			return (err);

		/* The directory already exists: hand it back. */
		if (xattr_zp != NULL) {
			*xvpp = ZTOV(xattr_zp);
			zfs_dirent_unlock(dirlock);
			return (0);
		}

		if ((flags & CREATE_XATTR_DIR) == 0) {
			zfs_dirent_unlock(dirlock);
#ifdef illumos
			return (SET_ERROR(ENOENT));
#else
			return (SET_ERROR(ENOATTR));
#endif
		}

		if (fs->z_vfs->vfs_flag & VFS_RDONLY) {
			zfs_dirent_unlock(dirlock);
			return (SET_ERROR(EROFS));
		}

		/*
		 * Creating files in an attribute directory is governed by
		 * the write_xattr permission on the base file, and searching
		 * it by the read_xattr permission on the base file.
		 *
		 * Inside the directory, reading and writing attributes is
		 * controlled by the permissions of each attribute file.
		 */
		attrs.va_mask = AT_TYPE | AT_MODE | AT_UID | AT_GID;
		attrs.va_type = VDIR;
		attrs.va_mode = S_IFDIR | S_ISVTX | 0777;
		zfs_fuid_map_ids(zp, cr, &attrs.va_uid, &attrs.va_gid);

		err = zfs_make_xattrdir(zp, &attrs, xvpp, cr);
		zfs_dirent_unlock(dirlock);

		/* NB: dmu_tx_wait() was already done if it was necessary */
	} while (err == ERESTART);

	/* On success the new directory vnode is unlocked before returning. */
	if (err == 0)
		VOP_UNLOCK(*xvpp, 0);

	return (err);
}
/*
 * Take a dirlock on <dzp, name>.  The dirlock protects that name in
 * dzp's directory zap object: while it is held, (1) dzp cannot be reaped
 * and (2) no other thread can link or unlink this name.
 *
 * Input arguments:
 *	dzp	- znode for directory
 *	name	- name of entry to lock
 *	flag	- ZNEW: if the entry already exists, fail with EEXIST.
 *		  ZEXISTS: if the entry does not exist, fail with ENOENT.
 *		  ZSHARED: allow concurrent access with other ZSHARED callers.
 *		  ZXATTR: we want dzp's xattr directory
 *
 * Output arguments:
 *	zpp	- pointer to the znode for the entry (NULL if there isn't one)
 *	dlpp	- pointer to the dirlock for this entry (NULL on error)
 *
 * Return value: 0 on success or errno on failure.
 *
 * NOTE: '.', '..' and (where a control directory exists) '.zfs' are
 * always rejected with EEXIST.
 */
int
zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp,
    int flag)
{
	zfsvfs_t	*fs = dzp->z_zfsvfs;
	zfs_dirlock_t	*lock;
	vnode_t		*cached;
	uint64_t	obj;
	int		err;

	*zpp = NULL;
	*dlpp = NULL;

	/* Refuse to lock '.' and '..' ... */
	if (name[0] == '.' &&
	    (name[1] == '\0' || (name[1] == '.' && name[2] == '\0')))
		return (EEXIST);

	/* ... and '.zfs'. */
	if (zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0)
		return (EEXIST);

	/*
	 * Wait until nobody else holds a conflicting lock on this name.
	 */
	mutex_enter(&dzp->z_lock);
	for (;;) {
		if (dzp->z_reap) {
			mutex_exit(&dzp->z_lock);
			return (ENOENT);
		}

		/* Find an existing dirlock for this exact name, if any. */
		lock = dzp->z_dirlocks;
		while (lock != NULL && strcmp(name, lock->dl_name) != 0)
			lock = lock->dl_next;

		if (lock == NULL) {
			/*
			 * Nobody holds it: create a dirlock and put it at
			 * the head of the directory's list.
			 */
			lock = kmem_alloc(sizeof (zfs_dirlock_t), KM_SLEEP);
			cv_init(&lock->dl_cv, NULL, CV_DEFAULT, NULL);
			lock->dl_name = name;
			lock->dl_sharecnt = 0;
			lock->dl_namesize = 0;
			lock->dl_dzp = dzp;
			lock->dl_next = dzp->z_dirlocks;
			dzp->z_dirlocks = lock;
			break;
		}

		/* A shared lock can be joined by another ZSHARED caller. */
		if ((flag & ZSHARED) && lock->dl_sharecnt != 0)
			break;

		cv_wait(&lock->dl_cv, &dzp->z_lock);
	}

	if (flag & ZSHARED) {
		lock->dl_sharecnt++;
		if (lock->dl_sharecnt > 1 && lock->dl_namesize == 0) {
			/*
			 * We are the second shared holder.  Give the dirlock
			 * its own copy of the name in case the first thread
			 * goes away before we do.  The copy is filled in
			 * before its pointer is published in dl_name because
			 * the first thread may load dl_name at any time; it
			 * sees either its own string or the shared copy, and
			 * both are fine.
			 */
			lock->dl_namesize = strlen(lock->dl_name) + 1;
			name = kmem_alloc(lock->dl_namesize, KM_SLEEP);
			bcopy(lock->dl_name, name, lock->dl_namesize);
			lock->dl_name = name;
		}
	}

	mutex_exit(&dzp->z_lock);

	/*
	 * The name is now dirlocked.  (It is the dirlock, not dzp's z_lock,
	 * that protects the name in the zap object.)  See whether an object
	 * with this name exists; if so, put a hold on it.
	 */
	if (flag & ZXATTR) {
		obj = dzp->z_phys->zp_xattr;
		err = (obj != 0) ? 0 : ENOENT;
	} else {
		cached = dnlc_lookup(ZTOV(dzp), name);
		if (cached == DNLC_NO_VNODE) {
			VN_RELE(cached);
			err = ENOENT;
		} else if (cached != NULL) {
			if (flag & ZNEW) {
				zfs_dirent_unlock(lock);
				VN_RELE(cached);
				return (EEXIST);
			}
			*dlpp = lock;
			*zpp = VTOZ(cached);
			return (0);
		} else {
			err = zap_lookup(fs->z_os, dzp->z_id, name, 8, 1,
			    &obj);
			if (err == ENOENT)
				dnlc_update(ZTOV(dzp), name, DNLC_NO_VNODE);
		}
	}

	if (err != 0) {
		/* A missing entry is acceptable unless ZEXISTS was asked. */
		if (err == ENOENT && !(flag & ZEXISTS)) {
			*dlpp = lock;
			return (0);
		}
		zfs_dirent_unlock(lock);
		return (err);
	}

	if (flag & ZNEW) {
		zfs_dirent_unlock(lock);
		return (EEXIST);
	}

	err = zfs_zget(fs, obj, zpp);
	if (err != 0) {
		zfs_dirent_unlock(lock);
		return (err);
	}

	if (!(flag & ZXATTR))
		dnlc_update(ZTOV(dzp), name, ZTOV(*zpp));

	*dlpp = lock;
	return (0);
}
zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp, int flag) #endif { zfsvfs_t *zfsvfs = dzp->z_zfsvfs; zfs_dirlock_t *dl; uint64_t zoid; int error; vnode_t *vp; #ifdef __APPLE__ char *name; u_int8_t *nfc_name = NULL; /* NFC form of name */ int nfc_namesize = 0; #endif *zpp = NULL; *dlpp = NULL; #ifdef __APPLE__ /* Note: cnp will be NULL for ZXATTR case */ name = cnp ? cnp->cn_nameptr : ""; if (cnp) ASSERT(name[cnp->cn_namelen] == '\0'); #endif /* * Verify that we are not trying to lock '.', '..', or '.zfs' */ if ((name[0] == '.') && ((name[1] == '\0') || ((name[1] == '.') && (name[2] == '\0'))) || zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0) return (EEXIST); #ifdef __APPLE__ /* * Mac OS X: store non-ascii names in UTF-8 NFC (pre-composed) on disk. * * The NFC name ptr is stored in dl->dl_name (allocated here) * and its freed by zfs_dirent_unlock (since dl_namesize != 0). * * Since NFC size will not expand, we can allocate the same sized buffer. */ if (!is_ascii_str(name)) { size_t outlen; nfc_namesize = strlen(name) + 1; nfc_name = kmem_alloc(nfc_namesize, KM_SLEEP); if (utf8_normalizestr((const u_int8_t *)name, nfc_namesize, nfc_name, &outlen, nfc_namesize, UTF_PRECOMPOSED) == 0) { /* Normalization succeeded, switch to NFC name. */ name = (char *)nfc_name; } else { /* Normalization failed, just use input name as-is. */ kmem_free(nfc_name, nfc_namesize); nfc_name = NULL; } } #endif /* * Wait until there are no locks on this name. */ rw_enter(&dzp->z_name_lock, RW_READER); mutex_enter(&dzp->z_lock); for (;;) { if (dzp->z_unlinked) { mutex_exit(&dzp->z_lock); rw_exit(&dzp->z_name_lock); #ifdef __APPLE__ /* Release any unused NFC name before returning */ if (nfc_name) { kmem_free(nfc_name, nfc_namesize); } #endif return (ENOENT); } for (dl = dzp->z_dirlocks; dl != NULL; dl = dl->dl_next) if (strcmp(name, dl->dl_name) == 0) break; if (dl == NULL) { /* * Allocate a new dirlock and add it to the list. 
*/ dl = kmem_alloc(sizeof (zfs_dirlock_t), KM_SLEEP); cv_init(&dl->dl_cv, NULL, CV_DEFAULT, NULL); dl->dl_name = name; dl->dl_sharecnt = 0; dl->dl_namesize = 0; dl->dl_dzp = dzp; dl->dl_next = dzp->z_dirlocks; dzp->z_dirlocks = dl; #ifdef __APPLE__ /* * Keep the NFC name around in dir lock by tagging it * (setting nfc_namesize). */ if (nfc_name) { dl->dl_namesize = nfc_namesize; nfc_name = NULL; /* its now part of the dir lock */ } #endif break; } if ((flag & ZSHARED) && dl->dl_sharecnt != 0) break; cv_wait(&dl->dl_cv, &dzp->z_lock); dl=NULL; } #ifdef __APPLE__ /* * Release any unused NFC name (ie if we found a pre-existing lock entry) */ if (nfc_name) { kmem_free(nfc_name, nfc_namesize); nfc_name = NULL; } #endif if ((flag & ZSHARED) && ++dl->dl_sharecnt > 1 && dl->dl_namesize == 0) { /* * We're the second shared reference to dl. Make a copy of * dl_name in case the first thread goes away before we do. * Note that we initialize the new name before storing its * pointer into dl_name, because the first thread may load * dl->dl_name at any time. He'll either see the old value, * which is his, or the new shared copy; either is OK. */ dl->dl_namesize = strlen(dl->dl_name) + 1; name = kmem_alloc(dl->dl_namesize, KM_SLEEP); bcopy(dl->dl_name, name, dl->dl_namesize); dl->dl_name = name; } mutex_exit(&dzp->z_lock); /* * We have a dirlock on the name. (Note that it is the dirlock, * not the dzp's z_lock, that protects the name in the zap object.) * See if there's an object by this name; if so, put a hold on it. */ if (flag & ZXATTR) { error = sa_lookup(dzp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &zoid, sizeof (zoid)); if (error == 0) error = (zoid == 0 ? ENOENT : 0); } else { #ifdef __APPLE__ /* * Lookup an entry in the vnode name cache * * If the lookup succeeds, the vnode is returned in *vpp, * and a status of -1 is returned. * * If the lookup determines that the name does not exist * (negative caching), a status of ENOENT is returned. 
* * If the lookup fails, a status of zero is returned. */ switch ( cache_lookup(ZTOV(dzp), &vp, cnp) ) { case -1: break; case ENOENT: vp = DNLC_NO_VNODE; break; default: vp = NULLVP; } #else vp = dnlc_lookup(ZTOV(dzp), name); #endif /* __APPLE__ */ if (vp == DNLC_NO_VNODE) { VN_RELE(vp); error = ENOENT; } else if (vp) { if (flag & ZNEW) { zfs_dirent_unlock(dl); VN_RELE(vp); return (EEXIST); } *dlpp = dl; *zpp = VTOZ(vp); return (0); } else { error = zap_lookup(zfsvfs->z_os, dzp->z_id, name, 8, 1, &zoid); zoid = ZFS_DIRENT_OBJ(zoid); if (error == ENOENT) #ifdef __APPLE__ /* * Add a negative entry into the VFS name cache */ if ((flag & ZNEW) == 0 && (dzp->z_pflags & ZFS_XATTR) == 0 && (cnp) && (cnp->cn_flags & MAKEENTRY) && (cnp->cn_nameiop != CREATE) && (cnp->cn_nameiop != RENAME)) { cache_enter(ZTOV(dzp), NULLVP, cnp); } #else dnlc_update(ZTOV(dzp), name, DNLC_NO_VNODE); #endif /* __APPLE__ */ } } if (error) { if (error != ENOENT || (flag & ZEXISTS)) { zfs_dirent_unlock(dl); return (error); } } else { if (flag & ZNEW) { zfs_dirent_unlock(dl); return (EEXIST); } //error = zfs_zget_sans_vnode(zfsvfs, zoid, zpp); error = zfs_zget(zfsvfs, zoid, zpp); if (error) { zfs_dirent_unlock(dl); return (error); } else { // Should this be here? //printf("zfs_dir attach 1\n"); //zfs_attach_vnode(*zpp); } if (!(flag & ZXATTR)) #ifdef __APPLE__ if (cnp && cnp->cn_flags & MAKEENTRY) cache_enter(ZTOV(dzp), ZTOV(*zpp), cnp); #else dnlc_update(ZTOV(dzp), name, ZTOV(*zpp)); #endif /* __APPLE__ */ } *dlpp = dl; return (0); }
/*
 * Obtain the vnode of zp's extended attribute directory.
 *
 * ** The directory is created when it is missing and the caller asked
 * ** for creation via CREATE_XATTR_DIR.
 *
 *	IN:	zp	- znode whose attribute directory is wanted
 *		cr	- credentials of caller
 *		flags	- flags from the VOP_LOOKUP call
 *
 *	OUT:	xvpp	- vnode of the extended attribute directory
 *
 *	RETURN:	0 on success
 *		ENOENT if the directory is missing and CREATE_XATTR_DIR
 *		    is not set
 *		EROFS if the directory is missing and the filesystem is
 *		    mounted read-only
 *		any other error number reported by the helpers
 */
int
zfs_get_xattrdir(znode_t *zp, vnode_t **xvpp, cred_t *cr, int flags)
{
	zfsvfs_t	*fs = zp->z_zfsvfs;
	znode_t		*xattr_zp;
	zfs_dirlock_t	*dirlock;
	vattr_t		attrs;
	int		readonly;
	int		err;

	do {
#ifdef __APPLE__
		err = zfs_dirent_lock(&dirlock, zp, NULL, &xattr_zp, ZXATTR);
#else
		err = zfs_dirent_lock(&dirlock, zp, "", &xattr_zp, ZXATTR);
#endif
		if (err != 0)
			return (err);

		/* The directory already exists: hand it back. */
		if (xattr_zp != NULL) {
			*xvpp = ZTOV(xattr_zp);
			zfs_dirent_unlock(dirlock);
			return (0);
		}

		if ((flags & CREATE_XATTR_DIR) == 0) {
			zfs_dirent_unlock(dirlock);
			return (ENOENT);
		}

#ifdef __APPLE__
		readonly = vfs_isrdonly(fs->z_vfs);
#else
		readonly = fs->z_vfs->vfs_flag & VFS_RDONLY;
#endif
		if (readonly) {
			zfs_dirent_unlock(dirlock);
			return (EROFS);
		}

		/*
		 * Creating files in an attribute directory is governed by
		 * the write_xattr permission on the base file, and searching
		 * it by the read_xattr permission on the base file.
		 *
		 * Inside the directory, reading and writing attributes is
		 * controlled by the permissions of each attribute file.
		 */
		attrs.va_mask = AT_TYPE | AT_MODE | AT_UID | AT_GID;
		attrs.va_type = VDIR;
		attrs.va_mode = S_IFDIR | S_ISVTX | 0777;
		zfs_fuid_map_ids(zp, cr, &attrs.va_uid, &attrs.va_gid);

		err = zfs_make_xattrdir(zp, &attrs, xvpp, cr);
		zfs_dirent_unlock(dirlock);

		/* NB: dmu_tx_wait() was already done if it was necessary */
	} while (err == ERESTART && fs->z_assign == TXG_NOWAIT);

	return (err);
}
/*
 * Lookup/Create an extended attribute entry.
 *
 * Input arguments:
 *	dzp	- znode for hidden attribute directory
 *	name	- name of attribute
 *	mode	- mode for a newly created attribute file; the file type
 *		  bits (S_IFMT) are masked off
 *	cr	- credentials of caller
 *	flag	- ZNEW: if the entry already exists, fail with EEXIST.
 *		  ZEXISTS: if the entry does not exist, fail with ENOENT.
 *
 * Output arguments:
 *	vpp	- vnode for the entry; only written when an entry was
 *		  found or created
 *
 * Return value: 0 on success or errno value on failure.  EEXIST from the
 * lookup is reported to the caller as ENOATTR.
 */
int
zfs_obtain_xattr(znode_t *dzp, const char *name, mode_t mode, cred_t *cr,
    vnode_t **vpp, int flag)
{
	znode_t		*xzp = NULL;
	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
	zilog_t		*zilog;
	zfs_dirlock_t	*dl;
	dmu_tx_t	*tx;
	struct vnode_attr vattr;
	int		error;
	struct componentname cn;
	zfs_acl_ids_t	acl_ids;

	/* zfs_dirent_lock() expects a component name */
	bzero(&cn, sizeof (cn));
	cn.cn_nameiop = LOOKUP;
	cn.cn_flags = ISLASTCN;
	cn.cn_nameptr = (char *)name;
	cn.cn_namelen = strlen(name);

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(dzp);
	zilog = zfsvfs->z_log;

	VATTR_INIT(&vattr);
	VATTR_SET(&vattr, va_type, VREG);
	VATTR_SET(&vattr, va_mode, mode & ~S_IFMT);

	/* Nothing to release yet if the ACL ids cannot be created. */
	if ((error = zfs_acl_ids_create(dzp, 0,
	    &vattr, cr, NULL, &acl_ids)) != 0) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

top:
	/* Lock the attribute entry name. */
	error = zfs_dirent_lock(&dl, dzp, (char *)name, &xzp, flag,
	    NULL, &cn);
	if (error)
		goto out;

	/* If the name already exists, we're done. */
	if (xzp != NULL) {
		zfs_dirent_unlock(dl);
		goto out;
	}

	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
	dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, (char *)name);

#if 1 // FIXME
	if (dzp->z_pflags & ZFS_INHERIT_ACE) {
		dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, SPA_MAXBLOCKSIZE);
	}
#endif
	zfs_sa_upgrade_txholds(tx, dzp);

	error = dmu_tx_assign(tx, TXG_NOWAIT);
	if (error) {
		zfs_dirent_unlock(dl);
		if (error == ERESTART) {
			/* Wait for the tx, then retry from the lookup. */
			dmu_tx_wait(tx);
			dmu_tx_abort(tx);
			goto top;
		}
		dmu_tx_abort(tx);
		goto out;
	}

	zfs_mknode(dzp, &vattr, tx, cr, 0, &xzp, &acl_ids);

	/* ASSERT(xzp->z_id == zoid); */
	(void) zfs_link_create(dl, xzp, tx, ZNEW);
	zfs_log_create(zilog, tx, TX_CREATE, dzp, xzp, (char *)name,
	    NULL /* vsecp */, 0 /* acl_ids.z_fuidp */, &vattr);
	dmu_tx_commit(tx);

	zfs_znode_wait_vnode(xzp);

	zfs_dirent_unlock(dl);
out:
	/*
	 * Every path that reaches this label has a successful
	 * zfs_acl_ids_create() behind it, so release the ids here once
	 * rather than only after a successful create; otherwise they leak
	 * when the lock fails, the attribute already exists, or the tx
	 * cannot be assigned.
	 */
	zfs_acl_ids_free(&acl_ids);

	if (error == EEXIST)
		error = ENOATTR;
	if (xzp)
		*vpp = ZTOV(xzp);

	ZFS_EXIT(zfsvfs);
	return (error);
}