/*
 * Attach the dbuf 'db' to znode 'zp'.
 *
 * Under zp->z_lock, records db in zp->z_dbuf and registers zp as the
 * dbuf's user via dmu_buf_set_user_ie().  If zp is the filesystem's root
 * object, VROOT is set on its vnode.  The caller is expected to hold the
 * object mutex for zp->z_id (asserted).
 */
static void
zfs_znode_dmu_init(zfsvfs_t *zfsvfs, znode_t *zp, dmu_buf_t *db)
{
	znode_t *prev_user;

	ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs) || (zfsvfs == zp->z_zfsvfs));
	ASSERT(MUTEX_HELD(ZFS_OBJ_MUTEX(zfsvfs, zp->z_id)));

	mutex_enter(&zp->z_lock);

	ASSERT(zp->z_dbuf == NULL);
	zp->z_dbuf = db;
	prev_user = dmu_buf_set_user_ie(db, zp, &zp->z_phys,
	    znode_evict_error);

	/*
	 * There should be no concurrent zgets on this object, so the
	 * dbuf must not already have a znode attached.
	 */
	if (prev_user != NULL) {
		panic("existing znode %p for dbuf %p",
		    (void *)prev_user, (void *)db);
	}

	/* The root znode's vnode is flagged VROOT. */
	if (zp->z_id == zfsvfs->z_root)
		ZTOV(zp)->v_flag |= VROOT;

	mutex_exit(&zp->z_lock);

	vn_exists(ZTOV(zp));
}
/*
 * Replay an old-format (v0) ACL log record.
 *
 * The ace_t array trails the record.  Setting the ACL is only compiled
 * in when TODO is defined; otherwise this path panics.
 */
static int
zfs_replay_acl_v0(zfsvfs_t *zfsvfs, lr_acl_v0_t *lr, boolean_t byteswap)
{
	ace_t *aces = (ace_t *)(lr + 1);	/* ace array follows lr_acl_t */
	vsecattr_t vsa;
	znode_t *zp;
	int error;

	if (byteswap) {
		byteswap_uint64_array(lr, sizeof (*lr));
		zfs_oldace_byteswap(aces, lr->lr_aclcnt);
	}

	error = zfs_zget(zfsvfs, lr->lr_foid, &zp);
	if (error != 0)
		return (error);

	bzero(&vsa, sizeof (vsa));
	vsa.vsa_mask = VSA_ACE | VSA_ACECNT;
	vsa.vsa_aclcnt = lr->lr_aclcnt;
	vsa.vsa_aclentsz = sizeof (ace_t) * vsa.vsa_aclcnt;
	vsa.vsa_aclflags = 0;
	vsa.vsa_aclentp = aces;

#ifdef TODO
	error = VOP_SETSECATTR(ZTOV(zp), &vsa, 0, kcred, NULL);
#else
	panic("%s:%u: unsupported condition", __func__, __LINE__);
#endif

	VN_RELE(ZTOV(zp));

	return (error);
}
/*
 * Replay a logged remove or rmdir.
 *
 * arg1 is the zfsvfs_t, arg2 the lr_remove_t; the entry name trails the
 * record.  Returns ENOTSUP for any other transaction type.
 */
static int
zfs_replay_remove(void *arg1, char *arg2, boolean_t byteswap)
{
	zfsvfs_t *zfsvfs = arg1;
	lr_remove_t *lr = (lr_remove_t *)arg2;
	char *name = (char *)(lr + 1);	/* name follows lr_remove_t */
	znode_t *dzp;
	int vflg;
	int error;

	if (byteswap)
		byteswap_uint64_array(lr, sizeof (*lr));

	error = zfs_zget(zfsvfs, lr->lr_doid, &dzp);
	if (error != 0)
		return (error);

	vflg = (lr->lr_common.lrc_txtype & TX_CI) ? FIGNORECASE : 0;

	/* The switch operates on the txtype narrowed to int. */
	switch ((int)lr->lr_common.lrc_txtype) {
	case TX_REMOVE:
		error = VOP_REMOVE(ZTOV(dzp), name, kcred, NULL, vflg);
		break;
	case TX_RMDIR:
		error = VOP_RMDIR(ZTOV(dzp), name, NULL, kcred, NULL, vflg);
		break;
	default:
		error = ENOTSUP;
		break;
	}

	VN_RELE(ZTOV(dzp));

	return (error);
}
/*
 * Replay a logged write: write the lr_length bytes that trail the record
 * at lr_offset of the file lr_foid.  A missing file (ENOENT) is treated
 * as success.
 */
static int
zfs_replay_write(zfsvfs_t *zfsvfs, lr_write_t *lr, boolean_t byteswap)
{
	char *payload = (char *)(lr + 1);	/* data follows lr_write_t */
	znode_t *zp;
	ssize_t resid;
	int error;

	if (byteswap)
		byteswap_uint64_array(lr, sizeof (*lr));

	error = zfs_zget(zfsvfs, lr->lr_foid, &zp);
	if (error == ENOENT) {
		/*
		 * As we can log writes out of order, it's possible the
		 * file has been removed. In this case just drop the write
		 * and return success.
		 */
		return (0);
	}
	if (error != 0)
		return (error);

	error = vn_rdwr(UIO_WRITE, ZTOV(zp), payload, lr->lr_length,
	    lr->lr_offset, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid);

	VN_RELE(ZTOV(zp));

	return (error);
}
/*
 * Replay a logged remove or rmdir against the directory lr_doid.
 *
 * The entry name trails the record.  Any other transaction type yields
 * SET_ERROR(ENOTSUP).
 */
static int
zfs_replay_remove(zfsvfs_t *zsb, lr_remove_t *lr, boolean_t byteswap)
{
	char *name = (char *)(lr + 1);	/* name follows lr_remove_t */
	znode_t *dzp;
	int vflg;
	int error;

	if (byteswap)
		byteswap_uint64_array(lr, sizeof (*lr));

	error = zfs_zget(zsb, lr->lr_doid, &dzp);
	if (error != 0)
		return (error);

	zfs_znode_wait_vnode(dzp);

	vflg = (lr->lr_common.lrc_txtype & TX_CI) ? FIGNORECASE : 0;

	/* The switch operates on the txtype narrowed to int. */
	switch ((int)lr->lr_common.lrc_txtype) {
	case TX_REMOVE:
		error = zfs_remove(ZTOV(dzp), name, kcred, NULL, vflg);
		break;
	case TX_RMDIR:
		error = zfs_rmdir(ZTOV(dzp), name, NULL, kcred, NULL, vflg);
		break;
	default:
		error = SET_ERROR(ENOTSUP);
		break;
	}

	vnode_put(ZTOV(dzp));

	return (error);
}
/*
 * Replay a logged rename.
 *
 * arg1 is the zfsvfs_t, arg2 the lr_rename_t.  The source name trails
 * the record and the target name starts right after the source name's
 * terminating NUL.
 */
static int
zfs_replay_rename(void *arg1, char *arg2, boolean_t byteswap)
{
	zfsvfs_t *zfsvfs = arg1;
	lr_rename_t *lr = (lr_rename_t *)arg2;
	char *sname = (char *)(lr + 1);	/* sname and tname follow lr_rename_t */
	char *tname = sname + strlen(sname) + 1;
	znode_t *sdzp, *tdzp;
	int vflg;
	int error;

	if (byteswap)
		byteswap_uint64_array(lr, sizeof (*lr));

	error = zfs_zget(zfsvfs, lr->lr_sdoid, &sdzp);
	if (error != 0)
		return (error);

	error = zfs_zget(zfsvfs, lr->lr_tdoid, &tdzp);
	if (error != 0) {
		VN_RELE(ZTOV(sdzp));
		return (error);
	}

	vflg = (lr->lr_common.lrc_txtype & TX_CI) ? FIGNORECASE : 0;

	error = VOP_RENAME(ZTOV(sdzp), sname, ZTOV(tdzp), tname, kcred,
	    NULL, vflg);

	VN_RELE(ZTOV(tdzp));
	VN_RELE(ZTOV(sdzp));

	return (error);
}
/*
 * Replay a logged truncate by issuing VOP_SPACE(F_FREESP) for the range
 * [lr_offset, lr_offset + lr_length) of file lr_foid.  A missing file
 * (ENOENT) is treated as success.
 */
static int
zfs_replay_truncate(zfsvfs_t *zfsvfs, lr_truncate_t *lr, boolean_t byteswap)
{
	znode_t *zp;
	flock64_t range;
	int error;

	if (byteswap)
		byteswap_uint64_array(lr, sizeof (*lr));

	error = zfs_zget(zfsvfs, lr->lr_foid, &zp);
	if (error == ENOENT) {
		/*
		 * As we can log truncates out of order, it's possible the
		 * file has been removed. In this case just drop the truncate
		 * and return success.
		 */
		return (0);
	}
	if (error != 0)
		return (error);

	bzero(&range, sizeof (range));
	range.l_type = F_WRLCK;
	range.l_whence = 0;
	range.l_start = lr->lr_offset;
	range.l_len = lr->lr_length;

	error = VOP_SPACE(ZTOV(zp), F_FREESP, &range, FWRITE | FOFFMAX,
	    lr->lr_offset, kcred, NULL);

	VN_RELE(ZTOV(zp));

	return (error);
}
/*
 * Replay an old-format (v0) ACL log record by applying the trailing
 * ace_t array to file lr_foid with VOP_SETSECATTR().  A missing file
 * (ENOENT) is treated as success.
 */
static int
zfs_replay_acl_v0(zfsvfs_t *zfsvfs, lr_acl_v0_t *lr, boolean_t byteswap)
{
	ace_t *aces = (ace_t *)(lr + 1);	/* ace array follows lr_acl_t */
	vsecattr_t vsa;
	znode_t *zp;
	int error;

	if (byteswap) {
		byteswap_uint64_array(lr, sizeof (*lr));
		zfs_oldace_byteswap(aces, lr->lr_aclcnt);
	}

	error = zfs_zget(zfsvfs, lr->lr_foid, &zp);
	if (error == ENOENT) {
		/*
		 * As we can log acls out of order, it's possible the
		 * file has been removed. In this case just drop the acl
		 * and return success.
		 */
		return (0);
	}
	if (error != 0)
		return (error);

	bzero(&vsa, sizeof (vsa));
	vsa.vsa_mask = VSA_ACE | VSA_ACECNT;
	vsa.vsa_aclcnt = lr->lr_aclcnt;
	vsa.vsa_aclentp = aces;

	error = VOP_SETSECATTR(ZTOV(zp), &vsa, 0, kcred, NULL);

	VN_RELE(ZTOV(zp));

	return (error);
}
/*
 * readdir on the shares control directory: forwarded to the directory
 * object recorded in zfsvfs->z_shares_dir.
 *
 * Returns ENOTSUP when no shares directory is recorded, and ENOENT (with
 * *eofp set to 1) when that object cannot be obtained.
 */
/* ARGSUSED */
static int
zfsctl_shares_readdir(vnode_t *vp, uio_t *uiop, cred_t *cr, int *eofp,
    caller_context_t *ct, int flags)
{
	zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
	znode_t *dzp;
	int error;

	ZFS_ENTER(zfsvfs);

	if (zfsvfs->z_shares_dir == 0) {
		ZFS_EXIT(zfsvfs);
		return (ENOTSUP);
	}

	if (zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp) != 0) {
		*eofp = 1;
		ZFS_EXIT(zfsvfs);
		return (ENOENT);
	}

	error = VOP_READDIR(ZTOV(dzp), uiop, cr, eofp, ct, flags);
	VN_RELE(ZTOV(dzp));

	ZFS_EXIT(zfsvfs);
	return (error);
}
/*
 * lookup in the shares control directory.
 *
 * "." / ".." style names are resolved by gfs_lookup_dot(); everything
 * else is forwarded to the directory object recorded in
 * zfsvfs->z_shares_dir.
 *
 * Returns 0 on success, ENOTSUP when no shares directory is recorded,
 * or the error from zfs_zget() / VOP_LOOKUP().
 */
/* ARGSUSED */
static int
zfsctl_shares_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp,
    int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
    int *direntflags, pathname_t *realpnp)
{
	zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data;
	znode_t *dzp;
	int error;

	ZFS_ENTER(zfsvfs);

	if (gfs_lookup_dot(vpp, dvp, zfsvfs->z_ctldir, nm) == 0) {
		ZFS_EXIT(zfsvfs);
		return (0);
	}

	if (zfsvfs->z_shares_dir == 0) {
		ZFS_EXIT(zfsvfs);
		return (ENOTSUP);
	}

	error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp);
	if (error == 0) {
		error = VOP_LOOKUP(ZTOV(dzp), nm, vpp, pnp,
		    flags, rdir, cr, ct, direntflags, realpnp);
		/*
		 * Release the hold only when zfs_zget() succeeded; on
		 * failure dzp is uninitialized and must not be touched.
		 */
		VN_RELE(ZTOV(dzp));
	}

	ZFS_EXIT(zfsvfs);
	return (error);
}
/*
 * Replay a logged truncate.
 *
 * Only implemented when 'sun' is defined, where it issues
 * VOP_SPACE(F_FREESP) for the logged range.  Otherwise it logs a message
 * and returns EOPNOTSUPP.
 */
static int
zfs_replay_truncate(zfsvfs_t *zfsvfs, lr_truncate_t *lr, boolean_t byteswap)
{
#ifdef sun
	znode_t *zp;
	flock64_t range;
	int error;

	if (byteswap)
		byteswap_uint64_array(lr, sizeof (*lr));

	error = zfs_zget(zfsvfs, lr->lr_foid, &zp);
	if (error != 0)
		return (error);

	bzero(&range, sizeof (range));
	range.l_type = F_WRLCK;
	range.l_whence = 0;
	range.l_start = lr->lr_offset;
	range.l_len = lr->lr_length;

	error = VOP_SPACE(ZTOV(zp), F_FREESP, &range, FWRITE | FOFFMAX,
	    lr->lr_offset, kcred, NULL);

	VN_RELE(ZTOV(zp));

	return (error);
#else	/* !sun */
	ZFS_LOG(0, "Unexpected code path, report to [email protected]");
	return (EOPNOTSUPP);
#endif	/* !sun */
}
void zfs_rmnode(znode_t *zp) { zfsvfs_t *zfsvfs = zp->z_zfsvfs; objset_t *os = zfsvfs->z_os; znode_t *xzp = NULL; dmu_tx_t *tx; uint64_t acl_obj; uint64_t xattr_obj; int error; ASSERT(zp->z_links == 0); ASSERT_VOP_ELOCKED(ZTOV(zp), __func__); /* * If this is an attribute directory, purge its contents. */ if (ZTOV(zp) != NULL && ZTOV(zp)->v_type == VDIR && (zp->z_pflags & ZFS_XATTR)) { if (zfs_purgedir(zp) != 0) { /* * Not enough space to delete some xattrs. * Leave it in the unlinked set. */ zfs_znode_dmu_fini(zp); zfs_znode_free(zp); return; } } else { /* * Free up all the data in the file. We don't do this for * XATTR directories because we need truncate and remove to be * in the same tx, like in zfs_znode_delete(). Otherwise, if * we crash here we'll end up with an inconsistent truncated * zap object in the delete queue. Note a truncated file is * harmless since it only contains user data. */ error = dmu_free_long_range(os, zp->z_id, 0, DMU_OBJECT_END); if (error) { /* * Not enough space. Leave the file in the unlinked * set. */ zfs_znode_dmu_fini(zp); zfs_znode_free(zp); return; } } /* * If the file has extended attributes, we're going to unlink * the xattr dir. */ error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &xattr_obj, sizeof (xattr_obj)); if (error == 0 && xattr_obj) { error = zfs_zget(zfsvfs, xattr_obj, &xzp); ASSERT3S(error, ==, 0); vn_lock(ZTOV(xzp), LK_EXCLUSIVE | LK_RETRY); }
/*
 * Replay a logged truncate by calling zfs_space(F_FREESP) for the range
 * described by lr_offset / lr_length on file lr_foid.
 */
static int
zfs_replay_truncate(zfsvfs_t *zsb, lr_truncate_t *lr, boolean_t byteswap)
{
	znode_t *zp;
	struct flock range;
	int error;

	if (byteswap)
		byteswap_uint64_array(lr, sizeof (*lr));

	error = zfs_zget(zsb, lr->lr_foid, &zp);
	if (error != 0)
		return (error);

	zfs_znode_wait_vnode(zp);

	bzero(&range, sizeof (range));
	range.l_type = F_WRLCK;
	range.l_whence = 0;
	range.l_start = lr->lr_offset;
	range.l_len = lr->lr_length;

	error = zfs_space(ZTOV(zp), F_FREESP, &range, FWRITE | FOFFMAX,
	    lr->lr_offset, kcred, NULL);

	vnode_put(ZTOV(zp));

	return (error);
}
/*
 * Replay an old-format (v0) ACL log record by applying the trailing
 * ace_t array to file lr_foid with zfs_setsecattr().
 */
static int
zfs_replay_acl_v0(zfsvfs_t *zsb, lr_acl_v0_t *lr, boolean_t byteswap)
{
	ace_t *aces = (ace_t *)(lr + 1);	/* ace array follows lr_acl_t */
	vsecattr_t vsa;
	znode_t *zp;
	int error;

	if (byteswap) {
		byteswap_uint64_array(lr, sizeof (*lr));
		zfs_oldace_byteswap(aces, lr->lr_aclcnt);
	}

	error = zfs_zget(zsb, lr->lr_foid, &zp);
	if (error != 0)
		return (error);

	zfs_znode_wait_vnode(zp);

	bzero(&vsa, sizeof (vsa));
	vsa.vsa_mask = VSA_ACE | VSA_ACECNT;
	vsa.vsa_aclcnt = lr->lr_aclcnt;
	vsa.vsa_aclentsz = sizeof (ace_t) * vsa.vsa_aclcnt;
	vsa.vsa_aclflags = 0;
	vsa.vsa_aclentp = aces;

	error = zfs_setsecattr(ZTOV(zp), &vsa, 0, kcred, NULL);

	vnode_put(ZTOV(zp));

	return (error);
}
/*
 * Replay a logged write to file lr_foid.
 *
 * The data trails the record.  When the record length equals
 * sizeof (lr_write_t) the write is treated as a whole-block write: the
 * range is widened to the block and, if the file is shorter than the
 * logged end of data, that end is handed to the write path through
 * zsb->z_replay_eof.  A missing file (ENOENT) is treated as success.
 */
static int
zfs_replay_write(zfsvfs_t *zsb, lr_write_t *lr, boolean_t byteswap)
{
	char *payload = (char *)(lr + 1);	/* data follows lr_write_t */
	znode_t *zp;
	uint64_t eod, offset, length;
	ssize_t resid;
	int error;

	if (byteswap)
		byteswap_uint64_array(lr, sizeof (*lr));

	error = zfs_zget(zsb, lr->lr_foid, &zp);
	if (error == ENOENT) {
		/*
		 * As we can log writes out of order, it's possible the
		 * file has been removed. In this case just drop the write
		 * and return success.
		 */
		return (0);
	}
	if (error != 0)
		return (error);

	zfs_znode_wait_vnode(zp);

	offset = lr->lr_offset;
	length = lr->lr_length;
	eod = offset + length;	/* end of data for this write */

	/*
	 * This may be a write from a dmu_sync() for a whole block,
	 * and may extend beyond the current end of the file.
	 * We can't just replay what was written for this TX_WRITE as
	 * a future TX_WRITE2 may extend the eof and the data for that
	 * write needs to be there. So we write the whole block and
	 * reduce the eof. This needs to be done within the single dmu
	 * transaction created within vn_rdwr -> zfs_write. So a possible
	 * new end of file is passed through in zsb->z_replay_eof
	 */
	zsb->z_replay_eof = 0;	/* 0 means don't change end of file */

	/* If it's a dmu_sync() block, write the whole block */
	if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) {
		uint64_t blocksize = BP_GET_LSIZE(&lr->lr_blkptr);

		if (length < blocksize) {
			offset -= offset % blocksize;
			length = blocksize;
		}
		if (zp->z_size < eod)
			zsb->z_replay_eof = eod;
	}

	error = vn_rdwr(UIO_WRITE, ZTOV(zp), payload, length, offset,
	    UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid);

	VN_RELE(ZTOV(zp));
	zsb->z_replay_eof = 0;	/* safety */

	return (error);
}
/*
 * Replay a logged rename from directory lr_sdoid to directory lr_tdoid.
 *
 * The source name trails the record and the target name starts right
 * after the source name's terminating NUL.
 */
static int
zfs_replay_rename(zfsvfs_t *zsb, lr_rename_t *lr, boolean_t byteswap)
{
	char *sname = (char *)(lr + 1);	/* sname and tname follow lr_rename_t */
	char *tname = sname + strlen(sname) + 1;
	znode_t *sdzp, *tdzp;
	int vflg;
	int error;

	if (byteswap)
		byteswap_uint64_array(lr, sizeof (*lr));

	error = zfs_zget(zsb, lr->lr_sdoid, &sdzp);
	if (error != 0)
		return (error);
	zfs_znode_wait_vnode(sdzp);

	error = zfs_zget(zsb, lr->lr_tdoid, &tdzp);
	if (error != 0) {
		vnode_put(ZTOV(sdzp));
		return (error);
	}
	zfs_znode_wait_vnode(tdzp);

	vflg = (lr->lr_common.lrc_txtype & TX_CI) ? FIGNORECASE : 0;

	error = zfs_rename(ZTOV(sdzp), sname, ZTOV(tdzp), tname, kcred,
	    NULL, vflg);

	vnode_put(ZTOV(tdzp));
	vnode_put(ZTOV(sdzp));

	return (error);
}
/*
 * Replay a logged hard link: create the entry whose name trails the
 * record in directory lr_doid, pointing at object lr_link_obj.
 */
static int
zfs_replay_link(zfsvfs_t *zsb, lr_link_t *lr, boolean_t byteswap)
{
	char *name = (char *)(lr + 1);	/* name follows lr_link_t */
	znode_t *dzp, *zp;
	int vflg;
	int error;

	if (byteswap)
		byteswap_uint64_array(lr, sizeof (*lr));

	error = zfs_zget(zsb, lr->lr_doid, &dzp);
	if (error != 0)
		return (error);
	zfs_znode_wait_vnode(dzp);

	error = zfs_zget(zsb, lr->lr_link_obj, &zp);
	if (error != 0) {
		vnode_put(ZTOV(dzp));
		return (error);
	}
	zfs_znode_wait_vnode(zp);

	vflg = (lr->lr_common.lrc_txtype & TX_CI) ? FIGNORECASE : 0;

	error = zfs_link(ZTOV(dzp), ZTOV(zp), name, kcred, NULL, vflg);

	vnode_put(ZTOV(zp));
	vnode_put(ZTOV(dzp));

	return (error);
}
/*
 * Replay a logged truncate.
 *
 * arg1 is the zfsvfs_t, arg2 the lr_truncate_t.  Issues
 * VOP_SPACE(F_FREESP) for the range described by lr_offset / lr_length
 * on file lr_foid.
 */
static int
zfs_replay_truncate(void *arg1, char *arg2, boolean_t byteswap)
{
	zfsvfs_t *zfsvfs = arg1;
	lr_truncate_t *lr = (lr_truncate_t *)arg2;
	znode_t *zp;
	flock64_t range;
	int error;

	if (byteswap)
		byteswap_uint64_array(lr, sizeof (*lr));

	error = zfs_zget(zfsvfs, lr->lr_foid, &zp);
	if (error != 0)
		return (error);

	bzero(&range, sizeof (range));
	range.l_type = F_WRLCK;
	range.l_whence = 0;
	range.l_start = lr->lr_offset;
	range.l_len = lr->lr_length;

	error = VOP_SPACE(ZTOV(zp), F_FREESP, &range, FWRITE | FOFFMAX,
	    lr->lr_offset, kcred, NULL);

	VN_RELE(ZTOV(zp));

	return (error);
}
/*
 * Replay a logged hard link.
 *
 * arg1 is the zfsvfs_t, arg2 the lr_link_t.  Creates the entry whose
 * name trails the record in directory lr_doid, pointing at object
 * lr_link_obj.
 */
static int
zfs_replay_link(void *arg1, char *arg2, boolean_t byteswap)
{
	zfsvfs_t *zfsvfs = arg1;
	lr_link_t *lr = (lr_link_t *)arg2;
	char *name = (char *)(lr + 1);	/* name follows lr_link_t */
	znode_t *dzp, *zp;
	int vflg;
	int error;

	if (byteswap)
		byteswap_uint64_array(lr, sizeof (*lr));

	error = zfs_zget(zfsvfs, lr->lr_doid, &dzp);
	if (error != 0)
		return (error);

	error = zfs_zget(zfsvfs, lr->lr_link_obj, &zp);
	if (error != 0) {
		VN_RELE(ZTOV(dzp));
		return (error);
	}

	vflg = (lr->lr_common.lrc_txtype & TX_CI) ? FIGNORECASE : 0;

	error = VOP_LINK(ZTOV(dzp), ZTOV(zp), name, kcred, NULL, vflg);

	VN_RELE(ZTOV(zp));
	VN_RELE(ZTOV(dzp));

	return (error);
}
/*
 * Replay a logged setattr on file lr_foid.
 *
 * The vattr is rebuilt from the record; optional xvattr data and FUID
 * domain data trail the record in that order.  A missing file (ENOENT)
 * is treated as success.
 */
static int
zfs_replay_setattr(zfsvfs_t *zfsvfs, lr_setattr_t *lr, boolean_t byteswap)
{
	znode_t *zp;
	xvattr_t xva;
	vattr_t *vap = &xva.xva_vattr;
	void *start;
	int error;

	xva_init(&xva);

	if (byteswap) {
		byteswap_uint64_array(lr, sizeof (*lr));

		if ((lr->lr_mask & AT_XVATTR) &&
		    zfsvfs->z_version >= ZPL_VERSION_INITIAL)
			zfs_replay_swap_attrs((lr_attr_t *)(lr + 1));
	}

	error = zfs_zget(zfsvfs, lr->lr_foid, &zp);
	if (error == ENOENT) {
		/*
		 * As we can log setattrs out of order, it's possible the
		 * file has been removed. In this case just drop the setattr
		 * and return success.
		 */
		return (0);
	}
	if (error != 0)
		return (error);

	zfs_init_vattr(vap, lr->lr_mask, lr->lr_mode,
	    lr->lr_uid, lr->lr_gid, 0, lr->lr_foid);

	vap->va_size = lr->lr_size;
	ZFS_TIME_DECODE(&vap->va_atime, lr->lr_atime);
	ZFS_TIME_DECODE(&vap->va_mtime, lr->lr_mtime);

	/*
	 * Fill in xvattr_t portions if necessary, advancing 'start' past
	 * the xvattr data so it points at what follows.
	 */
	start = lr + 1;
	if (vap->va_mask & AT_XVATTR) {
		lr_attr_t *lrattr = start;

		zfs_replay_xvattr(lrattr, &xva);
		start = (caddr_t)start +
		    ZIL_XVAT_SIZE(lrattr->lr_attr_masksize);
	} else {
		vap->va_mask &= ~AT_XVATTR;
	}

	zfsvfs->z_fuid_replay = zfs_replay_fuid_domain(start, &start,
	    lr->lr_uid, lr->lr_gid);

	error = VOP_SETATTR(ZTOV(zp), vap, 0, kcred, NULL);

	zfs_fuid_info_free(zfsvfs->z_fuid_replay);
	zfsvfs->z_fuid_replay = NULL;
	VN_RELE(ZTOV(zp));

	return (error);
}
/*
 * Remove every entry of directory 'dzp'.
 *
 * Returns the number of entries that could not be removed.  If walking
 * the directory ends with anything other than ENOENT, the count is
 * bumped by one so that it is at least one and the directory stays in
 * the unlinked set.
 *
 * NOTE: this function assumes that the directory is inactive,
 *	so there is no need to lock its entries before deletion.
 *	Also, it assumes the directory contents is *only* regular
 *	files.
 */
static int
zfs_purgedir(znode_t *dzp)
{
	zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
	zap_cursor_t zc;
	zap_attribute_t zap;
	zfs_dirlock_t dl;
	znode_t *xzp;
	dmu_tx_t *tx;
	int skipped = 0;
	int error;

	zap_cursor_init(&zc, zfsvfs->z_os, dzp->z_id);
	while ((error = zap_cursor_retrieve(&zc, &zap)) == 0) {
		error = zfs_zget(zfsvfs,
		    ZFS_DIRENT_OBJ(zap.za_first_integer), &xzp);
		if (error) {
			skipped += 1;
			zap_cursor_advance(&zc);
			continue;
		}

		/*
		 * The assertion that xzp's vnode is VREG or VLNK is
		 * commented out in this version.
		 */

		tx = dmu_tx_create(zfsvfs->z_os);
		dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
		dmu_tx_hold_zap(tx, dzp->z_id, FALSE, zap.za_name);
		dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
		dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
		/* Is this really needed ? */
		zfs_sa_upgrade_txholds(tx, xzp);

		error = dmu_tx_assign(tx, TXG_WAIT);
		if (error) {
			dmu_tx_abort(tx);
			skipped += 1;
		} else {
			bzero(&dl, sizeof (dl));
			dl.dl_dzp = dzp;
			dl.dl_name = zap.za_name;

			error = zfs_link_destroy(&dl, xzp, tx, 0, NULL);
			if (error)
				skipped += 1;
			dmu_tx_commit(tx);
		}

		/* The hold on xzp is dropped asynchronously via a taskq. */
		VN_RELE_ASYNC(ZTOV(xzp),
		    dsl_pool_vnrele_taskq(dmu_objset_pool(zfsvfs->z_os)));

		zap_cursor_advance(&zc);
	}
	zap_cursor_fini(&zc);

	if (error != ENOENT)
		skipped += 1;

	return (skipped);
}
/*
 * Unlink zp from dl, and mark zp for reaping if this was the last link.
 * Can fail if zp is a mount point (EBUSY) or a non-empty directory (EEXIST).
 * If 'reaped_ptr' is NULL, we put reaped znodes on the delete queue.
 * If it's non-NULL, we use it to indicate whether the znode needs reaping,
 * and it's the caller's job to do it.
 *
 * When 'flag' contains ZRENAMING, zp's own link count and the mount-point
 * and emptiness checks are skipped; only the parent directory is updated
 * and the entry removed.
 *
 * Returns 0 on success.  Note that the result of zap_remove() is only
 * checked by ASSERT; the function returns 0 regardless.
 */
int
zfs_link_destroy(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag,
	int *reaped_ptr)
{
	znode_t *dzp = dl->dl_dzp;
	vnode_t *vp = ZTOV(zp);
	/* 1 for a directory, 0 otherwise; used below as a link-count term */
	int zp_is_dir = (vp->v_type == VDIR);
	int reaped = 0;
	int error;

	dnlc_remove(ZTOV(dzp), dl->dl_name);

	if (!(flag & ZRENAMING)) {
		dmu_buf_will_dirty(zp->z_dbuf, tx);

		if (vn_vfswlock(vp))		/* prevent new mounts on zp */
			return (EBUSY);

		if (vn_ismntpt(vp)) {		/* don't remove mount point */
			vn_vfsunlock(vp);
			return (EBUSY);
		}

		mutex_enter(&zp->z_lock);
		if (zp_is_dir && !zfs_dirempty(zp)) {	/* dir not empty */
			mutex_exit(&zp->z_lock);
			vn_vfsunlock(vp);
			return (EEXIST);
		}
		ASSERT(zp->z_phys->zp_links > zp_is_dir);
		/*
		 * Drop one link.  If the remaining count equals zp_is_dir
		 * (0 for a file, 1 for a directory), mark the znode for
		 * reaping and zero its link count.
		 */
		if (--zp->z_phys->zp_links == zp_is_dir) {
			zp->z_reap = 1;
			zp->z_phys->zp_links = 0;
			reaped = 1;
		} else {
			zfs_time_stamper_locked(zp, STATE_CHANGED, tx);
		}
		mutex_exit(&zp->z_lock);
		vn_vfsunlock(vp);
	}

	/* Update the parent directory's size, link count and timestamps. */
	dmu_buf_will_dirty(dzp->z_dbuf, tx);
	mutex_enter(&dzp->z_lock);
	dzp->z_phys->zp_size--;			/* one dirent removed */
	dzp->z_phys->zp_links -= zp_is_dir;	/* ".." link from zp */
	zfs_time_stamper_locked(dzp, CONTENT_MODIFIED, tx);
	mutex_exit(&dzp->z_lock);

	error = zap_remove(zp->z_zfsvfs->z_os, dzp->z_id, dl->dl_name, tx);
	ASSERT(error == 0);

	/* Either report the reap state to the caller or queue zp here. */
	if (reaped_ptr != NULL)
		*reaped_ptr = reaped;
	else if (reaped)
		zfs_dq_add(zp, tx);

	return (0);
}
/*
 * Replaying ACLs is complicated by FUID support.
 * The log record may contain some optional data
 * to be used for replaying FUID's.  These pieces
 * are the actual FUIDs that were created initially.
 * The FUID table index may no longer be valid and
 * during zfs_create() a new index may be assigned.
 * Because of this the log will contain the original
 * domain+rid in order to create a new FUID.
 *
 * The individual ACEs may contain an ephemeral uid/gid which is no
 * longer valid and will need to be replaced with an actual FUID.
 *
 * 'data' points at an lr_acl_t followed by the ACE data and, when
 * lr_fuidcnt is non-zero, the FUID data.
 *
 * On OSv (__OSV__) this record type is not supported: a message is
 * printed and EOPNOTSUPP is returned.  Elsewhere the ACL is only
 * applied when TODO is defined; otherwise EOPNOTSUPP is returned after
 * the znode has been looked up and released.
 */
static int
zfs_replay_acl(zfsvfs_t *zfsvfs, void *data, boolean_t byteswap)
{
#ifdef __OSV__
	/* This handler processes lr_acl_t records, i.e. TX_ACL. */
	kprintf("TX_ACL not supported on OSv\n");
	return EOPNOTSUPP;
#else
	lr_acl_t *lr = data;
	ace_t *ace = (ace_t *)(lr + 1);
	vsecattr_t vsa;
	znode_t *zp;
	int error;

	if (byteswap) {
		byteswap_uint64_array(lr, sizeof (*lr));
		zfs_ace_byteswap(ace, lr->lr_acl_bytes, B_FALSE);
		if (lr->lr_fuidcnt) {
			byteswap_uint64_array((caddr_t)ace +
			    ZIL_ACE_LENGTH(lr->lr_acl_bytes),
			    lr->lr_fuidcnt * sizeof (uint64_t));
		}
	}

	if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0)
		return (error);

#ifdef TODO
	bzero(&vsa, sizeof (vsa));
	vsa.vsa_mask = VSA_ACE | VSA_ACECNT | VSA_ACE_ACLFLAGS;
	vsa.vsa_aclcnt = lr->lr_aclcnt;
	vsa.vsa_aclentp = ace;
	vsa.vsa_aclentsz = lr->lr_acl_bytes;
	vsa.vsa_aclflags = lr->lr_acl_flags;

	if (lr->lr_fuidcnt) {
		/* FUID data starts right after the ACE data. */
		void *fuidstart = (caddr_t)ace +
		    ZIL_ACE_LENGTH(lr->lr_acl_bytes);

		zfsvfs->z_fuid_replay = zfs_replay_fuids(fuidstart,
		    &fuidstart, lr->lr_fuidcnt, lr->lr_domcnt, 0, 0);
	}

	error = VOP_SETSECATTR(ZTOV(zp), &vsa, 0, kcred, NULL);

	if (zfsvfs->z_fuid_replay)
		zfs_fuid_info_free(zfsvfs->z_fuid_replay);
#else
	error = EOPNOTSUPP;
#endif

	zfsvfs->z_fuid_replay = NULL;
	VN_RELE(ZTOV(zp));

	return (error);
#endif
}
/*
 * Replay a logged write to file lr_foid.
 *
 * The data trails the record.  When the record length equals
 * sizeof (lr_write_t) the write is treated as a whole-block write and
 * the range is widened to the block.  If the file grew as a result of
 * the write, its size is set back to the logged end of data.  A missing
 * file (ENOENT) is treated as success.
 */
static int
zfs_replay_write(zfsvfs_t *zfsvfs, lr_write_t *lr, boolean_t byteswap)
{
	char *payload = (char *)(lr + 1);	/* data follows lr_write_t */
	znode_t *zp;
	uint64_t size_before, eod, offset, length;
	ssize_t resid;
	int error;

	if (byteswap)
		byteswap_uint64_array(lr, sizeof (*lr));

	error = zfs_zget(zfsvfs, lr->lr_foid, &zp);
	if (error == ENOENT) {
		/*
		 * As we can log writes out of order, it's possible the
		 * file has been removed. In this case just drop the write
		 * and return success.
		 */
		return (0);
	}
	if (error != 0)
		return (error);

	offset = lr->lr_offset;
	length = lr->lr_length;
	eod = offset + length;		/* end of data for this write */
	size_before = zp->z_phys->zp_size;

	/* If it's a dmu_sync() block, write the whole block */
	if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) {
		uint64_t blocksize = BP_GET_LSIZE(&lr->lr_blkptr);

		if (length < blocksize) {
			offset -= offset % blocksize;
			length = blocksize;
		}
	}

	error = vn_rdwr(UIO_WRITE, ZTOV(zp), payload, length, offset,
	    UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid);

	/*
	 * This may be a write from a dmu_sync() for a whole block,
	 * and may extend beyond the current end of the file.
	 * We can't just replay what was written for this TX_WRITE as
	 * a future TX_WRITE2 may extend the eof and the data for that
	 * write needs to be there. So we write the whole block and
	 * reduce the eof.
	 */
	if (size_before < zp->z_phys->zp_size) {	/* file length grew ? */
		zp->z_phys->zp_size = eod;
	}

	VN_RELE(ZTOV(zp));

	return (error);
}
/*
 * Look up an entry in a directory.
 *
 * NOTE: '.' and '..' are handled as special cases because
 *	no directory entries are actually stored for them.  If this is
 *	the root of a filesystem, then '.zfs' is also treated as a
 *	special pseudo-directory.
 *
 * On success *vpp is set to the vnode that was found and 0 is returned;
 * otherwise the error from the failing lookup step is returned.
 * When FIGNORECASE is set in 'flags' and 'rpnp' is non-NULL, the name is
 * copied into rpnp->pn_buf for the '.', '..' and '.zfs' cases; for an
 * ordinary entry rpnp is handed to zfs_dirent_lock() instead.
 */
int
zfs_dirlook(znode_t *dzp, char *name, vnode_t **vpp, int flags,
    int *deflg, pathname_t *rpnp)
{
	zfs_dirlock_t *dl;
	znode_t *zp;
	int error = 0;
	uint64_t parent;

	if (name[0] == 0 || (name[0] == '.' && name[1] == 0)) {
		/* Empty name or ".": the directory itself, with a new hold. */
		*vpp = ZTOV(dzp);
		VN_HOLD(*vpp);
	} else if (name[0] == '.' && name[1] == '.' && name[2] == 0) {
		zfsvfs_t *zfsvfs = dzp->z_zfsvfs;

		/*
		 * If we are a snapshot mounted under .zfs, return
		 * the vp for the snapshot directory.
		 */
		if ((error = sa_lookup(dzp->z_sa_hdl,
		    SA_ZPL_PARENT(zfsvfs), &parent, sizeof (parent))) != 0)
			return (error);
		/* dzp is its own parent and this filesystem has a parent */
		if (parent == dzp->z_id && zfsvfs->z_parent != zfsvfs) {
			error = zfsctl_root_lookup(zfsvfs->z_parent->z_ctldir,
			    "snapshot", vpp, NULL, 0, NULL, kcred,
			    NULL, NULL, NULL);
			return (error);
		}
		rw_enter(&dzp->z_parent_lock, RW_READER);
		error = zfs_zget(zfsvfs, parent, &zp);
		if (error == 0)
			*vpp = ZTOV(zp);
		rw_exit(&dzp->z_parent_lock);
	} else if (zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0) {
		*vpp = zfsctl_root(dzp);
	} else {
		int zf;

		zf = ZEXISTS | ZSHARED;
		if (flags & FIGNORECASE)
			zf |= ZCILOOK;

		error = zfs_dirent_lock(&dl, dzp, name, &zp, zf, deflg, rpnp);
		if (error == 0) {
			*vpp = ZTOV(zp);
			zfs_dirent_unlock(dl);
			dzp->z_zn_prefetch = B_TRUE; /* enable prefetching */
		}
		/*
		 * Clear rpnp so the copy below is skipped for this case
		 * (rpnp was already passed to zfs_dirent_lock()).
		 */
		rpnp = NULL;
	}

	if ((flags & FIGNORECASE) && rpnp && !error)
		(void) strlcpy(rpnp->pn_buf, name, rpnp->pn_bufsize);

	return (error);
}
/*
 * Replay an ACL log record on file lr_foid.
 *
 * FUID support complicates this: the record may carry optional data
 * holding the FUIDs that were created originally.  A FUID table index
 * may no longer be valid, and zfs_create() may assign a new one, so the
 * log keeps the original domain+rid from which a new FUID is created.
 *
 * Individual ACEs may also contain an ephemeral uid/gid that is no
 * longer valid and has to be replaced with an actual FUID.
 *
 * A missing file (ENOENT) is treated as success.
 */
static int
zfs_replay_acl(zfsvfs_t *zfsvfs, lr_acl_t *lr, boolean_t byteswap)
{
	ace_t *aces = (ace_t *)(lr + 1);
	vsecattr_t vsa;
	znode_t *zp;
	int error;

	if (byteswap) {
		byteswap_uint64_array(lr, sizeof (*lr));
		zfs_ace_byteswap(aces, lr->lr_acl_bytes, B_FALSE);
		if (lr->lr_fuidcnt) {
			byteswap_uint64_array((caddr_t)aces +
			    ZIL_ACE_LENGTH(lr->lr_acl_bytes),
			    lr->lr_fuidcnt * sizeof (uint64_t));
		}
	}

	error = zfs_zget(zfsvfs, lr->lr_foid, &zp);
	if (error == ENOENT) {
		/*
		 * As we can log acls out of order, it's possible the
		 * file has been removed. In this case just drop the acl
		 * and return success.
		 */
		return (0);
	}
	if (error != 0)
		return (error);

	bzero(&vsa, sizeof (vsa));
	vsa.vsa_mask = VSA_ACE | VSA_ACECNT | VSA_ACE_ACLFLAGS;
	vsa.vsa_aclcnt = lr->lr_aclcnt;
	vsa.vsa_aclentp = aces;
	vsa.vsa_aclentsz = lr->lr_acl_bytes;
	vsa.vsa_aclflags = lr->lr_acl_flags;

	if (lr->lr_fuidcnt) {
		/* FUID data starts right after the ACE data. */
		void *fuidstart = (caddr_t)aces +
		    ZIL_ACE_LENGTH(lr->lr_acl_bytes);

		zfsvfs->z_fuid_replay = zfs_replay_fuids(fuidstart,
		    &fuidstart, lr->lr_fuidcnt, lr->lr_domcnt, 0, 0);
	}

	error = VOP_SETSECATTR(ZTOV(zp), &vsa, 0, kcred, NULL);

	if (zfsvfs->z_fuid_replay)
		zfs_fuid_info_free(zfsvfs->z_fuid_replay);
	zfsvfs->z_fuid_replay = NULL;

	VN_RELE(ZTOV(zp));

	return (error);
}
/*
 * Replay a logged setattr on file lr_foid.
 *
 * arg1 is the zfsvfs_t, arg2 the lr_setattr_t.  The vattr is rebuilt
 * from the record.  Handling of trailing xvattr data is compiled in only
 * when HAVE_ZPL is defined; FUID domain data is read from wherever
 * 'start' points afterwards.
 */
static int
zfs_replay_setattr(void *arg1, char *arg2, boolean_t byteswap)
{
	zfsvfs_t *zfsvfs = arg1;
	lr_setattr_t *lr = (lr_setattr_t *)arg2;
	znode_t *zp;
	xvattr_t xva;
	vattr_t *vap = &xva.xva_vattr;
	void *start;
	int error;

	xva_init(&xva);

	if (byteswap) {
		byteswap_uint64_array(lr, sizeof (*lr));

		if ((lr->lr_mask & AT_XVATTR) &&
		    zfsvfs->z_version >= ZPL_VERSION_INITIAL)
			zfs_replay_swap_attrs((lr_attr_t *)(lr + 1));
	}

	error = zfs_zget(zfsvfs, lr->lr_foid, &zp);
	if (error != 0)
		return (error);

	zfs_init_vattr(vap, lr->lr_mask, lr->lr_mode,
	    lr->lr_uid, lr->lr_gid, 0, lr->lr_foid);

	vap->va_size = lr->lr_size;
	ZFS_TIME_DECODE(&vap->va_atime, lr->lr_atime);
	ZFS_TIME_DECODE(&vap->va_mtime, lr->lr_mtime);

	/*
	 * Fill in xvattr_t portions if necessary.
	 */
	start = lr + 1;
#ifdef HAVE_ZPL
	if (vap->va_mask & AT_XVATTR) {
		lr_attr_t *lrattr = start;

		zfs_replay_xvattr(lrattr, &xva);
		start = (caddr_t)start +
		    ZIL_XVAT_SIZE(lrattr->lr_attr_masksize);
	} else {
		vap->va_mask &= ~AT_XVATTR;
	}
#endif /* HAVE_ZPL */

	zfsvfs->z_fuid_replay = zfs_replay_fuid_domain(start, &start,
	    lr->lr_uid, lr->lr_gid);

	error = VOP_SETATTR(ZTOV(zp), vap, 0, kcred, NULL);

	zfs_fuid_info_free(zfsvfs->z_fuid_replay);
	zfsvfs->z_fuid_replay = NULL;
	VN_RELE(ZTOV(zp));

	return (error);
}
/*
 * Remove every entry of directory 'dzp'.
 *
 * Returns the number of entries that could not be removed.  If walking
 * the directory ends with anything other than ENOENT, the count is
 * bumped by one so that it is at least one and the directory stays in
 * the unlinked set.
 *
 * NOTE: this function assumes that the directory is inactive,
 *	so there is no need to lock its entries before deletion.
 *	Also, it assumes the directory contents is *only* regular
 *	files.
 */
static int
zfs_purgedir(znode_t *dzp)
{
	zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
	zap_cursor_t zc;
	zap_attribute_t zap;
	znode_t *xzp;
	dmu_tx_t *tx;
	int skipped = 0;
	int error;

	zap_cursor_init(&zc, zfsvfs->z_os, dzp->z_id);
	while ((error = zap_cursor_retrieve(&zc, &zap)) == 0) {
		error = zfs_zget(zfsvfs,
		    ZFS_DIRENT_OBJ(zap.za_first_integer), &xzp);
		if (error) {
			skipped += 1;
			zap_cursor_advance(&zc);
			continue;
		}

		vn_lock(ZTOV(xzp), LK_EXCLUSIVE | LK_RETRY);
		ASSERT((ZTOV(xzp)->v_type == VREG) ||
		    (ZTOV(xzp)->v_type == VLNK));

		tx = dmu_tx_create(zfsvfs->z_os);
		dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
		dmu_tx_hold_zap(tx, dzp->z_id, FALSE, zap.za_name);
		dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
		dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
		/* Is this really needed ? */
		zfs_sa_upgrade_txholds(tx, xzp);
		dmu_tx_mark_netfree(tx);

		error = dmu_tx_assign(tx, TXG_WAIT);
		if (error) {
			dmu_tx_abort(tx);
			skipped += 1;
		} else {
			error = zfs_link_destroy(dzp, zap.za_name, xzp, tx,
			    0, NULL);
			if (error)
				skipped += 1;
			dmu_tx_commit(tx);
		}

		/* Unlock and release xzp on both paths. */
		vput(ZTOV(xzp));

		zap_cursor_advance(&zc);
	}
	zap_cursor_fini(&zc);

	if (error != ENOENT)
		skipped += 1;

	return (skipped);
}
/*
 * Remove every entry of directory 'dzp'.
 *
 * Returns the number of entries that could not be removed.  If walking
 * the directory ends with anything other than ENOENT, the count is
 * bumped by one so that it is at least one and the directory stays in
 * the unlinked set.
 *
 * NOTE: this function assumes that the directory is inactive,
 *	so there is no need to lock its entries before deletion.
 *	Also, it assumes the directory contents is *only* regular
 *	files.
 */
static int
zfs_purgedir(znode_t *dzp)
{
	zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
	zap_cursor_t zc;
	zap_attribute_t zap;
	zfs_dirlock_t dl;
	znode_t *xzp;
	dmu_tx_t *tx;
	int skipped = 0;
	int error;

	zap_cursor_init(&zc, zfsvfs->z_os, dzp->z_id);
	while ((error = zap_cursor_retrieve(&zc, &zap)) == 0) {
		error = zfs_zget(zfsvfs,
		    ZFS_DIRENT_OBJ(zap.za_first_integer), &xzp);
		if (error) {
			skipped += 1;
			zap_cursor_advance(&zc);
			continue;
		}

		ASSERT((ZTOV(xzp)->v_type == VREG) ||
		    (ZTOV(xzp)->v_type == VLNK));

		tx = dmu_tx_create(zfsvfs->z_os);
		dmu_tx_hold_bonus(tx, dzp->z_id);
		dmu_tx_hold_zap(tx, dzp->z_id, FALSE, zap.za_name);
		dmu_tx_hold_bonus(tx, xzp->z_id);
		dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);

		error = dmu_tx_assign(tx, TXG_WAIT);
		if (error) {
			dmu_tx_abort(tx);
			skipped += 1;
		} else {
			bzero(&dl, sizeof (dl));
			dl.dl_dzp = dzp;
			dl.dl_name = zap.za_name;

			error = zfs_link_destroy(&dl, xzp, tx, 0, NULL);
			if (error)
				skipped += 1;
			dmu_tx_commit(tx);
		}

		/* Drop the hold taken by zfs_zget() on both paths. */
		VN_RELE(ZTOV(xzp));

		zap_cursor_advance(&zc);
	}
	zap_cursor_fini(&zc);

	if (error != ENOENT)
		skipped += 1;

	return (skipped);
}
zfs_dirlook(znode_t *dzp, char *name, vnode_t **vpp) #endif { zfs_dirlock_t *dl; znode_t *zp; int error = 0; uint64_t parent; #ifdef __APPLE__ char *name = cnp->cn_nameptr; #endif if (name[0] == 0 || (name[0] == '.' && name[1] == 0)) { *vpp = ZTOV(dzp); VN_HOLD(*vpp); } else if (name[0] == '.' && name[1] == '.' && name[2] == 0) { zfsvfs_t *zfsvfs = dzp->z_zfsvfs; /* * If we are a snapshot mounted under .zfs, return * the vp for the snapshot directory. */ if ((error = sa_lookup(dzp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), &parent, sizeof (parent))) != 0) return (error); if (parent == dzp->z_id && zfsvfs->z_parent != zfsvfs) { error = zfsctl_root_lookup(zfsvfs->z_parent->z_ctldir, "snapshot", vpp, NULL, 0, NULL, kcred /*, NULL, NULL, NULL*/); return (error); } rw_enter(&dzp->z_parent_lock, RW_READER); error = zfs_zget(zfsvfs, parent, &zp); if (error == 0) *vpp = ZTOV(zp); rw_exit(&dzp->z_parent_lock); } else if (zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0) { *vpp = zfsctl_root(dzp); } else { #ifdef __APPLE__ error = zfs_dirent_lock(&dl, dzp, cnp, &zp, ZEXISTS | ZSHARED); #else error = zfs_dirent_lock(&dl, dzp, name, &zp, ZEXISTS | ZSHARED); #endif if (error == 0) { *vpp = ZTOV(zp); zfs_dirent_unlock(dl); dzp->z_zn_prefetch = B_TRUE; /* enable prefetching */ } } return (error); }