void zfs_zinactive(znode_t *zp) { zfs_sb_t *zsb = ZTOZSB(zp); uint64_t z_id = zp->z_id; boolean_t drop_mutex = 0; ASSERT(zp->z_sa_hdl); /* * Don't allow a zfs_zget() while were trying to release this znode. * * Linux allows direct memory reclaim which means that any KM_SLEEP * allocation may trigger inode eviction. This can lead to a deadlock * through the ->shrink_icache_memory()->evict()->zfs_inactive()-> * zfs_zinactive() call path. To avoid this deadlock the process * must not reacquire the mutex when it is already holding it. */ if (!ZFS_OBJ_HOLD_OWNED(zsb, z_id)) { ZFS_OBJ_HOLD_ENTER(zsb, z_id); drop_mutex = 1; } mutex_enter(&zp->z_lock); /* * If this was the last reference to a file with no links, * remove the file from the file system. */ if (zp->z_unlinked) { mutex_exit(&zp->z_lock); if (drop_mutex) ZFS_OBJ_HOLD_EXIT(zsb, z_id); zfs_rmnode(zp); return; } mutex_exit(&zp->z_lock); zfs_znode_dmu_fini(zp); if (drop_mutex) ZFS_OBJ_HOLD_EXIT(zsb, z_id); }
void zfs_znode_delete(znode_t *zp, dmu_tx_t *tx) { zfs_sb_t *zsb = ZTOZSB(zp); objset_t *os = zsb->z_os; uint64_t obj = zp->z_id; uint64_t acl_obj = zfs_external_acl(zp); ZFS_OBJ_HOLD_ENTER(zsb, obj); if (acl_obj) { VERIFY(!zp->z_is_sa); VERIFY(0 == dmu_object_free(os, acl_obj, tx)); } VERIFY(0 == dmu_object_free(os, obj, tx)); zfs_znode_dmu_fini(zp); ZFS_OBJ_HOLD_EXIT(zsb, obj); }
int zfs_zget(zfs_sb_t *zsb, uint64_t obj_num, znode_t **zpp) { dmu_object_info_t doi; dmu_buf_t *db; znode_t *zp; int err; sa_handle_t *hdl; *zpp = NULL; again: ZFS_OBJ_HOLD_ENTER(zsb, obj_num); err = sa_buf_hold(zsb->z_os, obj_num, NULL, &db); if (err) { ZFS_OBJ_HOLD_EXIT(zsb, obj_num); return (err); } dmu_object_info_from_db(db, &doi); if (doi.doi_bonus_type != DMU_OT_SA && (doi.doi_bonus_type != DMU_OT_ZNODE || (doi.doi_bonus_type == DMU_OT_ZNODE && doi.doi_bonus_size < sizeof (znode_phys_t)))) { sa_buf_rele(db, NULL); ZFS_OBJ_HOLD_EXIT(zsb, obj_num); return (SET_ERROR(EINVAL)); } hdl = dmu_buf_get_user(db); if (hdl != NULL) { zp = sa_get_userdata(hdl); /* * Since "SA" does immediate eviction we * should never find a sa handle that doesn't * know about the znode. */ ASSERT3P(zp, !=, NULL); mutex_enter(&zp->z_lock); ASSERT3U(zp->z_id, ==, obj_num); if (zp->z_unlinked) { err = SET_ERROR(ENOENT); } else { /* * If igrab() returns NULL the VFS has independently * determined the inode should be evicted and has * called iput_final() to start the eviction process. * The SA handle is still valid but because the VFS * requires that the eviction succeed we must drop * our locks and references to allow the eviction to * complete. The zfs_zget() may then be retried. * * This unlikely case could be optimized by registering * a sops->drop_inode() callback. The callback would * need to detect the active SA hold thereby informing * the VFS that this inode should not be evicted. */ if (igrab(ZTOI(zp)) == NULL) { mutex_exit(&zp->z_lock); sa_buf_rele(db, NULL); ZFS_OBJ_HOLD_EXIT(zsb, obj_num); goto again; } *zpp = zp; err = 0; } mutex_exit(&zp->z_lock); sa_buf_rele(db, NULL); ZFS_OBJ_HOLD_EXIT(zsb, obj_num); return (err); }
/* * Create a new DMU object to hold a zfs znode. * * IN: dzp - parent directory for new znode * vap - file attributes for new znode * tx - dmu transaction id for zap operations * cr - credentials of caller * flag - flags: * IS_ROOT_NODE - new object will be root * IS_XATTR - new object is an attribute * bonuslen - length of bonus buffer * setaclp - File/Dir initial ACL * fuidp - Tracks fuid allocation. * * OUT: zpp - allocated znode * */ void zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr, uint_t flag, znode_t **zpp, zfs_acl_ids_t *acl_ids) { uint64_t crtime[2], atime[2], mtime[2], ctime[2]; uint64_t mode, size, links, parent, pflags; uint64_t dzp_pflags = 0; uint64_t rdev = 0; zfs_sb_t *zsb = ZTOZSB(dzp); dmu_buf_t *db; timestruc_t now; uint64_t gen, obj; int bonuslen; sa_handle_t *sa_hdl; dmu_object_type_t obj_type; sa_bulk_attr_t *sa_attrs; int cnt = 0; zfs_acl_locator_cb_t locate = { 0 }; if (zsb->z_replay) { obj = vap->va_nodeid; now = vap->va_ctime; /* see zfs_replay_create() */ gen = vap->va_nblocks; /* ditto */ } else { obj = 0; gethrestime(&now); gen = dmu_tx_get_txg(tx); } obj_type = zsb->z_use_sa ? DMU_OT_SA : DMU_OT_ZNODE; bonuslen = (obj_type == DMU_OT_SA) ? DN_MAX_BONUSLEN : ZFS_OLD_ZNODE_PHYS_SIZE; /* * Create a new DMU object. */ /* * There's currently no mechanism for pre-reading the blocks that will * be needed to allocate a new object, so we accept the small chance * that there will be an i/o error and we will fail one of the * assertions below. */ if (S_ISDIR(vap->va_mode)) { if (zsb->z_replay) { VERIFY0(zap_create_claim_norm(zsb->z_os, obj, zsb->z_norm, DMU_OT_DIRECTORY_CONTENTS, obj_type, bonuslen, tx)); } else { obj = zap_create_norm(zsb->z_os, zsb->z_norm, DMU_OT_DIRECTORY_CONTENTS, obj_type, bonuslen, tx); } } else { if (zsb->z_replay) { VERIFY0(dmu_object_claim(zsb->z_os, obj, DMU_OT_PLAIN_FILE_CONTENTS, 0, obj_type, bonuslen, tx)); } else { obj = dmu_object_alloc(zsb->z_os, DMU_OT_PLAIN_FILE_CONTENTS, 0, obj_type, bonuslen, tx); } } ZFS_OBJ_HOLD_ENTER(zsb, obj); VERIFY(0 == sa_buf_hold(zsb->z_os, obj, NULL, &db)); /* * If this is the root, fix up the half-initialized parent pointer * to reference the just-allocated physical data area. */ if (flag & IS_ROOT_NODE) { dzp->z_id = obj; } else { dzp_pflags = dzp->z_pflags; } /* * If parent is an xattr, so am I. */ if (dzp_pflags & ZFS_XATTR) { flag |= IS_XATTR; } if (zsb->z_use_fuids) pflags = ZFS_ARCHIVE | ZFS_AV_MODIFIED; else pflags = 0; if (S_ISDIR(vap->va_mode)) { size = 2; /* contents ("." and "..") */ links = (flag & (IS_ROOT_NODE | IS_XATTR)) ? 2 : 1; } else { size = links = 0; } if (S_ISBLK(vap->va_mode) || S_ISCHR(vap->va_mode)) rdev = vap->va_rdev; parent = dzp->z_id; mode = acl_ids->z_mode; if (flag & IS_XATTR) pflags |= ZFS_XATTR; /* * No execs denied will be deterimed when zfs_mode_compute() is called. */ pflags |= acl_ids->z_aclp->z_hints & (ZFS_ACL_TRIVIAL|ZFS_INHERIT_ACE|ZFS_ACL_AUTO_INHERIT| ZFS_ACL_DEFAULTED|ZFS_ACL_PROTECTED); ZFS_TIME_ENCODE(&now, crtime); ZFS_TIME_ENCODE(&now, ctime); if (vap->va_mask & ATTR_ATIME) { ZFS_TIME_ENCODE(&vap->va_atime, atime); } else { ZFS_TIME_ENCODE(&now, atime); } if (vap->va_mask & ATTR_MTIME) { ZFS_TIME_ENCODE(&vap->va_mtime, mtime); } else { ZFS_TIME_ENCODE(&now, mtime); } /* Now add in all of the "SA" attributes */ VERIFY(0 == sa_handle_get_from_db(zsb->z_os, db, NULL, SA_HDL_SHARED, &sa_hdl)); /* * Setup the array of attributes to be replaced/set on the new file * * order for DMU_OT_ZNODE is critical since it needs to be constructed * in the old znode_phys_t format. Don't change this ordering */ sa_attrs = kmem_alloc(sizeof (sa_bulk_attr_t) * ZPL_END, KM_SLEEP); if (obj_type == DMU_OT_ZNODE) { SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zsb), NULL, &atime, 16); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zsb), NULL, &mtime, 16); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zsb), NULL, &ctime, 16); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zsb), NULL, &crtime, 16); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zsb), NULL, &gen, 8); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zsb), NULL, &mode, 8); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zsb), NULL, &size, 8); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zsb), NULL, &parent, 8); } else { SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zsb), NULL, &mode, 8); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zsb), NULL, &size, 8); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zsb), NULL, &gen, 8); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zsb), NULL, &acl_ids->z_fuid, 8); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zsb), NULL, &acl_ids->z_fgid, 8); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zsb), NULL, &parent, 8); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zsb), NULL, &pflags, 8); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zsb), NULL, &atime, 16); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zsb), NULL, &mtime, 16); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zsb), NULL, &ctime, 16); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zsb), NULL, &crtime, 16); } SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_LINKS(zsb), NULL, &links, 8); if (obj_type == DMU_OT_ZNODE) { SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_XATTR(zsb), NULL, &empty_xattr, 8); } if (obj_type == DMU_OT_ZNODE || (S_ISBLK(vap->va_mode) || S_ISCHR(vap->va_mode))) { SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_RDEV(zsb), NULL, &rdev, 8); } if (obj_type == DMU_OT_ZNODE) { SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zsb), NULL, &pflags, 8); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zsb), NULL, &acl_ids->z_fuid, 8); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zsb), NULL, &acl_ids->z_fgid, 8); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PAD(zsb), NULL, pad, sizeof (uint64_t) * 4); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ZNODE_ACL(zsb), NULL, &acl_phys, sizeof (zfs_acl_phys_t)); } else if (acl_ids->z_aclp->z_version >= ZFS_ACL_VERSION_FUID) { SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_COUNT(zsb), NULL, &acl_ids->z_aclp->z_acl_count, 8); locate.cb_aclp = acl_ids->z_aclp; SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_ACES(zsb), zfs_acl_data_locator, &locate, acl_ids->z_aclp->z_acl_bytes); mode = zfs_mode_compute(mode, acl_ids->z_aclp, &pflags, acl_ids->z_fuid, acl_ids->z_fgid); } VERIFY(sa_replace_all_by_template(sa_hdl, sa_attrs, cnt, tx) == 0); if (!(flag & IS_ROOT_NODE)) { *zpp = zfs_znode_alloc(zsb, db, 0, obj_type, obj, sa_hdl, ZTOI(dzp)); VERIFY(*zpp != NULL); VERIFY(dzp != NULL); } else { /* * If we are creating the root node, the "parent" we * passed in is the znode for the root. */ *zpp = dzp; (*zpp)->z_sa_hdl = sa_hdl; } (*zpp)->z_pflags = pflags; (*zpp)->z_mode = mode; if (obj_type == DMU_OT_ZNODE || acl_ids->z_aclp->z_version < ZFS_ACL_VERSION_FUID) { VERIFY0(zfs_aclset_common(*zpp, acl_ids->z_aclp, cr, tx)); } kmem_free(sa_attrs, sizeof (sa_bulk_attr_t) * ZPL_END); ZFS_OBJ_HOLD_EXIT(zsb, obj); }
/*ARGSUSED*/ static kmem_cbrc_t zfs_znode_move(void *buf, void *newbuf, size_t size, void *arg) { znode_t *ozp = buf, *nzp = newbuf; zfsvfs_t *zfsvfs; vnode_t *vp; /* * The znode is on the file system's list of known znodes if the vfs * pointer is valid. We set the low bit of the vfs pointer when freeing * the znode to invalidate it, and the memory patterns written by kmem * (baddcafe and deadbeef) set at least one of the two low bits. A newly * created znode sets the vfs pointer last of all to indicate that the * znode is known and in a valid state to be moved by this function. */ zfsvfs = ozp->z_zfsvfs; if (!POINTER_IS_VALID(zfsvfs)) { ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_invalid); return (KMEM_CBRC_DONT_KNOW); } /* * Ensure that the filesystem is not unmounted during the move. */ if (zfs_enter(zfsvfs) != 0) { /* ZFS_ENTER */ ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_unmounted); return (KMEM_CBRC_DONT_KNOW); } mutex_enter(&zfsvfs->z_znodes_lock); /* * Recheck the vfs pointer in case the znode was removed just before * acquiring the lock. */ if (zfsvfs != ozp->z_zfsvfs) { mutex_exit(&zfsvfs->z_znodes_lock); ZFS_EXIT(zfsvfs); ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_recheck_invalid); return (KMEM_CBRC_DONT_KNOW); } /* * At this point we know that as long as we hold z_znodes_lock, the * znode cannot be freed and fields within the znode can be safely * accessed. Now, prevent a race with zfs_zget(). */ if (ZFS_OBJ_HOLD_TRYENTER(zfsvfs, ozp->z_id) == 0) { mutex_exit(&zfsvfs->z_znodes_lock); ZFS_EXIT(zfsvfs); ZNODE_STAT_ADD(znode_move_stats.zms_obj_held); return (KMEM_CBRC_LATER); } vp = ZTOV(ozp); if (mutex_tryenter(&vp->v_lock) == 0) { ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id); mutex_exit(&zfsvfs->z_znodes_lock); ZFS_EXIT(zfsvfs); ZNODE_STAT_ADD(znode_move_stats.zms_vnode_locked); return (KMEM_CBRC_LATER); } /* Only move znodes that are referenced _only_ by the DNLC. */ if (vp->v_count != 1 || !vn_in_dnlc(vp)) { mutex_exit(&vp->v_lock); ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id); mutex_exit(&zfsvfs->z_znodes_lock); ZFS_EXIT(zfsvfs); ZNODE_STAT_ADD(znode_move_stats.zms_not_only_dnlc); return (KMEM_CBRC_LATER); } /* * The znode is known and in a valid state to move. We're holding the * locks needed to execute the critical section. */ zfs_znode_move_impl(ozp, nzp); mutex_exit(&vp->v_lock); ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id); list_link_replace(&ozp->z_link_node, &nzp->z_link_node); mutex_exit(&zfsvfs->z_znodes_lock); ZFS_EXIT(zfsvfs); return (KMEM_CBRC_YES); }
int zfs_rezget(znode_t *zp) { zfs_sb_t *zsb = ZTOZSB(zp); dmu_object_info_t doi; dmu_buf_t *db; uint64_t obj_num = zp->z_id; uint64_t mode; sa_bulk_attr_t bulk[8]; int err; int count = 0; uint64_t gen; ZFS_OBJ_HOLD_ENTER(zsb, obj_num); mutex_enter(&zp->z_acl_lock); if (zp->z_acl_cached) { zfs_acl_free(zp->z_acl_cached); zp->z_acl_cached = NULL; } mutex_exit(&zp->z_acl_lock); rw_enter(&zp->z_xattr_lock, RW_WRITER); if (zp->z_xattr_cached) { nvlist_free(zp->z_xattr_cached); zp->z_xattr_cached = NULL; } if (zp->z_xattr_parent) { iput(ZTOI(zp->z_xattr_parent)); zp->z_xattr_parent = NULL; } rw_exit(&zp->z_xattr_lock); ASSERT(zp->z_sa_hdl == NULL); err = sa_buf_hold(zsb->z_os, obj_num, NULL, &db); if (err) { ZFS_OBJ_HOLD_EXIT(zsb, obj_num); return (err); } dmu_object_info_from_db(db, &doi); if (doi.doi_bonus_type != DMU_OT_SA && (doi.doi_bonus_type != DMU_OT_ZNODE || (doi.doi_bonus_type == DMU_OT_ZNODE && doi.doi_bonus_size < sizeof (znode_phys_t)))) { sa_buf_rele(db, NULL); ZFS_OBJ_HOLD_EXIT(zsb, obj_num); return (SET_ERROR(EINVAL)); } zfs_znode_sa_init(zsb, zp, db, doi.doi_bonus_type, NULL); /* reload cached values */ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zsb), NULL, &gen, sizeof (gen)); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zsb), NULL, &zp->z_size, sizeof (zp->z_size)); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zsb), NULL, &zp->z_links, sizeof (zp->z_links)); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb), NULL, &zp->z_pflags, sizeof (zp->z_pflags)); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zsb), NULL, &zp->z_atime, sizeof (zp->z_atime)); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zsb), NULL, &zp->z_uid, sizeof (zp->z_uid)); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zsb), NULL, &zp->z_gid, sizeof (zp->z_gid)); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zsb), NULL, &mode, sizeof (mode)); if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) { zfs_znode_dmu_fini(zp); ZFS_OBJ_HOLD_EXIT(zsb, obj_num); return (SET_ERROR(EIO)); } zp->z_mode = mode; if (gen != zp->z_gen) { zfs_znode_dmu_fini(zp); ZFS_OBJ_HOLD_EXIT(zsb, obj_num); return (SET_ERROR(EIO)); } zp->z_unlinked = (zp->z_links == 0); zp->z_blksz = doi.doi_data_block_size; zfs_inode_update(zp); ZFS_OBJ_HOLD_EXIT(zsb, obj_num); return (0); }
int zfs_zget(zfs_sb_t *zsb, uint64_t obj_num, znode_t **zpp) { dmu_object_info_t doi; dmu_buf_t *db; znode_t *zp; int err; sa_handle_t *hdl; struct inode *ip; *zpp = NULL; again: ip = ilookup(zsb->z_sb, obj_num); ZFS_OBJ_HOLD_ENTER(zsb, obj_num); err = sa_buf_hold(zsb->z_os, obj_num, NULL, &db); if (err) { ZFS_OBJ_HOLD_EXIT(zsb, obj_num); iput(ip); return (err); } dmu_object_info_from_db(db, &doi); if (doi.doi_bonus_type != DMU_OT_SA && (doi.doi_bonus_type != DMU_OT_ZNODE || (doi.doi_bonus_type == DMU_OT_ZNODE && doi.doi_bonus_size < sizeof (znode_phys_t)))) { sa_buf_rele(db, NULL); ZFS_OBJ_HOLD_EXIT(zsb, obj_num); iput(ip); return (SET_ERROR(EINVAL)); } hdl = dmu_buf_get_user(db); if (hdl != NULL) { if (ip == NULL) { /* * ilookup returned NULL, which means * the znode is dying - but the SA handle isn't * quite dead yet, we need to drop any locks * we're holding, re-schedule the task and try again. */ sa_buf_rele(db, NULL); ZFS_OBJ_HOLD_EXIT(zsb, obj_num); schedule(); goto again; } zp = sa_get_userdata(hdl); /* * Since "SA" does immediate eviction we * should never find a sa handle that doesn't * know about the znode. */ ASSERT3P(zp, !=, NULL); mutex_enter(&zp->z_lock); ASSERT3U(zp->z_id, ==, obj_num); if (zp->z_unlinked) { err = SET_ERROR(ENOENT); } else { igrab(ZTOI(zp)); *zpp = zp; err = 0; } sa_buf_rele(db, NULL); mutex_exit(&zp->z_lock); ZFS_OBJ_HOLD_EXIT(zsb, obj_num); iput(ip); return (err); } ASSERT3P(ip, ==, NULL); /* * Not found create new znode/vnode but only if file exists. * * There is a small window where zfs_vget() could * find this object while a file create is still in * progress. This is checked for in zfs_znode_alloc() * * if zfs_znode_alloc() fails it will drop the hold on the * bonus buffer. */ zp = zfs_znode_alloc(zsb, db, doi.doi_data_block_size, doi.doi_bonus_type, obj_num, NULL, NULL); if (zp == NULL) { err = SET_ERROR(ENOENT); } else { *zpp = zp; } ZFS_OBJ_HOLD_EXIT(zsb, obj_num); return (err); }
void zfs_rmnode(znode_t *zp) { zfsvfs_t *zfsvfs = zp->z_zfsvfs; objset_t *os = zfsvfs->z_os; znode_t *xzp = NULL; char obj_name[17]; dmu_tx_t *tx; uint64_t acl_obj; int error; #ifndef __APPLE__ ASSERT(ZTOV(zp)->v_count == 0); #endif /*!__APPLE__*/ ASSERT(zp->z_phys->zp_links == 0); #ifdef ZFS_DEBUG znode_stalker(zp, N_zfs_rmnode); #endif /* * If this is an attribute directory, purge its contents. */ #ifdef __APPLE__ if (S_ISDIR(zp->z_phys->zp_mode) && (zp->z_phys->zp_flags & ZFS_XATTR)) #else if (ZTOV(zp)->v_type == VDIR && (zp->z_phys->zp_flags & ZFS_XATTR)) #endif { if (zfs_purgedir(zp) != 0) { /* * Not enough space to delete some xattrs. * Leave it on the unlinked set. */ return; } } /* * If the file has extended attributes, we're going to unlink * the xattr dir. */ if (zp->z_phys->zp_xattr) { error = zfs_zget(zfsvfs, zp->z_phys->zp_xattr, &xzp); ASSERT(error == 0); } acl_obj = zp->z_phys->zp_acl.z_acl_extern_obj; /* * Set up the transaction. */ tx = dmu_tx_create(os); dmu_tx_hold_free(tx, zp->z_id, 0, DMU_OBJECT_END); dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); if (xzp) { dmu_tx_hold_bonus(tx, xzp->z_id); dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, TRUE, NULL); } if (acl_obj) dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END); error = dmu_tx_assign(tx, TXG_WAIT); if (error) { /* * Not enough space to delete the file. Leave it in the * unlinked set, leaking it until the fs is remounted (at * which point we'll call zfs_unlinked_drain() to process it). */ dmu_tx_abort(tx); #ifdef __APPLE__ /*XXX NOEL: double check this path logic. see radar 5182217. * This may be disturbing some of the evict logic * and hence causing the NULL ptr drefs seen every great while * in some of the test cases*/ zp->z_dbuf_held = 0; ZFS_OBJ_HOLD_EXIT(zfsvfs, zp->z_id); dmu_buf_rele(zp->z_dbuf, NULL); #endif /* __APPLE__ */ return; } if (xzp) { dmu_buf_will_dirty(xzp->z_dbuf, tx); mutex_enter(&xzp->z_lock); xzp->z_unlinked = B_TRUE; /* mark xzp for deletion */ xzp->z_phys->zp_links = 0; /* no more links to it */ mutex_exit(&xzp->z_lock); zfs_unlinked_add(xzp, tx); } /* Remove this znode from the unlinked set */ error = zap_remove(os, zfsvfs->z_unlinkedobj, zfs_unlinked_hexname(obj_name, zp->z_id), tx); ASSERT3U(error, ==, 0); zfs_znode_delete(zp, tx); dmu_tx_commit(tx); if (xzp) VN_RELE(ZTOV(xzp)); }
/*ARGSUSED*/ static kmem_cbrc_t zfs_znode_move(void *buf, void *newbuf, size_t size, void *arg) { znode_t *ozp = buf, *nzp = newbuf; zfsvfs_t *zfsvfs; vnode_t *vp; /* * The znode is on the file system's list of known znodes if the vfs * pointer is valid. We set the low bit of the vfs pointer when freeing * the znode to invalidate it, and the memory patterns written by kmem * (baddcafe and deadbeef) set at least one of the two low bits. A newly * created znode sets the vfs pointer last of all to indicate that the * znode is known and in a valid state to be moved by this function. */ zfsvfs = ozp->z_zfsvfs; if (!POINTER_IS_VALID(zfsvfs)) { ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_invalid); return (KMEM_CBRC_DONT_KNOW); } /* * Close a small window in which it's possible that the filesystem could * be unmounted and freed, and zfsvfs, though valid in the previous * statement, could point to unrelated memory by the time we try to * prevent the filesystem from being unmounted. */ rw_enter(&zfsvfs_lock, RW_WRITER); if (zfsvfs != ozp->z_zfsvfs) { rw_exit(&zfsvfs_lock); ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_recheck1); return (KMEM_CBRC_DONT_KNOW); } /* * If the znode is still valid, then so is the file system. We know that * no valid file system can be freed while we hold zfsvfs_lock, so we * can safely ensure that the filesystem is not and will not be * unmounted. The next statement is equivalent to ZFS_ENTER(). */ rrw_enter(&zfsvfs->z_teardown_lock, RW_READER, FTAG); if (zfsvfs->z_unmounted) { ZFS_EXIT(zfsvfs); rw_exit(&zfsvfs_lock); ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_unmounted); return (KMEM_CBRC_DONT_KNOW); } rw_exit(&zfsvfs_lock); mutex_enter(&zfsvfs->z_znodes_lock); /* * Recheck the vfs pointer in case the znode was removed just before * acquiring the lock. */ if (zfsvfs != ozp->z_zfsvfs) { mutex_exit(&zfsvfs->z_znodes_lock); ZFS_EXIT(zfsvfs); ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_recheck2); return (KMEM_CBRC_DONT_KNOW); } /* * At this point we know that as long as we hold z_znodes_lock, the * znode cannot be freed and fields within the znode can be safely * accessed. Now, prevent a race with zfs_zget(). */ if (ZFS_OBJ_HOLD_TRYENTER(zfsvfs, ozp->z_id) == 0) { mutex_exit(&zfsvfs->z_znodes_lock); ZFS_EXIT(zfsvfs); ZNODE_STAT_ADD(znode_move_stats.zms_obj_held); return (KMEM_CBRC_LATER); } vp = ZTOV(ozp); if (mutex_tryenter(&vp->v_lock) == 0) { ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id); mutex_exit(&zfsvfs->z_znodes_lock); ZFS_EXIT(zfsvfs); ZNODE_STAT_ADD(znode_move_stats.zms_vnode_locked); return (KMEM_CBRC_LATER); } /* Only move znodes that are referenced _only_ by the DNLC. */ if (vp->v_count != 1 || !vn_in_dnlc(vp)) { mutex_exit(&vp->v_lock); ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id); mutex_exit(&zfsvfs->z_znodes_lock); ZFS_EXIT(zfsvfs); ZNODE_STAT_ADD(znode_move_stats.zms_not_only_dnlc); return (KMEM_CBRC_LATER); } /* * The znode is known and in a valid state to move. We're holding the * locks needed to execute the critical section. */ zfs_znode_move_impl(ozp, nzp); mutex_exit(&vp->v_lock); ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id); list_link_replace(&ozp->z_link_node, &nzp->z_link_node); mutex_exit(&zfsvfs->z_znodes_lock); ZFS_EXIT(zfsvfs); return (KMEM_CBRC_YES); }