int tmpfs_reclaim(struct vop_reclaim_args *v) { struct vnode *vp = v->a_vp; struct tmpfs_mount *tmp; struct tmpfs_node *node; node = VP_TO_TMPFS_NODE(vp); tmp = VFS_TO_TMPFS(vp->v_mount); if (vp->v_type == VREG) tmpfs_destroy_vobject(vp, node->tn_reg.tn_aobj); else vnode_destroy_vobject(vp); vp->v_object = NULL; cache_purge(vp); TMPFS_NODE_LOCK(node); TMPFS_ASSERT_ELOCKED(node); tmpfs_free_vp(vp); /* If the node referenced by this vnode was deleted by the user, * we must free its associated data structures (now that the vnode * is being reclaimed). */ if (node->tn_links == 0 && (node->tn_vpstate & TMPFS_VNODE_ALLOCATING) == 0) { node->tn_vpstate = TMPFS_VNODE_DOOMED; TMPFS_NODE_UNLOCK(node); tmpfs_free_node(tmp, node); } else TMPFS_NODE_UNLOCK(node); MPASS(vp->v_data == NULL); return 0; }
/* * Frees a directory entry. It is the caller's responsibility to destroy * the node referenced by it if needed. * * The link count of node is decreased by one to reflect the removal of an * object that referenced it. This only happens if 'node_exists' is true; * otherwise the function will not access the node referred to by the * directory entry, as it may already have been released from the outside. */ void tmpfs_free_dirent(struct tmpfs_mount *tmp, struct tmpfs_dirent *de) { struct tmpfs_node *node; node = de->td_node; TMPFS_NODE_LOCK(node); TMPFS_ASSERT_ELOCKED(node); KKASSERT(node->tn_links > 0); node->tn_links--; TMPFS_NODE_UNLOCK(node); kfree(de->td_name, tmp->tm_name_zone); de->td_namelen = 0; de->td_name = NULL; de->td_node = NULL; objcache_put(tmp->tm_dirent_pool, de); }
/*
 * Change ownership of the given vnode.  At least one of uid or gid must
 * be different than VNOVAL.  If one is set to that value, the attribute
 * is unchanged.
 *
 * Caller should execute tmpfs_update on vp after a successful execution.
 * The vnode must be locked on entry and remain locked on exit.
 *
 * Returns 0 on success, EROFS on a read-only mount, EPERM when the node
 * is immutable or append-only, or the error reported by
 * vop_helper_chown().
 */
int
tmpfs_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred)
{
	struct tmpfs_node *node;
	uid_t new_uid;
	gid_t new_gid;
	mode_t new_mode;
	int error;

	KKASSERT(vn_islocked(vp));

	node = VP_TO_TMPFS_NODE(vp);

	/* A read-only mount rejects any ownership change. */
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return EROFS;

	/* So do immutable and append-only files. */
	if (node->tn_flags & (IMMUTABLE | APPEND))
		return EPERM;

	/*
	 * Hand copies of the current values to the helper; it updates
	 * them in place and they are committed below only on success.
	 */
	new_uid = node->tn_uid;
	new_gid = node->tn_gid;
	new_mode = node->tn_mode;
	error = vop_helper_chown(vp, uid, gid, cred,
	    &new_uid, &new_gid, &new_mode);
	if (error != 0)
		return error;

	TMPFS_NODE_LOCK(node);
	if (new_uid != node->tn_uid ||
	    new_gid != node->tn_gid ||
	    new_mode != node->tn_mode) {
		node->tn_uid = new_uid;
		node->tn_gid = new_gid;
		node->tn_mode = new_mode;
		node->tn_status |= TMPFS_NODE_CHANGED;
	}
	TMPFS_NODE_UNLOCK(node);

	return 0;
}
static int tmpfs_readlink(struct vop_readlink_args *v) { struct vnode *vp = v->a_vp; struct uio *uio = v->a_uio; int error; struct tmpfs_node *node; KKASSERT(uio->uio_offset == 0); KKASSERT(vp->v_type == VLNK); node = VP_TO_TMPFS_NODE(vp); error = uiomove(node->tn_link, MIN(node->tn_size, uio->uio_resid), uio); TMPFS_NODE_LOCK(node); node->tn_status |= TMPFS_NODE_ACCESSED; TMPFS_NODE_UNLOCK(node); return error; }
int tmpfs_getattr(struct vop_getattr_args *v) { struct vnode *vp = v->a_vp; struct vattr *vap = v->a_vap; struct tmpfs_node *node; node = VP_TO_TMPFS_NODE(vp); tmpfs_update(vp); TMPFS_NODE_LOCK_SH(node); vap->va_type = vp->v_type; vap->va_mode = node->tn_mode; vap->va_nlink = node->tn_links; vap->va_uid = node->tn_uid; vap->va_gid = node->tn_gid; vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0]; vap->va_fileid = node->tn_id; vap->va_size = node->tn_size; vap->va_blocksize = PAGE_SIZE; vap->va_atime.tv_sec = node->tn_atime; vap->va_atime.tv_nsec = node->tn_atimensec; vap->va_mtime.tv_sec = node->tn_mtime; vap->va_mtime.tv_nsec = node->tn_mtimensec; vap->va_ctime.tv_sec = node->tn_ctime; vap->va_ctime.tv_nsec = node->tn_ctimensec; vap->va_gen = node->tn_gen; vap->va_flags = node->tn_flags; if (vp->v_type == VBLK || vp->v_type == VCHR) { vap->va_rmajor = umajor(node->tn_rdev); vap->va_rminor = uminor(node->tn_rdev); } vap->va_bytes = round_page(node->tn_size); vap->va_filerev = 0; TMPFS_NODE_UNLOCK(node); return 0; }
int tmpfs_reclaim(struct vop_reclaim_args *v) { struct vnode *vp = v->a_vp; struct tmpfs_mount *tmp; struct tmpfs_node *node; struct mount *mp; mp = vp->v_mount; lwkt_gettoken(&mp->mnt_token); node = VP_TO_TMPFS_NODE(vp); tmp = VFS_TO_TMPFS(vp->v_mount); KKASSERT(mp == tmp->tm_mount); tmpfs_free_vp(vp); /* * If the node referenced by this vnode was deleted by the * user, we must free its associated data structures now that * the vnode is being reclaimed. * * Directories have an extra link ref. */ TMPFS_NODE_LOCK(node); if ((node->tn_vpstate & TMPFS_VNODE_ALLOCATING) == 0 && node->tn_links == 0) { node->tn_vpstate = TMPFS_VNODE_DOOMED; tmpfs_free_node(tmp, node); /* eats the lock */ } else { TMPFS_NODE_UNLOCK(node); } lwkt_reltoken(&mp->mnt_token); KKASSERT(vp->v_data == NULL); return 0; }
/* Sync timestamps */ void tmpfs_itimes(struct vnode *vp, const struct timespec *acc, const struct timespec *mod) { struct tmpfs_node *node; struct timespec now; node = VP_TO_TMPFS_NODE(vp); if ((node->tn_status & (TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED | TMPFS_NODE_CHANGED)) == 0) return; vfs_timestamp(&now); TMPFS_NODE_LOCK(node); if (node->tn_status & TMPFS_NODE_ACCESSED) { if (acc == NULL) acc = &now; node->tn_atime = acc->tv_sec; node->tn_atimensec = acc->tv_nsec; } if (node->tn_status & TMPFS_NODE_MODIFIED) { if (mod == NULL) mod = &now; node->tn_mtime = mod->tv_sec; node->tn_mtimensec = mod->tv_nsec; } if (node->tn_status & TMPFS_NODE_CHANGED) { node->tn_ctime = now.tv_sec; node->tn_ctimensec = now.tv_nsec; } node->tn_status &= ~(TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED | TMPFS_NODE_CHANGED); TMPFS_NODE_UNLOCK(node); }
/*
 * Change access mode on the given vnode.
 * Caller should execute tmpfs_update on vp after a successful execution.
 * The vnode must be locked on entry and remain locked on exit.
 *
 * Returns 0 on success, EROFS when the file system is mounted
 * read-only, EPERM when the node is immutable or append-only, or the
 * error reported by vop_helper_chmod().
 */
int
tmpfs_chmod(struct vnode *vp, mode_t vamode, struct ucred *cred)
{
	struct tmpfs_node *node;
	mode_t cur_mode;
	int error;

	KKASSERT(vn_islocked(vp));

	node = VP_TO_TMPFS_NODE(vp);

	/* Disallow this operation if the file system is mounted read-only. */
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return EROFS;

	/* Immutable or append-only files cannot be modified, either. */
	if (node->tn_flags & (IMMUTABLE | APPEND))
		return EPERM;

	/*
	 * The helper works on a copy of the mode; the permission bits
	 * are committed to the node only on success and only when they
	 * actually differ.
	 */
	cur_mode = node->tn_mode;
	error = vop_helper_chmod(vp, vamode, cred, node->tn_uid, node->tn_gid,
	    &cur_mode);

	if (error == 0 &&
	    (node->tn_mode & ALLPERMS) != (cur_mode & ALLPERMS)) {
		TMPFS_NODE_LOCK(node);
		node->tn_mode &= ~ALLPERMS;
		node->tn_mode |= cur_mode & ALLPERMS;

		node->tn_status |= TMPFS_NODE_CHANGED;
		TMPFS_NODE_UNLOCK(node);
	}

	KKASSERT(vn_islocked(vp));

	/*
	 * Report the helper's result.  Returning a constant 0 here would
	 * make a refused chmod look successful to the caller, unlike
	 * tmpfs_chown() which propagates its helper's error.
	 */
	return error;
}
/*
 * Readdir handler: emits the '.' entry, the '..' entry and then the
 * regular directory entries into the caller's uio, starting at
 * uio->uio_offset.  Optionally reports end-of-directory through
 * a_eofflag and builds an array of per-entry cookies (a_cookies /
 * a_ncookies) allocated from M_TEMP.
 *
 * Returns ENOTDIR when vp is not a directory, otherwise 0 or the error
 * produced by the entry helpers (a helper result of -1 is reported
 * as 0).
 */
static int
tmpfs_readdir(struct vop_readdir_args *v)
{
	struct vnode *vp = v->a_vp;
	struct uio *uio = v->a_uio;
	int *eofflag = v->a_eofflag;
	off_t **cookies = v->a_cookies;
	int *ncookies = v->a_ncookies;
	struct tmpfs_mount *tmp;
	int error;
	off_t startoff;
	off_t cnt = 0;		/* number of entries emitted so far */
	struct tmpfs_node *node;

	/* This operation only makes sense on directory nodes. */
	if (vp->v_type != VDIR) {
		return ENOTDIR;
	}

	tmp = VFS_TO_TMPFS(vp->v_mount);
	node = VP_TO_TMPFS_DIR(vp);
	startoff = uio->uio_offset;

	if (uio->uio_offset == TMPFS_DIRCOOKIE_DOT) {
		error = tmpfs_dir_getdotdent(node, uio);
		if (error != 0) {
			/* outok expects the node lock to be held */
			TMPFS_NODE_LOCK_SH(node);
			goto outok;
		}
		cnt++;
	}

	if (uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT) {
		/* may lock parent, cannot hold node lock */
		error = tmpfs_dir_getdotdotdent(tmp, node, uio);
		if (error != 0) {
			/* outok expects the node lock to be held */
			TMPFS_NODE_LOCK_SH(node);
			goto outok;
		}
		cnt++;
	}

	TMPFS_NODE_LOCK_SH(node);
	error = tmpfs_dir_getdents(node, uio, &cnt);

outok:
	/* Every path reaching this label holds the node lock shared. */
	KKASSERT(error >= -1);

	/*
	 * -1 is an internal helper result that is not an error for the
	 * caller.  NOTE(review): presumably it means the uio ran out of
	 * space -- confirm against the helper implementations.
	 */
	if (error == -1)
		error = 0;

	if (eofflag != NULL)
		*eofflag =
		    (error == 0 && uio->uio_offset == TMPFS_DIRCOOKIE_EOF);

	/* Update NFS-related variables. */
	if (error == 0 && cookies != NULL && ncookies != NULL) {
		off_t i;
		off_t off = startoff;
		struct tmpfs_dirent *de = NULL;

		*ncookies = cnt;
		*cookies = kmalloc(cnt * sizeof(off_t), M_TEMP, M_WAITOK);

		/*
		 * Replay the walk from startoff: for each emitted entry
		 * store the offset that follows it ('.' -> '..' -> first
		 * entry of the cookie tree -> its successors -> EOF).
		 */
		for (i = 0; i < cnt; i++) {
			KKASSERT(off != TMPFS_DIRCOOKIE_EOF);
			if (off == TMPFS_DIRCOOKIE_DOT) {
				off = TMPFS_DIRCOOKIE_DOTDOT;
			} else {
				if (off == TMPFS_DIRCOOKIE_DOTDOT) {
					de = RB_MIN(tmpfs_dirtree_cookie,
					    &node->tn_dir.tn_cookietree);
				} else if (de != NULL) {
					de = RB_NEXT(tmpfs_dirtree_cookie,
					    &node->tn_dir.tn_cookietree, de);
				} else {
					/*
					 * First iteration with a regular
					 * start offset: locate the entry
					 * for it, then step past it.
					 */
					de = tmpfs_dir_lookupbycookie(node,
					    off);
					KKASSERT(de != NULL);
					de = RB_NEXT(tmpfs_dirtree_cookie,
					    &node->tn_dir.tn_cookietree, de);
				}
				if (de == NULL)
					off = TMPFS_DIRCOOKIE_EOF;
				else
					off = tmpfs_dircookie(de);
			}
			(*cookies)[i] = off;
		}
		/* The replay must end where the uio ended. */
		KKASSERT(uio->uio_offset == off);
	}
	TMPFS_NODE_UNLOCK(node);

	/*
	 * Flag the access.  The bit is tested without the lock so the
	 * exclusive lock is only taken when it is not already set.
	 */
	if ((node->tn_status & TMPFS_NODE_ACCESSED) == 0) {
		TMPFS_NODE_LOCK(node);
		node->tn_status |= TMPFS_NODE_ACCESSED;
		TMPFS_NODE_UNLOCK(node);
	}
	return error;
}
static int tmpfs_nrmdir(struct vop_nrmdir_args *v) { struct vnode *dvp = v->a_dvp; struct namecache *ncp = v->a_nch->ncp; struct vnode *vp; struct tmpfs_dirent *de; struct tmpfs_mount *tmp; struct tmpfs_node *dnode; struct tmpfs_node *node; struct mount *mp; int error; mp = dvp->v_mount; /* * We have to acquire the vp from v->a_nch because we will likely * unresolve the namecache entry, and a vrele/vput is needed to * trigger the tmpfs_inactive/tmpfs_reclaim sequence. * * We have to use vget to clear any inactive state on the vnode, * otherwise the vnode may remain inactive and thus tmpfs_inactive * will not get called when we release it. */ error = cache_vget(v->a_nch, v->a_cred, LK_SHARED, &vp); KKASSERT(error == 0); vn_unlock(vp); /* * Prevalidate so we don't hit an assertion later */ if (vp->v_type != VDIR) { error = ENOTDIR; goto out; } tmp = VFS_TO_TMPFS(dvp->v_mount); dnode = VP_TO_TMPFS_DIR(dvp); node = VP_TO_TMPFS_DIR(vp); /* * Directories with more than two entries ('.' and '..') cannot * be removed. */ if (node->tn_size > 0) { error = ENOTEMPTY; goto out; } if ((dnode->tn_flags & APPEND) || (node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND))) { error = EPERM; goto out; } /* * This invariant holds only if we are not trying to * remove "..". We checked for that above so this is safe now. */ KKASSERT(node->tn_dir.tn_parent == dnode); /* * Get the directory entry associated with node (vp). This * was filled by tmpfs_lookup while looking up the entry. */ TMPFS_NODE_LOCK(dnode); de = tmpfs_dir_lookup(dnode, node, ncp); KKASSERT(TMPFS_DIRENT_MATCHES(de, ncp->nc_name, ncp->nc_nlen)); /* Check flags to see if we are allowed to remove the directory. */ if ((dnode->tn_flags & APPEND) || node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) { error = EPERM; TMPFS_NODE_UNLOCK(dnode); goto out; } /* Detach the directory entry from the directory (dnode). 
*/ tmpfs_dir_detach(dnode, de); TMPFS_NODE_UNLOCK(dnode); /* No vnode should be allocated for this entry from this point */ TMPFS_NODE_LOCK(dnode); TMPFS_ASSERT_ELOCKED(dnode); TMPFS_NODE_LOCK(node); TMPFS_ASSERT_ELOCKED(node); /* * Must set parent linkage to NULL (tested by ncreate to disallow * the creation of new files/dirs in a deleted directory) */ node->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED; dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED; TMPFS_NODE_UNLOCK(node); TMPFS_NODE_UNLOCK(dnode); /* Free the directory entry we just deleted. Note that the node * referred by it will not be removed until the vnode is really * reclaimed. */ tmpfs_free_dirent(tmp, de); /* Release the deleted vnode (will destroy the node, notify * interested parties and clean it from the cache). */ TMPFS_NODE_LOCK(dnode); dnode->tn_status |= TMPFS_NODE_CHANGED; TMPFS_NODE_UNLOCK(dnode); tmpfs_update(dvp); cache_unlink(v->a_nch); tmpfs_knote(dvp, NOTE_WRITE | NOTE_LINK); error = 0; out: vrele(vp); return error; }
/*
 * Namecache-based rename handler: moves the entry named by v->a_fnch in
 * directory v->a_fdvp to the name v->a_tnch in directory v->a_tdvp,
 * replacing an existing target entry if there is one.
 *
 * Returns 0 on success or EXDEV, ENOENT, EPERM, ENOTEMPTY, ENOTDIR,
 * EISDIR or ENOSPC on failure.
 */
static int
tmpfs_nrename(struct vop_nrename_args *v)
{
	struct vnode *fdvp = v->a_fdvp;
	struct namecache *fncp = v->a_fnch->ncp;
	struct vnode *fvp = fncp->nc_vp;
	struct vnode *tdvp = v->a_tdvp;
	struct namecache *tncp = v->a_tnch->ncp;
	struct vnode *tvp;
	struct tmpfs_dirent *de, *tde;
	struct tmpfs_mount *tmp;
	struct tmpfs_node *fdnode;
	struct tmpfs_node *fnode;
	struct tmpfs_node *tnode;
	struct tmpfs_node *tdnode;
	struct mount *mp;
	char *newname;
	char *oldname;
	int error;

	mp = fdvp->v_mount;
	KKASSERT(fdvp->v_mount == fvp->v_mount);

	/*
	 * Because tvp can get overwritten we have to vget it instead of
	 * just vref or use it, otherwise its VINACTIVE flag may not get
	 * cleared and the node won't get destroyed.
	 *
	 * NOTE(review): the code below tests tvp against NULL even when
	 * cache_vget() failed, so it relies on cache_vget() storing NULL
	 * in tvp on failure -- confirm.
	 */
	error = cache_vget(v->a_tnch, v->a_cred, LK_SHARED, &tvp);
	if (error == 0) {
		tnode = VP_TO_TMPFS_NODE(tvp);
		vn_unlock(tvp);
	} else {
		tnode = NULL;
	}

	/*
	 * Disallow cross-device renames.
	 * XXX Why isn't this done by the caller?
	 */
	if (fvp->v_mount != tdvp->v_mount ||
	    (tvp != NULL && fvp->v_mount != tvp->v_mount)) {
		error = EXDEV;
		goto out;
	}

	tmp = VFS_TO_TMPFS(tdvp->v_mount);
	tdnode = VP_TO_TMPFS_DIR(tdvp);

	/* If source and target are the same file, there is nothing to do. */
	if (fvp == tvp) {
		error = 0;
		goto out;
	}

	fdnode = VP_TO_TMPFS_DIR(fdvp);
	fnode = VP_TO_TMPFS_NODE(fvp);
	TMPFS_NODE_LOCK(fdnode);
	de = tmpfs_dir_lookup(fdnode, fnode, fncp);
	TMPFS_NODE_UNLOCK(fdnode);	/* XXX depend on namecache lock */

	/* Avoid manipulating '.' and '..' entries. */
	if (de == NULL) {
		error = ENOENT;
		goto out_locked;
	}
	KKASSERT(de->td_node == fnode);

	/*
	 * If replacing an entry in the target directory and that entry
	 * is a directory, it must be empty.
	 *
	 * Kern_rename guarantees the destination to be a directory
	 * if the source is one (it does?).
	 */
	if (tvp != NULL) {
		KKASSERT(tnode != NULL);

		if ((tnode->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
		    (tdnode->tn_flags & (APPEND | IMMUTABLE))) {
			error = EPERM;
			goto out_locked;
		}

		if (fnode->tn_type == VDIR && tnode->tn_type == VDIR) {
			if (tnode->tn_size > 0) {
				error = ENOTEMPTY;
				goto out_locked;
			}
		} else if (fnode->tn_type == VDIR && tnode->tn_type != VDIR) {
			error = ENOTDIR;
			goto out_locked;
		} else if (fnode->tn_type != VDIR && tnode->tn_type == VDIR) {
			error = EISDIR;
			goto out_locked;
		} else {
			KKASSERT(fnode->tn_type != VDIR &&
				tnode->tn_type != VDIR);
		}
	}

	/* The source entry and its directory must allow the removal too. */
	if ((fnode->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
	    (fdnode->tn_flags & (APPEND | IMMUTABLE))) {
		error = EPERM;
		goto out_locked;
	}

	/*
	 * Ensure that we have enough memory to hold the new name, if it
	 * has to be changed.  This is the last step that can fail, so no
	 * later error path has to release newname.
	 */
	if (fncp->nc_nlen != tncp->nc_nlen ||
	    bcmp(fncp->nc_name, tncp->nc_name, fncp->nc_nlen) != 0) {
		newname = kmalloc(tncp->nc_nlen + 1, tmp->tm_name_zone,
				  M_WAITOK | M_NULLOK);
		if (newname == NULL) {
			error = ENOSPC;
			goto out_locked;
		}
		bcopy(tncp->nc_name, newname, tncp->nc_nlen);
		newname[tncp->nc_nlen] = '\0';
	} else {
		newname = NULL;
	}

	/*
	 * Unlink entry from source directory.  Note that the kernel has
	 * already checked for illegal recursion cases (renaming a directory
	 * into a subdirectory of itself).
	 */
	if (fdnode != tdnode) {
		tmpfs_dir_detach(fdnode, de);
	} else {
		/*
		 * Same directory: only take the entry out of the two
		 * lookup trees; it is re-inserted under its new name
		 * further down.
		 */
		/* XXX depend on namecache lock */
		TMPFS_NODE_LOCK(fdnode);
		KKASSERT(de == tmpfs_dir_lookup(fdnode, fnode, fncp));
		RB_REMOVE(tmpfs_dirtree, &fdnode->tn_dir.tn_dirtree, de);
		RB_REMOVE(tmpfs_dirtree_cookie,
			  &fdnode->tn_dir.tn_cookietree, de);
		TMPFS_NODE_UNLOCK(fdnode);
	}

	/*
	 * Handle any name change.  Swap with newname, we will
	 * deallocate it at the end.
	 */
	if (newname != NULL) {
#if 0
		TMPFS_NODE_LOCK(fnode);
		fnode->tn_status |= TMPFS_NODE_CHANGED;
		TMPFS_NODE_UNLOCK(fnode);
#endif
		oldname = de->td_name;
		de->td_name = newname;
		de->td_namelen = (uint16_t)tncp->nc_nlen;
		newname = oldname;	/* now holds the old name to free */
	}

	/*
	 * If we are overwriting an entry, we have to remove the old one
	 * from the target directory.
	 */
	if (tvp != NULL) {
		/* Remove the old entry from the target directory. */
		TMPFS_NODE_LOCK(tdnode);
		tde = tmpfs_dir_lookup(tdnode, tnode, tncp);
		tmpfs_dir_detach(tdnode, tde);
		TMPFS_NODE_UNLOCK(tdnode);
		tmpfs_knote(tdnode->tn_vnode, NOTE_DELETE);

		/*
		 * Free the directory entry we just deleted.  Note that the
		 * node referred by it will not be removed until the vnode is
		 * really reclaimed.
		 */
		tmpfs_free_dirent(VFS_TO_TMPFS(tvp->v_mount), tde);
		/*cache_inval_vp(tvp, CINV_DESTROY);*/
	}

	/*
	 * Link entry to target directory.  If the entry
	 * represents a directory move the parent linkage
	 * as well.
	 */
	if (fdnode != tdnode) {
		if (de->td_node->tn_type == VDIR) {
			TMPFS_VALIDATE_DIR(fnode);
		}
		tmpfs_dir_attach(tdnode, de);
	} else {
		/* Same directory: put the entry back into both trees. */
		TMPFS_NODE_LOCK(tdnode);
		tdnode->tn_status |= TMPFS_NODE_MODIFIED;
		RB_INSERT(tmpfs_dirtree, &tdnode->tn_dir.tn_dirtree, de);
		RB_INSERT(tmpfs_dirtree_cookie,
			  &tdnode->tn_dir.tn_cookietree, de);
		TMPFS_NODE_UNLOCK(tdnode);
	}

	/*
	 * Finish up: release the old name (if the name changed), update
	 * the namecache and post the kqueue notifications.
	 */
	if (newname) {
		kfree(newname, tmp->tm_name_zone);
		newname = NULL;
	}
	cache_rename(v->a_fnch, v->a_tnch);
	tmpfs_knote(v->a_fdvp, NOTE_WRITE);
	tmpfs_knote(v->a_tdvp, NOTE_WRITE);
	if (fnode->tn_vnode)
		tmpfs_knote(fnode->tn_vnode, NOTE_RENAME);
	error = 0;

	/* out_locked has no cleanup of its own; it falls into out. */
out_locked:
	;
out:
	/* Drop the reference obtained through cache_vget(), if any. */
	if (tvp)
		vrele(tvp);
	return error;
}
static int tmpfs_nremove(struct vop_nremove_args *v) { struct vnode *dvp = v->a_dvp; struct namecache *ncp = v->a_nch->ncp; struct vnode *vp; int error; struct tmpfs_dirent *de; struct tmpfs_mount *tmp; struct tmpfs_node *dnode; struct tmpfs_node *node; struct mount *mp; mp = dvp->v_mount; /* * We have to acquire the vp from v->a_nch because we will likely * unresolve the namecache entry, and a vrele/vput is needed to * trigger the tmpfs_inactive/tmpfs_reclaim sequence. * * We have to use vget to clear any inactive state on the vnode, * otherwise the vnode may remain inactive and thus tmpfs_inactive * will not get called when we release it. */ error = cache_vget(v->a_nch, v->a_cred, LK_SHARED, &vp); KKASSERT(vp->v_mount == dvp->v_mount); KKASSERT(error == 0); vn_unlock(vp); if (vp->v_type == VDIR) { error = EISDIR; goto out2; } dnode = VP_TO_TMPFS_DIR(dvp); node = VP_TO_TMPFS_NODE(vp); tmp = VFS_TO_TMPFS(vp->v_mount); TMPFS_NODE_LOCK(dnode); de = tmpfs_dir_lookup(dnode, node, ncp); if (de == NULL) { error = ENOENT; goto out; } /* Files marked as immutable or append-only cannot be deleted. */ if ((node->tn_flags & (IMMUTABLE | APPEND | NOUNLINK)) || (dnode->tn_flags & APPEND)) { error = EPERM; goto out; } /* Remove the entry from the directory; as it is a file, we do not * have to change the number of hard links of the directory. */ tmpfs_dir_detach(dnode, de); /* Free the directory entry we just deleted. Note that the node * referred by it will not be removed until the vnode is really * reclaimed. */ tmpfs_free_dirent(tmp, de); if (node->tn_links > 0) { TMPFS_NODE_LOCK(node); node->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \ TMPFS_NODE_MODIFIED; TMPFS_NODE_UNLOCK(node); } cache_unlink(v->a_nch); tmpfs_knote(vp, NOTE_DELETE); error = 0; out: TMPFS_NODE_UNLOCK(dnode); if (error == 0) tmpfs_knote(dvp, NOTE_WRITE); out2: vrele(vp); return error; }
static int tmpfs_rmdir(struct vop_rmdir_args *v) { struct vnode *dvp = v->a_dvp; struct vnode *vp = v->a_vp; int error; struct tmpfs_dirent *de; struct tmpfs_mount *tmp; struct tmpfs_node *dnode; struct tmpfs_node *node; MPASS(VOP_ISLOCKED(dvp)); MPASS(VOP_ISLOCKED(vp)); tmp = VFS_TO_TMPFS(dvp->v_mount); dnode = VP_TO_TMPFS_DIR(dvp); node = VP_TO_TMPFS_DIR(vp); /* Directories with more than two entries ('.' and '..') cannot be * removed. */ if (node->tn_size > 0) { error = ENOTEMPTY; goto out; } if ((dnode->tn_flags & APPEND) || (node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND))) { error = EPERM; goto out; } /* This invariant holds only if we are not trying to remove "..". * We checked for that above so this is safe now. */ MPASS(node->tn_dir.tn_parent == dnode); /* Get the directory entry associated with node (vp). This was * filled by tmpfs_lookup while looking up the entry. */ de = tmpfs_dir_lookup(dnode, node, v->a_cnp); MPASS(TMPFS_DIRENT_MATCHES(de, v->a_cnp->cn_nameptr, v->a_cnp->cn_namelen)); /* Check flags to see if we are allowed to remove the directory. */ if (dnode->tn_flags & APPEND || node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) { error = EPERM; goto out; } /* Detach the directory entry from the directory (dnode). */ tmpfs_dir_detach(dvp, de); if (v->a_cnp->cn_flags & DOWHITEOUT) tmpfs_dir_whiteout_add(dvp, v->a_cnp); /* No vnode should be allocated for this entry from this point */ TMPFS_NODE_LOCK(node); TMPFS_ASSERT_ELOCKED(node); node->tn_links--; node->tn_dir.tn_parent = NULL; node->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \ TMPFS_NODE_MODIFIED; TMPFS_NODE_UNLOCK(node); TMPFS_NODE_LOCK(dnode); TMPFS_ASSERT_ELOCKED(dnode); dnode->tn_links--; dnode->tn_status |= TMPFS_NODE_ACCESSED | \ TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED; TMPFS_NODE_UNLOCK(dnode); cache_purge(dvp); cache_purge(vp); /* Free the directory entry we just deleted. Note that the node * referred by it will not be removed until the vnode is really * reclaimed. 
*/ tmpfs_free_dirent(tmp, de, TRUE); /* Release the deleted vnode (will destroy the node, notify * interested parties and clean it from the cache). */ dnode->tn_status |= TMPFS_NODE_CHANGED; tmpfs_update(dvp); error = 0; out: return error; }
/*
 * Rename handler: moves the entry fcnp/fvp found in directory fdvp to
 * the name tcnp in directory tdvp, replacing the existing target tvp if
 * there is one.
 *
 * On every exit path the target vnodes are released (tdvp with vput, or
 * vrele when it equals tvp; tvp with vput) and the source vnodes are
 * vrele'd.
 *
 * Returns 0 on success or EXDEV, EINVAL, ENOENT, EPERM, ENOTEMPTY,
 * ENOTDIR or EISDIR on failure.
 */
static int
tmpfs_rename(struct vop_rename_args *v)
{
	struct vnode *fdvp = v->a_fdvp;
	struct vnode *fvp = v->a_fvp;
	struct componentname *fcnp = v->a_fcnp;
	struct vnode *tdvp = v->a_tdvp;
	struct vnode *tvp = v->a_tvp;
	struct componentname *tcnp = v->a_tcnp;

	char *newname;
	int error;
	struct tmpfs_dirent *de;
	struct tmpfs_mount *tmp;
	struct tmpfs_node *fdnode;
	struct tmpfs_node *fnode;
	struct tmpfs_node *tnode;
	struct tmpfs_node *tdnode;

	MPASS(VOP_ISLOCKED(tdvp));
	MPASS(IMPLIES(tvp != NULL, VOP_ISLOCKED(tvp)));
	MPASS(fcnp->cn_flags & HASBUF);
	MPASS(tcnp->cn_flags & HASBUF);

	tnode = (tvp == NULL) ? NULL : VP_TO_TMPFS_NODE(tvp);

	/*
	 * Disallow cross-device renames.
	 * XXX Why isn't this done by the caller?
	 */
	if (fvp->v_mount != tdvp->v_mount ||
	    (tvp != NULL && fvp->v_mount != tvp->v_mount)) {
		error = EXDEV;
		goto out;
	}

	tmp = VFS_TO_TMPFS(tdvp->v_mount);
	tdnode = VP_TO_TMPFS_DIR(tdvp);

	/* If source and target are the same file, there is nothing to do. */
	if (fvp == tvp) {
		error = 0;
		goto out;
	}

	/*
	 * If we need to move the directory between entries, lock the
	 * source so that we can safely operate on it.  The matching
	 * unlock at out_locked uses the same condition.
	 */
	if (fdvp != tdvp && fdvp != tvp)
		vn_lock(fdvp, LK_EXCLUSIVE | LK_RETRY);
	fdnode = VP_TO_TMPFS_DIR(fdvp);
	fnode = VP_TO_TMPFS_NODE(fvp);
	de = tmpfs_dir_lookup(fdnode, fnode, fcnp);

	/*
	 * Entry can disappear before we lock fdvp,
	 * also avoid manipulating '.' and '..' entries.
	 */
	if (de == NULL) {
		if ((fcnp->cn_flags & ISDOTDOT) != 0 ||
		    (fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.'))
			error = EINVAL;
		else
			error = ENOENT;
		goto out_locked;
	}
	MPASS(de->td_node == fnode);

	/*
	 * If re-naming a directory to another preexisting directory
	 * ensure that the target directory is empty so that its
	 * removal causes no side effects.
	 * Kern_rename guarantees the destination to be a directory
	 * if the source is one.
	 */
	if (tvp != NULL) {
		MPASS(tnode != NULL);

		if ((tnode->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
		    (tdnode->tn_flags & (APPEND | IMMUTABLE))) {
			error = EPERM;
			goto out_locked;
		}

		if (fnode->tn_type == VDIR && tnode->tn_type == VDIR) {
			if (tnode->tn_size > 0) {
				error = ENOTEMPTY;
				goto out_locked;
			}
		} else if (fnode->tn_type == VDIR && tnode->tn_type != VDIR) {
			error = ENOTDIR;
			goto out_locked;
		} else if (fnode->tn_type != VDIR && tnode->tn_type == VDIR) {
			error = EISDIR;
			goto out_locked;
		} else {
			MPASS(fnode->tn_type != VDIR &&
				tnode->tn_type != VDIR);
		}
	}

	/* The source entry and its directory must allow the removal too. */
	if ((fnode->tn_flags & (NOUNLINK | IMMUTABLE | APPEND))
	    || (fdnode->tn_flags & (APPEND | IMMUTABLE))) {
		error = EPERM;
		goto out_locked;
	}

	/*
	 * Ensure that we have enough memory to hold the new name, if it
	 * has to be changed.  The buffer is exactly cn_namelen bytes with
	 * no terminator; the length is kept in td_namelen.  Error paths
	 * after this point must free newname.
	 */
	if (fcnp->cn_namelen != tcnp->cn_namelen ||
	    bcmp(fcnp->cn_nameptr, tcnp->cn_nameptr, fcnp->cn_namelen) != 0) {
		newname = malloc(tcnp->cn_namelen, M_TMPFSNAME, M_WAITOK);
	} else
		newname = NULL;

	/*
	 * If the node is being moved to another directory, we have to do
	 * the move.
	 */
	if (fdnode != tdnode) {
		/*
		 * In case we are moving a directory, we have to adjust its
		 * parent to point to the new parent.
		 */
		if (de->td_node->tn_type == VDIR) {
			struct tmpfs_node *n;

			/*
			 * Ensure the target directory is not a child of the
			 * directory being moved.  Otherwise, we'd end up
			 * with stale nodes.
			 */
			n = tdnode;
			/*
			 * TMPFS_LOCK guarantees that no nodes are freed while
			 * traversing the list.  Nodes can only be marked as
			 * removed: tn_parent == NULL.
			 *
			 * The walk climbs from tdnode through tn_parent,
			 * holding one node lock at a time, until it reaches
			 * a node that is its own parent.
			 */
			TMPFS_LOCK(tmp);
			TMPFS_NODE_LOCK(n);
			while (n != n->tn_dir.tn_parent) {
				struct tmpfs_node *parent;

				if (n == fnode) {
					/* Target lies below the source. */
					TMPFS_NODE_UNLOCK(n);
					TMPFS_UNLOCK(tmp);
					error = EINVAL;
					if (newname != NULL)
						free(newname, M_TMPFSNAME);
					goto out_locked;
				}
				parent = n->tn_dir.tn_parent;
				TMPFS_NODE_UNLOCK(n);
				if (parent == NULL) {
					/* Hit a removed directory. */
					n = NULL;
					break;
				}
				TMPFS_NODE_LOCK(parent);
				if (parent->tn_dir.tn_parent == NULL) {
					TMPFS_NODE_UNLOCK(parent);
					n = NULL;
					break;
				}
				n = parent;
			}
			TMPFS_UNLOCK(tmp);
			if (n == NULL) {
				error = EINVAL;
				if (newname != NULL)
					free(newname, M_TMPFSNAME);
				goto out_locked;
			}
			/* n is the self-parented node, still locked. */
			TMPFS_NODE_UNLOCK(n);

			/* Adjust the parent pointer. */
			TMPFS_VALIDATE_DIR(fnode);
			TMPFS_NODE_LOCK(de->td_node);
			de->td_node->tn_dir.tn_parent = tdnode;
			TMPFS_NODE_UNLOCK(de->td_node);

			/*
			 * As a result of changing the target of the '..'
			 * entry, the link count of the source and target
			 * directories has to be adjusted.
			 */
			TMPFS_NODE_LOCK(tdnode);
			TMPFS_ASSERT_LOCKED(tdnode);
			tdnode->tn_links++;
			TMPFS_NODE_UNLOCK(tdnode);

			TMPFS_NODE_LOCK(fdnode);
			TMPFS_ASSERT_LOCKED(fdnode);
			fdnode->tn_links--;
			TMPFS_NODE_UNLOCK(fdnode);
		}

		/*
		 * Do the move: just remove the entry from the source directory
		 * and insert it into the target one.
		 */
		tmpfs_dir_detach(fdvp, de);
		if (fcnp->cn_flags & DOWHITEOUT)
			tmpfs_dir_whiteout_add(fdvp, fcnp);
		if (tcnp->cn_flags & ISWHITEOUT)
			tmpfs_dir_whiteout_remove(tdvp, tcnp);
		tmpfs_dir_attach(tdvp, de);
	}

	/*
	 * If the name has changed, we need to make it effective by changing
	 * it in the directory entry.
	 */
	if (newname != NULL) {
		/*
		 * NOTE(review): this length check only runs after the
		 * buffer was already allocated above.
		 */
		MPASS(tcnp->cn_namelen <= MAXNAMLEN);

		free(de->td_name, M_TMPFSNAME);
		de->td_namelen = (uint16_t)tcnp->cn_namelen;
		memcpy(newname, tcnp->cn_nameptr, tcnp->cn_namelen);
		de->td_name = newname;

		fnode->tn_status |= TMPFS_NODE_CHANGED;
		tdnode->tn_status |= TMPFS_NODE_MODIFIED;
	}

	/*
	 * If we are overwriting an entry, we have to remove the old one
	 * from the target directory.
	 */
	if (tvp != NULL) {
		/* Remove the old entry from the target directory. */
		de = tmpfs_dir_lookup(tdnode, tnode, tcnp);
		tmpfs_dir_detach(tdvp, de);

		/*
		 * Free the directory entry we just deleted.  Note that the
		 * node referred by it will not be removed until the vnode is
		 * really reclaimed.
		 */
		tmpfs_free_dirent(VFS_TO_TMPFS(tvp->v_mount), de, TRUE);
	}
	cache_purge(fvp);

	error = 0;

out_locked:
	/* Undo the vn_lock() taken on fdvp above (same condition). */
	if (fdvp != tdvp && fdvp != tvp)
		VOP_UNLOCK(fdvp, 0);

out:
	/* Release target nodes. */
	/*
	 * XXX: I don't understand when tdvp can be the same as tvp, but
	 * other code takes care of this...
	 */
	if (tdvp == tvp)
		vrele(tdvp);
	else
		vput(tdvp);
	if (tvp != NULL)
		vput(tvp);

	/* Release source nodes. */
	vrele(fdvp);
	vrele(fvp);

	return error;
}
/*
 * Allocates a new node of type 'type' inside the 'tmp' mount point, with
 * its owner set to 'uid', its group to 'gid' and its mode set to 'mode'.
 *
 * If the node type is 'VBLK' or 'VCHR', 'rmajor' and 'rminor' specify
 * the device the node represents ('rmajor' must not be VNOVAL).
 *
 * If the node type is 'VLNK', 'target' specifies the file name of the
 * target of the symbolic link being created; it must be NULL for every
 * other type.
 *
 * The node is taken from the mount's node pool, provided the mount has
 * not reached its node limit, and is linked onto the mount's list of
 * used nodes before being returned through '*node'.
 *
 * Returns zero on success, ENOSPC when the node limit is reached or an
 * allocation fails, or EINVAL when the device numbers are invalid.
 */
int
tmpfs_alloc_node(struct tmpfs_mount *tmp, enum vtype type,
    uid_t uid, gid_t gid, mode_t mode,
    char *target, int rmajor, int rminor, struct tmpfs_node **node)
{
	struct tmpfs_node *np;
	struct timespec now;
	udev_t rdev;
	int error;

	KKASSERT(IFF(type == VLNK, target != NULL));
	KKASSERT(IFF(type == VBLK || type == VCHR, rmajor != VNOVAL));

	/* Respect the per-mount node limit. */
	if (tmp->tm_nodes_inuse >= tmp->tm_nodes_max)
		return ENOSPC;

	np = objcache_get(tmp->tm_node_pool, M_WAITOK | M_NULLOK);
	if (np == NULL)
		return ENOSPC;

	/* Generic initialization. */
	np->tn_type = type;
	vfs_timestamp(&now);
	np->tn_ctime = np->tn_mtime = np->tn_atime = now.tv_sec;
	np->tn_ctimensec = np->tn_mtimensec = np->tn_atimensec = now.tv_nsec;
	np->tn_uid = uid;
	np->tn_gid = gid;
	np->tn_mode = mode;
	np->tn_id = tmpfs_fetch_ino(tmp);
	np->tn_advlock.init_done = 0;
	KKASSERT(np->tn_links == 0);

	/* Type-specific initialization. */
	switch (np->tn_type) {
	case VBLK:
	case VCHR:
		rdev = makeudev(rmajor, rminor);
		if (rdev == NOUDEV) {
			error = EINVAL;
			goto discard;
		}
		np->tn_rdev = rdev;
		break;

	case VDIR:
		RB_INIT(&np->tn_dir.tn_dirtree);
		RB_INIT(&np->tn_dir.tn_cookietree);
		np->tn_size = 0;
		break;

	case VFIFO:
	case VSOCK:
		/* Nothing beyond the generic fields. */
		break;

	case VLNK:
		/* Keep a NUL-terminated private copy of the link target. */
		np->tn_size = strlen(target);
		np->tn_link = kmalloc(np->tn_size + 1, tmp->tm_name_zone,
		    M_WAITOK | M_NULLOK);
		if (np->tn_link == NULL) {
			error = ENOSPC;
			goto discard;
		}
		bcopy(target, np->tn_link, np->tn_size);
		np->tn_link[np->tn_size] = '\0';
		break;

	case VREG:
		np->tn_reg.tn_aobj = swap_pager_alloc(NULL, 0,
		    VM_PROT_DEFAULT, 0);
		np->tn_reg.tn_aobj_pages = 0;
		np->tn_size = 0;
		vm_object_set_flag(np->tn_reg.tn_aobj, OBJ_NOPAGEIN);
		break;

	default:
		panic("tmpfs_alloc_node: type %p %d", np, (int)np->tn_type);
	}

	/* Publish the node on the mount's list of used nodes. */
	TMPFS_NODE_LOCK(np);
	TMPFS_LOCK(tmp);
	LIST_INSERT_HEAD(&tmp->tm_nodes_used, np, tn_entries);
	tmp->tm_nodes_inuse++;
	TMPFS_UNLOCK(tmp);
	TMPFS_NODE_UNLOCK(np);

	*node = np;
	return 0;

discard:
	/* Type-specific setup failed: hand the node back to the pool. */
	objcache_put(tmp->tm_node_pool, np);
	return error;
}
/* * Allocates a new file of type 'type' and adds it to the parent directory * 'dvp'; this addition is done using the component name given in 'cnp'. * The ownership of the new file is automatically assigned based on the * credentials of the caller (through 'cnp'), the group is set based on * the parent directory and the mode is determined from the 'vap' argument. * If successful, *vpp holds a vnode to the newly created file and zero * is returned. Otherwise *vpp is NULL and the function returns an * appropriate error code. */ int tmpfs_alloc_file(struct vnode *dvp, struct vnode **vpp, struct vattr *vap, struct namecache *ncp, struct ucred *cred, char *target) { int error; struct tmpfs_dirent *de; struct tmpfs_mount *tmp; struct tmpfs_node *dnode; struct tmpfs_node *node; tmp = VFS_TO_TMPFS(dvp->v_mount); dnode = VP_TO_TMPFS_DIR(dvp); *vpp = NULL; /* * If the directory was removed but a process was CD'd into it, * we do not allow any more file/dir creation within it. Otherwise * we will lose track of it. */ KKASSERT(dnode->tn_type == VDIR); if (dnode != tmp->tm_root && dnode->tn_dir.tn_parent == NULL) return ENOENT; /* * Make sure the link count does not overflow. */ if (vap->va_type == VDIR && dnode->tn_links >= LINK_MAX) return EMLINK; /* Allocate a node that represents the new file. */ error = tmpfs_alloc_node(tmp, vap->va_type, cred->cr_uid, dnode->tn_gid, vap->va_mode, target, vap->va_rmajor, vap->va_rminor, &node); if (error != 0) return error; TMPFS_NODE_LOCK(node); /* Allocate a directory entry that points to the new file. */ error = tmpfs_alloc_dirent(tmp, node, ncp->nc_name, ncp->nc_nlen, &de); if (error != 0) { tmpfs_free_node(tmp, node); /* eats node lock */ return error; } /* Allocate a vnode for the new file. 
*/ error = tmpfs_alloc_vp(dvp->v_mount, NULL, node, LK_EXCLUSIVE, vpp); if (error != 0) { tmpfs_free_dirent(tmp, de); tmpfs_free_node(tmp, node); /* eats node lock */ return error; } /* * Now that all required items are allocated, we can proceed to * insert the new node into the directory, an operation that * cannot fail. */ tmpfs_dir_attach(dnode, de); TMPFS_NODE_UNLOCK(node); return error; }
/*
 * Allocates a new vnode for the node 'node' or returns a new reference
 * to an existing one if the node already had a vnode referencing it.
 * The resulting locked vnode is returned in *vpp.
 *
 * Returns zero on success or an appropriate error code on failure
 * (ENOENT for a doomed node, EAGAIN as described below, or the error
 * from tsleep() / getnewvnode()).
 *
 * The caller must ensure that node cannot go away (usually by holding
 * the related directory entry).
 *
 * If dnode is non-NULL this routine avoids deadlocking against it but
 * can return EAGAIN.  Caller must try again.  The dnode lock will cycle
 * in this case, it remains locked on return in all cases.  dnode must
 * be shared-locked.
 */
int
tmpfs_alloc_vp(struct mount *mp,
	       struct tmpfs_node *dnode, struct tmpfs_node *node, int lkflag,
	       struct vnode **vpp)
{
	int error = 0;
	struct vnode *vp;

loop:
	/*
	 * Interlocked extraction from node.  This can race many things.
	 * We have to get a soft reference on the vnode while we hold
	 * the node locked, then acquire it properly and check for races.
	 */
	TMPFS_NODE_LOCK(node);
	if ((vp = node->tn_vnode) != NULL) {
		KKASSERT((node->tn_vpstate & TMPFS_VNODE_DOOMED) == 0);
		vhold(vp);
		TMPFS_NODE_UNLOCK(node);

		if (dnode) {
			/*
			 * Special-case handling to avoid deadlocking against
			 * dnode.  This case has been validated and occurs
			 * every so often during synth builds.
			 *
			 * Try a non-blocking vget first.  If that fails,
			 * drop dnode, do a blocking vget only to wait for
			 * the vnode (it is unlocked again right away), then
			 * re-take dnode shared and ask the caller to retry.
			 */
			if (vget(vp, (lkflag & ~LK_RETRY) |
				     LK_NOWAIT |
				     LK_EXCLUSIVE) != 0) {
				TMPFS_NODE_UNLOCK(dnode);
				if (vget(vp, (lkflag & ~LK_RETRY) |
					     LK_SLEEPFAIL |
					     LK_EXCLUSIVE) == 0) {
					vn_unlock(vp);
				}
				vdrop(vp);
				TMPFS_NODE_LOCK_SH(dnode);

				return EAGAIN;
			}
		} else {
			/*
			 * Normal path
			 */
			if (vget(vp, lkflag | LK_EXCLUSIVE) != 0) {
				vdrop(vp);
				goto loop;
			}
		}
		/* Lost a race: the node was re-pointed while we slept. */
		if (node->tn_vnode != vp) {
			vput(vp);
			vdrop(vp);
			goto loop;
		}
		/* vget succeeded; the soft reference is no longer needed. */
		vdrop(vp);
		goto out;
	}
	/* vp is NULL */

	/*
	 * This should never happen.
	 */
	if (node->tn_vpstate & TMPFS_VNODE_DOOMED) {
		TMPFS_NODE_UNLOCK(node);
		error = ENOENT;
		goto out;
	}

	/*
	 * Interlock against other calls to tmpfs_alloc_vp() trying to
	 * allocate and assign a vp to node.  Waiters set WANT and sleep
	 * on tn_vpstate; the allocating thread wakes them up below.
	 */
	if (node->tn_vpstate & TMPFS_VNODE_ALLOCATING) {
		node->tn_vpstate |= TMPFS_VNODE_WANT;
		error = tsleep(&node->tn_vpstate, PINTERLOCKED | PCATCH,
			       "tmpfs_alloc_vp", 0);
		TMPFS_NODE_UNLOCK(node);
		if (error)
			return error;
		goto loop;
	}
	node->tn_vpstate |= TMPFS_VNODE_ALLOCATING;
	TMPFS_NODE_UNLOCK(node);

	/*
	 * Allocate a new vnode (may block).  The ALLOCATING flag should
	 * prevent a race against someone else assigning node->tn_vnode.
	 *
	 * NOTE(review): on failure the code jumps to 'unlock' and stores
	 * vp into node->tn_vnode and *vpp; this relies on getnewvnode()
	 * leaving vp NULL when it fails -- confirm.
	 */
	error = getnewvnode(VT_TMPFS, mp, &vp, VLKTIMEOUT, LK_CANRECURSE);
	if (error != 0)
		goto unlock;

	KKASSERT(node->tn_vnode == NULL);
	KKASSERT(vp != NULL);
	vp->v_data = node;
	vp->v_type = node->tn_type;

	/* Type-specific initialization. */
	switch (node->tn_type) {
	case VBLK:
		/* FALLTHROUGH */
	case VCHR:
		/* FALLTHROUGH */
	case VSOCK:
		break;
	case VREG:
		/*
		 * VMIO is mandatory.  Tmpfs also supports KVABIO
		 * for its tmpfs_strategy().
		 */
		vsetflags(vp, VKVABIO);
		vinitvmio(vp, node->tn_size, TMPFS_BLKSIZE, -1);
		break;
	case VLNK:
		break;
	case VFIFO:
		/* FIFOs use the mount's fifo vnode operations vector. */
		vp->v_ops = &mp->mnt_vn_fifo_ops;
		break;
	case VDIR:
		break;

	default:
		panic("tmpfs_alloc_vp: type %p %d", node, (int)node->tn_type);
	}

unlock:
	/*
	 * Publish the vnode (or the failure), clear ALLOCATING and wake
	 * up any thread that went to sleep on the interlock above.
	 */
	TMPFS_NODE_LOCK(node);

	KKASSERT(node->tn_vpstate & TMPFS_VNODE_ALLOCATING);
	node->tn_vpstate &= ~TMPFS_VNODE_ALLOCATING;
	node->tn_vnode = vp;

	if (node->tn_vpstate & TMPFS_VNODE_WANT) {
		node->tn_vpstate &= ~TMPFS_VNODE_WANT;
		TMPFS_NODE_UNLOCK(node);
		wakeup(&node->tn_vpstate);
	} else {
		TMPFS_NODE_UNLOCK(node);
	}

out:
	*vpp = vp;
	/* Success must mean a non-NULL, locked vnode, and vice versa. */
	KKASSERT(IFF(error == 0, *vpp != NULL && vn_islocked(*vpp)));

	return error;
}
static int tmpfs_read (struct vop_read_args *ap) { struct buf *bp; struct vnode *vp = ap->a_vp; struct uio *uio = ap->a_uio; struct tmpfs_node *node; off_t base_offset; size_t offset; size_t len; size_t resid; int error; /* * Check the basics */ if (uio->uio_offset < 0) return (EINVAL); if (vp->v_type != VREG) return (EINVAL); /* * Extract node, try to shortcut the operation through * the VM page cache, allowing us to avoid buffer cache * overheads. */ node = VP_TO_TMPFS_NODE(vp); resid = uio->uio_resid; error = vop_helper_read_shortcut(ap); if (error) return error; if (uio->uio_resid == 0) { if (resid) goto finished; return error; } /* * Fall-through to our normal read code. */ while (uio->uio_resid > 0 && uio->uio_offset < node->tn_size) { /* * Use buffer cache I/O (via tmpfs_strategy) */ offset = (size_t)uio->uio_offset & TMPFS_BLKMASK64; base_offset = (off_t)uio->uio_offset - offset; bp = getcacheblk(vp, base_offset, TMPFS_BLKSIZE, 0); if (bp == NULL) { error = bread(vp, base_offset, TMPFS_BLKSIZE, &bp); if (error) { brelse(bp); kprintf("tmpfs_read bread error %d\n", error); break; } /* * tmpfs pretty much fiddles directly with the VM * system, don't let it exhaust it or we won't play * nice with other processes. * * Only do this if the VOP is coming from a normal * read/write. The VM system handles the case for * UIO_NOCOPY. */ if (uio->uio_segflg != UIO_NOCOPY) vm_wait_nominal(); } bp->b_flags |= B_CLUSTEROK; /* * Figure out how many bytes we can actually copy this loop. */ len = TMPFS_BLKSIZE - offset; if (len > uio->uio_resid) len = uio->uio_resid; if (len > node->tn_size - uio->uio_offset) len = (size_t)(node->tn_size - uio->uio_offset); error = uiomovebp(bp, (char *)bp->b_data + offset, len, uio); bqrelse(bp); if (error) { kprintf("tmpfs_read uiomove error %d\n", error); break; } } finished: if ((node->tn_status & TMPFS_NODE_ACCESSED) == 0) { TMPFS_NODE_LOCK(node); node->tn_status |= TMPFS_NODE_ACCESSED; TMPFS_NODE_UNLOCK(node); } return (error); }
static int tmpfs_read (struct vop_read_args *ap) { struct buf *bp; struct vnode *vp = ap->a_vp; struct uio *uio = ap->a_uio; struct tmpfs_node *node; off_t base_offset; size_t offset; size_t len; int error; error = 0; if (uio->uio_resid == 0) { return error; } node = VP_TO_TMPFS_NODE(vp); if (uio->uio_offset < 0) return (EINVAL); if (vp->v_type != VREG) return (EINVAL); while (uio->uio_resid > 0 && uio->uio_offset < node->tn_size) { /* * Use buffer cache I/O (via tmpfs_strategy) */ offset = (size_t)uio->uio_offset & BMASK; base_offset = (off_t)uio->uio_offset - offset; bp = getcacheblk(vp, base_offset, BSIZE, 0); if (bp == NULL) { lwkt_gettoken(&vp->v_mount->mnt_token); error = bread(vp, base_offset, BSIZE, &bp); if (error) { brelse(bp); lwkt_reltoken(&vp->v_mount->mnt_token); kprintf("tmpfs_read bread error %d\n", error); break; } lwkt_reltoken(&vp->v_mount->mnt_token); } /* * Figure out how many bytes we can actually copy this loop. */ len = BSIZE - offset; if (len > uio->uio_resid) len = uio->uio_resid; if (len > node->tn_size - uio->uio_offset) len = (size_t)(node->tn_size - uio->uio_offset); error = uiomovebp(bp, (char *)bp->b_data + offset, len, uio); bqrelse(bp); if (error) { kprintf("tmpfs_read uiomove error %d\n", error); break; } } TMPFS_NODE_LOCK(node); node->tn_status |= TMPFS_NODE_ACCESSED; TMPFS_NODE_UNLOCK(node); return(error); }
static int tmpfs_write (struct vop_write_args *ap) { struct buf *bp; struct vnode *vp = ap->a_vp; struct uio *uio = ap->a_uio; struct thread *td = uio->uio_td; struct tmpfs_node *node; boolean_t extended; off_t oldsize; int error; off_t base_offset; size_t offset; size_t len; struct rlimit limit; int trivial = 0; int kflags = 0; int seqcount; error = 0; if (uio->uio_resid == 0) { return error; } node = VP_TO_TMPFS_NODE(vp); if (vp->v_type != VREG) return (EINVAL); seqcount = ap->a_ioflag >> 16; TMPFS_NODE_LOCK(node); oldsize = node->tn_size; if (ap->a_ioflag & IO_APPEND) uio->uio_offset = node->tn_size; /* * Check for illegal write offsets. */ if (uio->uio_offset + uio->uio_resid > VFS_TO_TMPFS(vp->v_mount)->tm_maxfilesize) { error = EFBIG; goto done; } /* * NOTE: Ignore if UIO does not come from a user thread (e.g. VN). */ if (vp->v_type == VREG && td != NULL && td->td_lwp != NULL) { error = kern_getrlimit(RLIMIT_FSIZE, &limit); if (error) goto done; if (uio->uio_offset + uio->uio_resid > limit.rlim_cur) { ksignal(td->td_proc, SIGXFSZ); error = EFBIG; goto done; } } /* * Extend the file's size if necessary */ extended = ((uio->uio_offset + uio->uio_resid) > node->tn_size); while (uio->uio_resid > 0) { /* * Don't completely blow out running buffer I/O * when being hit from the pageout daemon. */ if (uio->uio_segflg == UIO_NOCOPY && (ap->a_ioflag & IO_RECURSE) == 0) { bwillwrite(TMPFS_BLKSIZE); } /* * Use buffer cache I/O (via tmpfs_strategy) */ offset = (size_t)uio->uio_offset & TMPFS_BLKMASK64; base_offset = (off_t)uio->uio_offset - offset; len = TMPFS_BLKSIZE - offset; if (len > uio->uio_resid) len = uio->uio_resid; if ((uio->uio_offset + len) > node->tn_size) { trivial = (uio->uio_offset <= node->tn_size); error = tmpfs_reg_resize(vp, uio->uio_offset + len, trivial); if (error) break; } /* * Read to fill in any gaps. 
Theoretically we could * optimize this if the write covers the entire buffer * and is not a UIO_NOCOPY write, however this can lead * to a security violation exposing random kernel memory * (whatever junk was in the backing VM pages before). * * So just use bread() to do the right thing. */ error = bread(vp, base_offset, TMPFS_BLKSIZE, &bp); error = uiomovebp(bp, (char *)bp->b_data + offset, len, uio); if (error) { kprintf("tmpfs_write uiomove error %d\n", error); brelse(bp); break; } if (uio->uio_offset > node->tn_size) { node->tn_size = uio->uio_offset; kflags |= NOTE_EXTEND; } kflags |= NOTE_WRITE; /* * Always try to flush the page in the UIO_NOCOPY case. This * can come from the pageout daemon or during vnode eviction. * It is not necessarily going to be marked IO_ASYNC/IO_SYNC. * * For the normal case we buwrite(), dirtying the underlying * VM pages instead of dirtying the buffer and releasing the * buffer as a clean buffer. This allows tmpfs to use * essentially all available memory to cache file data. * If we used bdwrite() the buffer cache would wind up * flushing the data to swap too quickly. * * But because tmpfs can seriously load the VM system we * fall-back to using bdwrite() when free memory starts * to get low. This shifts the load away from the VM system * and makes tmpfs act more like a normal filesystem with * regards to disk activity. * * tmpfs pretty much fiddles directly with the VM * system, don't let it exhaust it or we won't play * nice with other processes. Only do this if the * VOP is coming from a normal read/write. The VM system * handles the case for UIO_NOCOPY. */ bp->b_flags |= B_CLUSTEROK; if (uio->uio_segflg == UIO_NOCOPY) { /* * Flush from the pageout daemon, deal with * potentially very heavy tmpfs write activity * causing long stalls in the pageout daemon * before pages get to free/cache. * * (a) Under severe pressure setting B_DIRECT will * cause a buffer release to try to free the * underlying pages. 
* * (b) Under modest memory pressure the B_RELBUF * alone is sufficient to get the pages moved * to the cache. We could also force this by * setting B_NOTMETA but that might have other * unintended side-effects (e.g. setting * PG_NOTMETA on the VM page). * * Hopefully this will unblock the VM system more * quickly under extreme tmpfs write load. */ if (vm_page_count_min(vm_page_free_hysteresis)) bp->b_flags |= B_DIRECT; bp->b_flags |= B_AGE | B_RELBUF; bp->b_act_count = 0; /* buffer->deactivate pgs */ cluster_awrite(bp); } else if (vm_page_count_target()) { /* * Normal (userland) write but we are low on memory, * run the buffer the buffer cache. */ bp->b_act_count = 0; /* buffer->deactivate pgs */ bdwrite(bp); } else { /* * Otherwise run the buffer directly through to the * backing VM store. */ buwrite(bp); /*vm_wait_nominal();*/ } if (bp->b_error) { kprintf("tmpfs_write bwrite error %d\n", bp->b_error); break; } } if (error) { if (extended) { (void)tmpfs_reg_resize(vp, oldsize, trivial); kflags &= ~NOTE_EXTEND; } goto done; } /* * Currently we don't set the mtime on files modified via mmap() * because we can't tell the difference between those modifications * and an attempt by the pageout daemon to flush tmpfs pages to * swap. * * This is because in order to defer flushes as long as possible * buwrite() works by marking the underlying VM pages dirty in * order to be able to dispose of the buffer cache buffer without * flushing it. */ if (uio->uio_segflg != UIO_NOCOPY) node->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED; if (extended) node->tn_status |= TMPFS_NODE_CHANGED; if (node->tn_mode & (S_ISUID | S_ISGID)) { if (priv_check_cred(ap->a_cred, PRIV_VFS_RETAINSUGID, 0)) node->tn_mode &= ~(S_ISUID | S_ISGID); } done: TMPFS_NODE_UNLOCK(node); if (kflags) tmpfs_knote(vp, kflags); return(error); }
static int tmpfs_write (struct vop_write_args *ap) { struct buf *bp; struct vnode *vp = ap->a_vp; struct uio *uio = ap->a_uio; struct thread *td = uio->uio_td; struct tmpfs_node *node; boolean_t extended; off_t oldsize; int error; off_t base_offset; size_t offset; size_t len; struct rlimit limit; int trivial = 0; int kflags = 0; error = 0; if (uio->uio_resid == 0) { return error; } node = VP_TO_TMPFS_NODE(vp); if (vp->v_type != VREG) return (EINVAL); lwkt_gettoken(&vp->v_mount->mnt_token); oldsize = node->tn_size; if (ap->a_ioflag & IO_APPEND) uio->uio_offset = node->tn_size; /* * Check for illegal write offsets. */ if (uio->uio_offset + uio->uio_resid > VFS_TO_TMPFS(vp->v_mount)->tm_maxfilesize) { lwkt_reltoken(&vp->v_mount->mnt_token); return (EFBIG); } if (vp->v_type == VREG && td != NULL) { error = kern_getrlimit(RLIMIT_FSIZE, &limit); if (error != 0) { lwkt_reltoken(&vp->v_mount->mnt_token); return error; } if (uio->uio_offset + uio->uio_resid > limit.rlim_cur) { ksignal(td->td_proc, SIGXFSZ); lwkt_reltoken(&vp->v_mount->mnt_token); return (EFBIG); } } /* * Extend the file's size if necessary */ extended = ((uio->uio_offset + uio->uio_resid) > node->tn_size); while (uio->uio_resid > 0) { /* * Use buffer cache I/O (via tmpfs_strategy) */ offset = (size_t)uio->uio_offset & BMASK; base_offset = (off_t)uio->uio_offset - offset; len = BSIZE - offset; if (len > uio->uio_resid) len = uio->uio_resid; if ((uio->uio_offset + len) > node->tn_size) { trivial = (uio->uio_offset <= node->tn_size); error = tmpfs_reg_resize(vp, uio->uio_offset + len, trivial); if (error) break; } /* * Read to fill in any gaps. Theoretically we could * optimize this if the write covers the entire buffer * and is not a UIO_NOCOPY write, however this can lead * to a security violation exposing random kernel memory * (whatever junk was in the backing VM pages before). * * So just use bread() to do the right thing. 
*/ error = bread(vp, base_offset, BSIZE, &bp); error = uiomovebp(bp, (char *)bp->b_data + offset, len, uio); if (error) { kprintf("tmpfs_write uiomove error %d\n", error); brelse(bp); break; } if (uio->uio_offset > node->tn_size) { node->tn_size = uio->uio_offset; kflags |= NOTE_EXTEND; } kflags |= NOTE_WRITE; /* * Always try to flush the page if the request is coming * from the pageout daemon (IO_ASYNC), else buwrite() the * buffer. * * buwrite() dirties the underlying VM pages instead of * dirtying the buffer, releasing the buffer as a clean * buffer. This allows tmpfs to use essentially all * available memory to cache file data. If we used bdwrite() * the buffer cache would wind up flushing the data to * swap too quickly. */ bp->b_flags |= B_AGE; if (ap->a_ioflag & IO_ASYNC) { bawrite(bp); } else { buwrite(bp); } if (bp->b_error) { kprintf("tmpfs_write bwrite error %d\n", bp->b_error); break; } } if (error) { if (extended) { (void)tmpfs_reg_resize(vp, oldsize, trivial); kflags &= ~NOTE_EXTEND; } goto done; } /* * Currently we don't set the mtime on files modified via mmap() * because we can't tell the difference between those modifications * and an attempt by the pageout daemon to flush tmpfs pages to * swap. * * This is because in order to defer flushes as long as possible * buwrite() works by marking the underlying VM pages dirty in * order to be able to dispose of the buffer cache buffer without * flushing it. */ TMPFS_NODE_LOCK(node); if (uio->uio_segflg != UIO_NOCOPY) node->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED; if (extended) node->tn_status |= TMPFS_NODE_CHANGED; if (node->tn_mode & (S_ISUID | S_ISGID)) { if (priv_check_cred(ap->a_cred, PRIV_VFS_RETAINSUGID, 0)) node->tn_mode &= ~(S_ISUID | S_ISGID); } TMPFS_NODE_UNLOCK(node); done: tmpfs_knote(vp, kflags); lwkt_reltoken(&vp->v_mount->mnt_token); return(error); }
static int tmpfs_nlink(struct vop_nlink_args *v) { struct vnode *dvp = v->a_dvp; struct vnode *vp = v->a_vp; struct namecache *ncp = v->a_nch->ncp; struct tmpfs_dirent *de; struct tmpfs_node *node; struct tmpfs_node *dnode; struct mount *mp; int error; mp = dvp->v_mount; KKASSERT(dvp != vp); /* XXX When can this be false? */ node = VP_TO_TMPFS_NODE(vp); dnode = VP_TO_TMPFS_NODE(dvp); TMPFS_NODE_LOCK(dnode); /* XXX: Why aren't the following two tests done by the caller? */ /* Hard links of directories are forbidden. */ if (vp->v_type == VDIR) { error = EPERM; goto out; } /* Cannot create cross-device links. */ if (dvp->v_mount != vp->v_mount) { error = EXDEV; goto out; } /* Ensure that we do not overflow the maximum number of links imposed * by the system. */ KKASSERT(node->tn_links <= LINK_MAX); if (node->tn_links >= LINK_MAX) { error = EMLINK; goto out; } /* We cannot create links of files marked immutable or append-only. */ if (node->tn_flags & (IMMUTABLE | APPEND)) { error = EPERM; goto out; } /* Allocate a new directory entry to represent the node. */ error = tmpfs_alloc_dirent(VFS_TO_TMPFS(vp->v_mount), node, ncp->nc_name, ncp->nc_nlen, &de); if (error != 0) goto out; /* Insert the new directory entry into the appropriate directory. */ tmpfs_dir_attach(dnode, de); /* vp link count has changed, so update node times. */ TMPFS_NODE_LOCK(node); node->tn_status |= TMPFS_NODE_CHANGED; TMPFS_NODE_UNLOCK(node); tmpfs_update(vp); tmpfs_knote(vp, NOTE_LINK); cache_setunresolved(v->a_nch); cache_setvp(v->a_nch, vp); error = 0; out: TMPFS_NODE_UNLOCK(dnode); if (error == 0) tmpfs_knote(dvp, NOTE_WRITE); return error; }
/* * Destroys the node pointed to by node from the file system 'tmp'. * If the node does not belong to the given mount point, the results are * unpredicted. * * If the node references a directory; no entries are allowed because * their removal could need a recursive algorithm, something forbidden in * kernel space. Furthermore, there is not need to provide such * functionality (recursive removal) because the only primitives offered * to the user are the removal of empty directories and the deletion of * individual files. * * Note that nodes are not really deleted; in fact, when a node has been * allocated, it cannot be deleted during the whole life of the file * system. Instead, they are moved to the available list and remain there * until reused. * * A caller must have TMPFS_NODE_LOCK(node) and this function unlocks it. */ void tmpfs_free_node(struct tmpfs_mount *tmp, struct tmpfs_node *node) { vm_pindex_t pages = 0; #ifdef INVARIANTS TMPFS_ASSERT_ELOCKED(node); KKASSERT(node->tn_vnode == NULL); KKASSERT((node->tn_vpstate & TMPFS_VNODE_ALLOCATING) == 0); #endif TMPFS_LOCK(tmp); LIST_REMOVE(node, tn_entries); tmp->tm_nodes_inuse--; TMPFS_UNLOCK(tmp); TMPFS_NODE_UNLOCK(node); /* Caller has this lock */ switch (node->tn_type) { case VNON: /* Do not do anything. VNON is provided to let the * allocation routine clean itself easily by avoiding * duplicating code in it. */ /* FALLTHROUGH */ case VBLK: /* FALLTHROUGH */ case VCHR: /* FALLTHROUGH */ break; case VDIR: /* * The parent link can be NULL if this is the root * node or if it is a directory node that was rmdir'd. * * XXX what if node is a directory which still contains * directory entries (e.g. due to a forced umount) ? */ node->tn_size = 0; KKASSERT(node->tn_dir.tn_parent == NULL); /* * If the root node is being destroyed don't leave a * dangling pointer in tmpfs_mount. 
*/ if (node == tmp->tm_root) tmp->tm_root = NULL; break; case VFIFO: /* FALLTHROUGH */ case VSOCK: break; case VLNK: kfree(node->tn_link, tmp->tm_name_zone); node->tn_link = NULL; node->tn_size = 0; break; case VREG: if (node->tn_reg.tn_aobj != NULL) vm_object_deallocate(node->tn_reg.tn_aobj); node->tn_reg.tn_aobj = NULL; pages = node->tn_reg.tn_aobj_pages; break; default: panic("tmpfs_free_node: type %p %d", node, (int)node->tn_type); } /* * Clean up fields for the next allocation. The objcache only ctors * new allocations. */ tmpfs_node_ctor(node, NULL, 0); objcache_put(tmp->tm_node_pool, node); /* node is now invalid */ if (pages) atomic_add_long(&tmp->tm_pages_used, -(long)pages); }