static int
null_vptocnp(struct vop_vptocnp_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct vnode **dvp = ap->a_vpp;
	struct vnode *lvp, *ldvp;
	struct ucred *cred = ap->a_cred;
	int error, locked;

	if (vp->v_type == VDIR)
		return (vop_stdvptocnp(ap));

	locked = VOP_ISLOCKED(vp);
	lvp = NULLVPTOLOWERVP(vp);
	vhold(lvp);
	VOP_UNLOCK(vp, 0); /* vp is held by vn_vptocnp_locked that called us */
	ldvp = lvp;
	error = vn_vptocnp(&ldvp, cred, ap->a_buf, ap->a_buflen);
	vdrop(lvp);
	if (error != 0) {
		vn_lock(vp, locked | LK_RETRY);
		return (ENOENT);
	}

	/*
	 * Exclusive lock is required by insmntque1 call in
	 * null_nodeget()
	 */
	error = vn_lock(ldvp, LK_EXCLUSIVE);
	if (error != 0) {
		vn_lock(vp, locked | LK_RETRY);
		vdrop(ldvp);
		return (ENOENT);
	}
	vref(ldvp);
	vdrop(ldvp);
	error = null_nodeget(vp->v_mount, ldvp, dvp);
	if (error == 0) {
#ifdef DIAGNOSTIC
		NULLVPTOLOWERVP(*dvp);
#endif
		vhold(*dvp);
		vput(*dvp);
	} else
		vput(ldvp);

	vn_lock(vp, locked | LK_RETRY);
	return (error);
}
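/*
 * The save/drop/restore sequence above is a recurring nullfs idiom:
 * remember the caller's lock mode, release the lock around the
 * lower-layer work, then restore the original mode.  A minimal sketch
 * of just that idiom; op_on_lower() is a hypothetical stand-in for
 * the actual lower-layer call.
 */
static int
example_call_lower(struct vnode *vp, struct vnode *lvp)
{
	int error, locked;

	locked = VOP_ISLOCKED(vp);	/* LK_SHARED or LK_EXCLUSIVE */
	vhold(lvp);			/* keep lvp from being freed */
	VOP_UNLOCK(vp, 0);

	error = op_on_lower(lvp);	/* hypothetical lower-layer work */

	vdrop(lvp);
	vn_lock(vp, locked | LK_RETRY);	/* restore the original lock mode */
	return (error);
}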
/*
 * We have to carry on the locking protocol on the null layer vnodes
 * as we progress through the tree.  We also have to enforce read-only
 * if this layer is mounted read-only.
 */
static int
null_lookup(struct vop_lookup_args *ap)
{
	struct componentname *cnp = ap->a_cnp;
	struct vnode *dvp = ap->a_dvp;
	int flags = cnp->cn_flags;
	struct vnode *vp, *ldvp, *lvp;
	int error;

	if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
		return (EROFS);
	/*
	 * Although it is possible to call null_bypass(), we'll do
	 * a direct call to reduce overhead
	 */
	ldvp = NULLVPTOLOWERVP(dvp);
	vp = lvp = NULL;
	error = VOP_LOOKUP(ldvp, &lvp, cnp);
	if (error == EJUSTRETURN && (flags & ISLASTCN) &&
	    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
	    (cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME))
		error = EROFS;

	if ((error == 0 || error == EJUSTRETURN) && lvp != NULL) {
		if (ldvp == lvp) {
			*ap->a_vpp = dvp;
			VREF(dvp);
			vrele(lvp);
		} else {
			error = null_nodeget(dvp->v_mount, lvp, &vp);
			if (error)
				vput(lvp);
			else
				*ap->a_vpp = vp;
		}
	}
	return (error);
}
/*
 * Mount null layer
 */
static int
nullfs_mount(struct mount *mp)
{
	int error = 0;
	struct vnode *lowerrootvp, *vp;
	struct vnode *nullm_rootvp;
	struct null_mount *xmp;
	struct thread *td = curthread;
	char *target;
	int isvnunlocked = 0, len;
	struct nameidata nd, *ndp = &nd;

	NULLFSDEBUG("nullfs_mount(mp = %p)\n", (void *)mp);

	if (!prison_allow(td->td_ucred, PR_ALLOW_MOUNT_NULLFS))
		return (EPERM);

	if (mp->mnt_flag & MNT_ROOTFS)
		return (EOPNOTSUPP);

	/*
	 * Update is a no-op
	 */
	if (mp->mnt_flag & MNT_UPDATE) {
		/*
		 * Only support update mounts for NFS export.
		 */
		if (vfs_flagopt(mp->mnt_optnew, "export", NULL, 0))
			return (0);
		else
			return (EOPNOTSUPP);
	}

	/*
	 * Get argument
	 */
	error = vfs_getopt(mp->mnt_optnew, "target", (void **)&target, &len);
	if (error || target[len - 1] != '\0')
		return (EINVAL);

	/*
	 * Unlock lower node to avoid possible deadlock.
	 */
	if ((mp->mnt_vnodecovered->v_op == &null_vnodeops) &&
	    VOP_ISLOCKED(mp->mnt_vnodecovered) == LK_EXCLUSIVE) {
		VOP_UNLOCK(mp->mnt_vnodecovered, 0);
		isvnunlocked = 1;
	}
	/*
	 * Find lower node
	 */
	NDINIT(ndp, LOOKUP, FOLLOW|LOCKLEAF, UIO_SYSSPACE, target, curthread);
	error = namei(ndp);

	/*
	 * Re-lock vnode.
	 * XXXKIB This is deadlock-prone as well.
	 */
	if (isvnunlocked)
		vn_lock(mp->mnt_vnodecovered, LK_EXCLUSIVE | LK_RETRY);

	if (error)
		return (error);
	NDFREE(ndp, NDF_ONLY_PNBUF);

	/*
	 * Sanity check on lower vnode
	 */
	lowerrootvp = ndp->ni_vp;

	/*
	 * Check multi null mount to avoid `lock against myself' panic.
	 */
	if (lowerrootvp == VTONULL(mp->mnt_vnodecovered)->null_lowervp) {
		NULLFSDEBUG("nullfs_mount: multi null mount?\n");
		vput(lowerrootvp);
		return (EDEADLK);
	}

	xmp = (struct null_mount *) malloc(sizeof(struct null_mount),
	    M_NULLFSMNT, M_WAITOK | M_ZERO);

	/*
	 * Save reference to underlying FS
	 */
	xmp->nullm_vfs = lowerrootvp->v_mount;

	/*
	 * Save reference.  Each mount also holds
	 * a reference on the root vnode.
	 */
	error = null_nodeget(mp, lowerrootvp, &vp);

	/*
	 * Make sure the node alias worked
	 */
	if (error) {
		free(xmp, M_NULLFSMNT);
		return (error);
	}

	/*
	 * Keep a held reference to the root vnode.
	 * It is vrele'd in nullfs_unmount.
	 */
	nullm_rootvp = vp;
	nullm_rootvp->v_vflag |= VV_ROOT;
	xmp->nullm_rootvp = nullm_rootvp;

	/*
	 * Unlock the node (either the lower or the alias)
	 */
	VOP_UNLOCK(vp, 0);

	if (NULLVPTOLOWERVP(nullm_rootvp)->v_mount->mnt_flag & MNT_LOCAL) {
		MNT_ILOCK(mp);
		mp->mnt_flag |= MNT_LOCAL;
		MNT_IUNLOCK(mp);
	}

	xmp->nullm_flags |= NULLM_CACHE;
	if (vfs_getopt(mp->mnt_optnew, "nocache", NULL, NULL) == 0)
		xmp->nullm_flags &= ~NULLM_CACHE;

	MNT_ILOCK(mp);
	if ((xmp->nullm_flags & NULLM_CACHE) != 0) {
		mp->mnt_kern_flag |= lowerrootvp->v_mount->mnt_kern_flag &
		    (MNTK_SHARED_WRITES | MNTK_LOOKUP_SHARED |
		    MNTK_EXTENDED_SHARED);
	}
	mp->mnt_kern_flag |= MNTK_LOOKUP_EXCL_DOTDOT;
	MNT_IUNLOCK(mp);
	mp->mnt_data = xmp;
	vfs_getnewfsid(mp);
	if ((xmp->nullm_flags & NULLM_CACHE) != 0) {
		MNT_ILOCK(xmp->nullm_vfs);
		TAILQ_INSERT_TAIL(&xmp->nullm_vfs->mnt_uppers, mp,
		    mnt_upper_link);
		MNT_IUNLOCK(xmp->nullm_vfs);
	}

	vfs_mountedfrom(mp, target);

	NULLFSDEBUG("nullfs_mount: lower %s, alias at %s\n",
	    mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname);
	return (0);
}
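/*
 * For context, a minimal userland sketch of how such a mount is
 * requested: mount_nullfs(8) passes "fstype", "fspath", and the
 * "target" option (consumed by vfs_getopt() above) to nmount(2).
 * The paths below are placeholders.
 */
#include <sys/uio.h>
#include <sys/mount.h>
#include <string.h>

static void
set_opt(struct iovec *iov, const char *name, const char *val)
{
	iov[0].iov_base = __DECONST(char *, name);
	iov[0].iov_len = strlen(name) + 1;
	iov[1].iov_base = __DECONST(char *, val);
	iov[1].iov_len = strlen(val) + 1;
}

int
mount_null_example(void)
{
	struct iovec iov[6];

	set_opt(&iov[0], "fstype", "nullfs");
	set_opt(&iov[2], "fspath", "/mnt/alias");	/* where the alias appears */
	set_opt(&iov[4], "target", "/usr/src");		/* lower directory */
	return (nmount(iov, 6, 0));
}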
/*
 * This is the 10-Apr-92 bypass routine.
 * This version has been optimized for speed, throwing away some
 * safety checks.  It should still always work, but it's not as
 * robust to programmer errors.
 *
 * In general, we map all vnodes going down and unmap them on the way back.
 * As an exception to this, vnodes can be marked "unmapped" by setting
 * the Nth bit in operation's vdesc_flags.
 *
 * Also, some BSD vnode operations have the side effect of vrele'ing
 * their arguments.  With stacking, the reference counts are held
 * by the upper node, not the lower one, so we must handle these
 * side-effects here.  This is not of concern in Sun-derived systems
 * since there are no such side-effects.
 *
 * This makes the following assumptions:
 * - only one returned vpp
 * - no INOUT vpp's (Sun's vop_open has one of these)
 * - the vnode operation vector of the first vnode should be used
 *   to determine what implementation of the op should be invoked
 * - all mapped vnodes are of our vnode-type (NEEDSWORK:
 *   problems on rmdir'ing mount points and renaming?)
 */
int
null_bypass(struct vop_generic_args *ap)
{
	struct vnode **this_vp_p;
	int error;
	struct vnode *old_vps[VDESC_MAX_VPS];
	struct vnode **vps_p[VDESC_MAX_VPS];
	struct vnode ***vppp;
	struct vnodeop_desc *descp = ap->a_desc;
	int reles, i;

	if (null_bug_bypass)
		printf ("null_bypass: %s\n", descp->vdesc_name);

#ifdef DIAGNOSTIC
	/*
	 * We require at least one vp.
	 */
	if (descp->vdesc_vp_offsets == NULL ||
	    descp->vdesc_vp_offsets[0] == VDESC_NO_OFFSET)
		panic ("null_bypass: no vp's in map");
#endif

	/*
	 * Map the vnodes going in.
	 * Later, we'll invoke the operation based on
	 * the first mapped vnode's operation vector.
	 */
	reles = descp->vdesc_flags;
	for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
		if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
			break;   /* bail out at end of list */
		vps_p[i] = this_vp_p =
		    VOPARG_OFFSETTO(struct vnode **,
		    descp->vdesc_vp_offsets[i], ap);
		/*
		 * We're not guaranteed that any but the first vnode
		 * are of our type.  Check for and don't map any
		 * that aren't.  (We must always map first vp or vclean fails.)
		 */
		if (i && (*this_vp_p == NULLVP ||
		    (*this_vp_p)->v_op != &null_vnodeops)) {
			old_vps[i] = NULLVP;
		} else {
			old_vps[i] = *this_vp_p;
			*(vps_p[i]) = NULLVPTOLOWERVP(*this_vp_p);
			/*
			 * XXX - Several operations have the side effect
			 * of vrele'ing their vp's.  We must account for
			 * that.  (This should go away in the future.)
			 */
			if (reles & VDESC_VP0_WILLRELE)
				VREF(*this_vp_p);
		}
	}

	/*
	 * Call the operation on the lower layer
	 * with the modified argument structure.
	 */
	if (vps_p[0] && *vps_p[0])
		error = VCALL(ap);
	else {
		printf("null_bypass: no map for %s\n", descp->vdesc_name);
		error = EINVAL;
	}

	/*
	 * Maintain the illusion of call-by-value
	 * by restoring vnodes in the argument structure
	 * to their original value.
	 */
	reles = descp->vdesc_flags;
	for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
		if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
			break;   /* bail out at end of list */
		if (old_vps[i]) {
			*(vps_p[i]) = old_vps[i];
#if 0
			if (reles & VDESC_VP0_WILLUNLOCK)
				VOP_UNLOCK(*(vps_p[i]), 0);
#endif
			if (reles & VDESC_VP0_WILLRELE)
				vrele(*(vps_p[i]));
		}
	}

	/*
	 * Map the possible out-going vpp
	 * (Assumes that the lower layer always returns
	 * a VREF'ed vpp unless it gets an error.)
	 */
	if (descp->vdesc_vpp_offset != VDESC_NO_OFFSET &&
	    !(descp->vdesc_flags & VDESC_NOMAP_VPP) &&
	    !error) {
		/*
		 * XXX - even though some ops have vpp returned vp's,
		 * several ops actually vrele this before returning.
		 * We must avoid these ops.
		 * (This should go away when these ops are regularized.)
		 */
		if (descp->vdesc_flags & VDESC_VPP_WILLRELE)
			goto out;
		vppp = VOPARG_OFFSETTO(struct vnode ***,
		    descp->vdesc_vpp_offset, ap);
		if (*vppp)
			error = null_nodeget(old_vps[0]->v_mount, **vppp,
			    *vppp);
	}

out:
	return (error);
}
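/*
 * How null_bypass() gets used: it is installed as the vop_bypass
 * handler in the layer's vop_vector, so any operation without an
 * explicit entry is routed through it.  A trimmed sketch (the real
 * table overrides many more operations; exact members vary by
 * release):
 */
struct vop_vector null_vnodeops_example = {
	.vop_bypass =	null_bypass,	/* default: map vps, call lower */
	.vop_lookup =	null_lookup,	/* needs its own locking/RO logic */
	.vop_vptocnp =	null_vptocnp,	/* walks the lower fs directly */
};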
/*
 * We have to carry on the locking protocol on the null layer vnodes
 * as we progress through the tree.  We also have to enforce read-only
 * if this layer is mounted read-only.
 */
static int
null_lookup(struct vnop_lookup_args * ap)
{
	struct componentname * cnp = ap->a_cnp;
	struct vnode * dvp = ap->a_dvp;
	struct vnode *vp, *ldvp, *lvp;
	struct mount * mp;
	struct null_mount * null_mp;
	int error;

	NULLFSDEBUG("%s parent: %p component: %.*s\n", __FUNCTION__, ap->a_dvp,
	    cnp->cn_namelen, cnp->cn_nameptr);

	mp = vnode_mount(dvp);
	/* Rename, delete, and create are not allowed; this is a read-only
	 * file system. */
	if (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME ||
	    cnp->cn_nameiop == CREATE) {
		return (EROFS);
	}
	null_mp = MOUNTTONULLMOUNT(mp);

	lck_mtx_lock(&null_mp->nullm_lock);
	if (nullfs_isspecialvp(dvp)) {
		error = null_special_lookup(ap);
		lck_mtx_unlock(&null_mp->nullm_lock);
		return error;
	}
	lck_mtx_unlock(&null_mp->nullm_lock);

	// . and .. handling
	if (cnp->cn_nameptr[0] == '.') {
		if (cnp->cn_namelen == 1) {
			vp = dvp;
		} else if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
			/* mount point crossing is handled in null_special_lookup */
			vp = vnode_parent(dvp);
		} else {
			goto notdot;
		}

		error = vp ? vnode_get(vp) : ENOENT;
		if (error == 0) {
			*ap->a_vpp = vp;
		}
		return error;
	}

notdot:
	ldvp = NULLVPTOLOWERVP(dvp);
	vp = lvp = NULL;

	/*
	 * Hold ldvp.  The reference on it, owned by dvp, is lost in
	 * case of dvp reclamation.
	 */
	error = vnode_getwithref(ldvp);
	if (error) {
		return error;
	}

	error = VNOP_LOOKUP(ldvp, &lvp, cnp, ap->a_context);

	vnode_put(ldvp);

	if ((error == 0 || error == EJUSTRETURN) && lvp != NULL) {
		if (ldvp == lvp) {
			vp = dvp;
			error = vnode_get(vp);
		} else {
			error = null_nodeget(mp, lvp, dvp, &vp, cnp, 0);
		}
		if (error == 0) {
			*ap->a_vpp = vp;
		}
	}

	/* if we got lvp, drop the iocount from VNOP_LOOKUP */
	if (lvp != NULL) {
		vnode_put(lvp);
	}

	return (error);
}
/* the mountpoint lock should be held going into this function */
static int
null_special_lookup(struct vnop_lookup_args * ap)
{
	struct componentname * cnp = ap->a_cnp;
	struct vnode * dvp = ap->a_dvp;
	struct vnode * ldvp = NULL;
	struct vnode * lvp = NULL;
	struct vnode * vp = NULL;
	struct mount * mp = vnode_mount(dvp);
	struct null_mount * null_mp = MOUNTTONULLMOUNT(mp);
	int error = ENOENT;

	if (dvp == null_mp->nullm_rootvp) {
		/* handle . and .. */
		if (cnp->cn_nameptr[0] == '.') {
			if (cnp->cn_namelen == 1 ||
			    (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.')) {
				/* this is the root so both . and .. give back the root */
				vp = dvp;
				error = vnode_get(vp);
				goto end;
			}
		}

		/* Our virtual wrapper directory should be "d", but "D" is
		 * acceptable if the lower file system is case insensitive. */
		if (cnp->cn_namelen == 1 &&
		    (cnp->cn_nameptr[0] == 'd' ||
		    (null_mp->nullm_flags & NULLM_CASEINSENSITIVE ?
		    cnp->cn_nameptr[0] == 'D' : 0))) {
			error = 0;
			if (null_mp->nullm_secondvp == NULL) {
				error = null_getnewvnode(mp, NULL, dvp, &vp, cnp, 0);
				if (error) {
					goto end;
				}

				null_mp->nullm_secondvp = vp;
			} else {
				vp = null_mp->nullm_secondvp;
				error = vnode_get(vp);
			}
		}

	} else if (dvp == null_mp->nullm_secondvp) {
		/* handle . and .. */
		if (cnp->cn_nameptr[0] == '.') {
			if (cnp->cn_namelen == 1) {
				vp = dvp;
				error = vnode_get(vp);
				goto end;
			} else if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
				/* parent here is the root vp */
				vp = null_mp->nullm_rootvp;
				error = vnode_get(vp);
				goto end;
			}
		}
		/* null_mp->nullm_lowerrootvp was set at mount time, so we don't
		 * need to lock to access it. */
		/* v_name should be null-terminated, but cn_nameptr is not
		 * necessarily; cn_namelen is the number of characters before
		 * the null in either case. */
		error = vnode_getwithvid(null_mp->nullm_lowerrootvp,
		    null_mp->nullm_lowerrootvid);
		if (error) {
			goto end;
		}

		/* We don't want to mess with case insensitivity and unicode,
		 * so the plan to check here is:
		 *   1. Try to get the lower root's parent.
		 *   2. If we get a parent, then perform a lookup on the lower
		 *      file system using the parent and the passed-in cnp.
		 *   3. If that worked and we got a vp, then see if the vp is
		 *      lowerrootvp.  If so, we got a match.
		 *   4. Anything else results in ENOENT.
		 */
		error = null_get_lowerparent(null_mp->nullm_lowerrootvp, &ldvp,
		    ap->a_context);

		if (error == 0) {
			error = VNOP_LOOKUP(ldvp, &lvp, cnp, ap->a_context);
			vnode_put(ldvp);

			if (error == 0) {
				if (lvp == null_mp->nullm_lowerrootvp) {
					/* always check the hashmap for a vnode
					 * for this, the root of the mirrored
					 * system */
					error = null_nodeget(mp, lvp, dvp, &vp,
					    cnp, 0);

					if (error == 0 &&
					    null_mp->nullm_thirdcovervp == NULL) {
						/* if nodeget succeeded then vp
						 * has an iocount */
						null_mp->nullm_thirdcovervp = vp;
					}
				} else {
					error = ENOENT;
				}
				vnode_put(lvp);
			}
		}
		vnode_put(null_mp->nullm_lowerrootvp);
	}

end:
	if (error == 0) {
		*ap->a_vpp = vp;
	}
	return error;
}
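/*
 * null_get_lowerparent() is used above but not shown.  A plausible
 * minimal sketch, assuming it leans on the vnode_getparent() KPI
 * (which returns the parent with an iocount held); the real
 * implementation may differ, e.g. by consulting cached identity
 * information.
 */
static int
null_get_lowerparent(vnode_t lvp, vnode_t *dvpp, __unused vfs_context_t ctx)
{
	vnode_t dvp;

	dvp = vnode_getparent(lvp);	/* NULL if no parent is known */
	if (dvp == NULL)
		return (ENOENT);
	*dvpp = dvp;			/* caller must vnode_put() it */
	return (0);
}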
/*
 * Mount null layer
 */
static int
nullfs_mount(struct mount *mp)
{
	int error = 0;
	struct vnode *lowerrootvp, *vp;
	struct vnode *nullm_rootvp;
	struct null_mount *xmp;
	char *target;
	int isvnunlocked = 0, len;
	struct nameidata nd, *ndp = &nd;

	NULLFSDEBUG("nullfs_mount(mp = %p)\n", (void *)mp);

	if (mp->mnt_flag & MNT_ROOTFS)
		return (EOPNOTSUPP);
	/*
	 * Update is a no-op
	 */
	if (mp->mnt_flag & MNT_UPDATE) {
		/*
		 * Only support update mounts for NFS export.
		 */
		if (vfs_flagopt(mp->mnt_optnew, "export", NULL, 0))
			return (0);
		else
			return (EOPNOTSUPP);
	}

	/*
	 * Get argument
	 */
	error = vfs_getopt(mp->mnt_optnew, "target", (void **)&target, &len);
	if (error || target[len - 1] != '\0')
		return (EINVAL);

	/*
	 * Unlock lower node to avoid deadlock.
	 * (XXX) VOP_ISLOCKED is needed?
	 */
	if ((mp->mnt_vnodecovered->v_op == &null_vnodeops) &&
	    VOP_ISLOCKED(mp->mnt_vnodecovered)) {
		VOP_UNLOCK(mp->mnt_vnodecovered, 0);
		isvnunlocked = 1;
	}
	/*
	 * Find lower node
	 */
	NDINIT(ndp, LOOKUP, FOLLOW|LOCKLEAF, UIO_SYSSPACE, target, curthread);
	error = namei(ndp);
	/*
	 * Re-lock vnode.
	 */
	if (isvnunlocked && !VOP_ISLOCKED(mp->mnt_vnodecovered))
		vn_lock(mp->mnt_vnodecovered, LK_EXCLUSIVE | LK_RETRY);

	if (error)
		return (error);
	NDFREE(ndp, NDF_ONLY_PNBUF);

	/*
	 * Sanity check on lower vnode
	 */
	lowerrootvp = ndp->ni_vp;

	/*
	 * Check multi null mount to avoid `lock against myself' panic.
	 */
	if (lowerrootvp == VTONULL(mp->mnt_vnodecovered)->null_lowervp) {
		NULLFSDEBUG("nullfs_mount: multi null mount?\n");
		vput(lowerrootvp);
		return (EDEADLK);
	}

	xmp = (struct null_mount *) malloc(sizeof(struct null_mount),
	    M_NULLFSMNT, M_WAITOK);	/* XXX */

	/*
	 * Save reference to underlying FS
	 */
	xmp->nullm_vfs = lowerrootvp->v_mount;

	/*
	 * Save reference.  Each mount also holds
	 * a reference on the root vnode.
	 */
	error = null_nodeget(mp, lowerrootvp, &vp);
	/*
	 * Make sure the node alias worked
	 */
	if (error) {
		/*
		 * XXX: vp was never set up when null_nodeget() failed, so
		 * unlocking it here looks wrong; the later version of this
		 * function simply frees xmp and returns.
		 */
		VOP_UNLOCK(vp, 0);
		vrele(lowerrootvp);
		free(xmp, M_NULLFSMNT);	/* XXX */
		return (error);
	}

	/*
	 * Keep a held reference to the root vnode.
	 * It is vrele'd in nullfs_unmount.
	 */
	nullm_rootvp = vp;
	nullm_rootvp->v_vflag |= VV_ROOT;
	xmp->nullm_rootvp = nullm_rootvp;

	/*
	 * Unlock the node (either the lower or the alias)
	 */
	VOP_UNLOCK(vp, 0);

	if (NULLVPTOLOWERVP(nullm_rootvp)->v_mount->mnt_flag & MNT_LOCAL) {
		MNT_ILOCK(mp);
		mp->mnt_flag |= MNT_LOCAL;
		MNT_IUNLOCK(mp);
	}
	MNT_ILOCK(mp);
	mp->mnt_kern_flag |= lowerrootvp->v_mount->mnt_kern_flag & MNTK_MPSAFE;
	MNT_IUNLOCK(mp);
	mp->mnt_data = xmp;
	vfs_getnewfsid(mp);

	vfs_mountedfrom(mp, target);

	NULLFSDEBUG("nullfs_mount: lower %s, alias at %s\n",
	    mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname);
	return (0);
}
/*
 * We have to carry on the locking protocol on the null layer vnodes
 * as we progress through the tree.  We also have to enforce read-only
 * if this layer is mounted read-only.
 */
static int
null_lookup(struct vop_lookup_args *ap)
{
	struct componentname *cnp = ap->a_cnp;
	struct vnode *dvp = ap->a_dvp;
	int flags = cnp->cn_flags;
	struct vnode *vp, *ldvp, *lvp;
	struct mount *mp;
	int error;

	mp = dvp->v_mount;
	if ((flags & ISLASTCN) != 0 && (mp->mnt_flag & MNT_RDONLY) != 0 &&
	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
		return (EROFS);
	/*
	 * Although it is possible to call null_bypass(), we'll do
	 * a direct call to reduce overhead
	 */
	ldvp = NULLVPTOLOWERVP(dvp);
	vp = lvp = NULL;
	KASSERT((ldvp->v_vflag & VV_ROOT) == 0 ||
	    ((dvp->v_vflag & VV_ROOT) != 0 && (flags & ISDOTDOT) == 0),
	    ("ldvp %p fl %#x dvp %p fl %#x flags %#x", ldvp, ldvp->v_vflag,
	    dvp, dvp->v_vflag, flags));

	/*
	 * Hold ldvp.  The reference on it, owned by dvp, is lost in
	 * case of dvp reclamation, and we need ldvp to move our lock
	 * from ldvp to dvp.
	 */
	vhold(ldvp);

	error = VOP_LOOKUP(ldvp, &lvp, cnp);

	/*
	 * VOP_LOOKUP() on lower vnode may unlock ldvp, which allows
	 * dvp to be reclaimed due to shared v_vnlock.  Check for the
	 * doomed state and return error.
	 */
	if ((error == 0 || error == EJUSTRETURN) &&
	    (dvp->v_iflag & VI_DOOMED) != 0) {
		error = ENOENT;
		if (lvp != NULL)
			vput(lvp);

		/*
		 * If vgone() reclaimed dvp before curthread
		 * relocked ldvp, the locks of dvp and ldvp are no
		 * longer shared.  In this case, relock of ldvp in
		 * lower fs VOP_LOOKUP() does not restore the locking
		 * state of dvp.  Compensate for this by unlocking
		 * ldvp and locking dvp, which is also correct if the
		 * locks are still shared.
		 */
		VOP_UNLOCK(ldvp, 0);
		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
	}
	vdrop(ldvp);

	if (error == EJUSTRETURN && (flags & ISLASTCN) != 0 &&
	    (mp->mnt_flag & MNT_RDONLY) != 0 &&
	    (cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME))
		error = EROFS;

	if ((error == 0 || error == EJUSTRETURN) && lvp != NULL) {
		if (ldvp == lvp) {
			*ap->a_vpp = dvp;
			VREF(dvp);
			vrele(lvp);
		} else {
			error = null_nodeget(mp, lvp, &vp);
			if (error == 0)
				*ap->a_vpp = vp;
		}
	}
	return (error);
}
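/*
 * The VI_DOOMED dance above exists because nullfs vnodes share their
 * lock with the lower vnode.  Simplified sketch of what null_nodeget()
 * arranges when it creates the upper vnode (see null_subr.c):
 *
 *	vp->v_vnlock = lowervp->v_vnlock;	// one lock for both layers
 *
 * Once dvp is reclaimed, that sharing is broken, so relocking ldvp in
 * the lower VOP_LOOKUP() no longer implies that dvp is locked.
 */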