/*ARGSUSED*/ static int lo_mount(struct vfs *vfsp, struct vnode *vp, struct mounta *uap, struct cred *cr) { int error; struct vnode *srootvp = NULL; /* the server's root */ struct vnode *realrootvp; struct loinfo *li; int nodev; nodev = vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL); if ((error = secpolicy_fs_mount(cr, vp, vfsp)) != 0) return (EPERM); /* * Loopback devices which get "nodevices" added can be done without * "nodevices" set because we cannot import devices into a zone * with loopback. Note that we have all zone privileges when * this happens; if not, we'd have gotten "nosuid". */ if (!nodev && vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) vfs_setmntopt(vfsp, MNTOPT_DEVICES, NULL, VFS_NODISPLAY); mutex_enter(&vp->v_lock); if (!(uap->flags & MS_OVERLAY) && (vp->v_count != 1 || (vp->v_flag & VROOT))) { mutex_exit(&vp->v_lock); return (EBUSY); } mutex_exit(&vp->v_lock); /* * Find real root, and make vfs point to real vfs */ if (error = lookupname(uap->spec, (uap->flags & MS_SYSSPACE) ? UIO_SYSSPACE : UIO_USERSPACE, FOLLOW, NULLVPP, &realrootvp)) return (error); /* * Enforce MAC policy if needed. * * Loopback mounts must not allow writing up. The dominance test * is intended to prevent a global zone caller from accidentally * creating write-up conditions between two labeled zones. * Local zones can't violate MAC on their own without help from * the global zone because they can't name a pathname that * they don't already have. * * The special case check for the NET_MAC_AWARE process flag is * to support the case of the automounter in the global zone. We * permit automounting of local zone directories such as home * directories, into the global zone as required by setlabel, * zonecopy, and saving of desktop sessions. Such mounts are * trusted not to expose the contents of one zone's directories * to another by leaking them through the global zone. */ if (is_system_labeled() && crgetzoneid(cr) == GLOBAL_ZONEID) { char specname[MAXPATHLEN]; zone_t *from_zptr; zone_t *to_zptr; if (vnodetopath(NULL, realrootvp, specname, sizeof (specname), CRED()) != 0) { VN_RELE(realrootvp); return (EACCES); } from_zptr = zone_find_by_path(specname); to_zptr = zone_find_by_path(refstr_value(vfsp->vfs_mntpt)); /* * Special case for zone devfs: the zone for /dev will * incorrectly appear as the global zone since it's not * under the zone rootpath. So for zone devfs check allow * read-write mounts. * * Second special case for scratch zones used for Live Upgrade: * this is used to mount the zone's root from /root to /a in * the scratch zone. As with the other special case, this * appears to be outside of the zone because it's not under * the zone rootpath, which is $ZONEPATH/lu in the scratch * zone case. */ if (from_zptr != to_zptr && !(to_zptr->zone_flags & ZF_IS_SCRATCH)) { /* * We know at this point that the labels aren't equal * because the zone pointers aren't equal, and zones * can't share a label. * * If the source is the global zone then making * it available to a local zone must be done in * read-only mode as the label will become admin_low. * * If it is a mount between local zones then if * the current process is in the global zone and has * the NET_MAC_AWARE flag, then regular read-write * access is allowed. If it's in some other zone, but * the label on the mount point dominates the original * source, then allow the mount as read-only * ("read-down"). */ if (from_zptr->zone_id == GLOBAL_ZONEID) { /* make the mount read-only */ vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0); } else { /* cross-zone mount */ if (to_zptr->zone_id == GLOBAL_ZONEID && /* LINTED: no consequent */ getpflags(NET_MAC_AWARE, cr) != 0) { /* Allow the mount as read-write */ } else if (bldominates( label2bslabel(to_zptr->zone_slabel), label2bslabel(from_zptr->zone_slabel))) { /* make the mount read-only */ vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0); } else { VN_RELE(realrootvp); zone_rele(to_zptr); zone_rele(from_zptr); return (EACCES); } } } zone_rele(to_zptr); zone_rele(from_zptr); } /* * realrootvp may be an AUTOFS node, in which case we * perform a VOP_ACCESS() to trigger the mount of the * intended filesystem, so we loopback mount the intended * filesystem instead of the AUTOFS filesystem. */ (void) VOP_ACCESS(realrootvp, 0, 0, cr, NULL); /* * We're interested in the top most filesystem. * This is specially important when uap->spec is a trigger * AUTOFS node, since we're really interested in mounting the * filesystem AUTOFS mounted as result of the VOP_ACCESS() * call not the AUTOFS node itself. */ if (vn_mountedvfs(realrootvp) != NULL) { if (error = traverse(&realrootvp)) { VN_RELE(realrootvp); return (error); } } /* * Allocate a vfs info struct and attach it */ li = kmem_zalloc(sizeof (struct loinfo), KM_SLEEP); li->li_realvfs = realrootvp->v_vfsp; li->li_mountvfs = vfsp; /* * Set mount flags to be inherited by loopback vfs's */ if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) { li->li_mflag |= VFS_RDONLY; } if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) { li->li_mflag |= (VFS_NOSETUID|VFS_NODEVICES); } if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) { li->li_mflag |= VFS_NODEVICES; } if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) { li->li_mflag |= VFS_NOSETUID; } /* * Permissive flags are added to the "deny" bitmap. */ if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) { li->li_dflag |= VFS_XATTR; } if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) { li->li_dflag |= VFS_NBMAND; } /* * Propagate inheritable mount flags from the real vfs. */ if ((li->li_realvfs->vfs_flag & VFS_RDONLY) && !vfs_optionisset(vfsp, MNTOPT_RO, NULL)) vfs_setmntopt(vfsp, MNTOPT_RO, NULL, VFS_NODISPLAY); if ((li->li_realvfs->vfs_flag & VFS_NOSETUID) && !vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) vfs_setmntopt(vfsp, MNTOPT_NOSETUID, NULL, VFS_NODISPLAY); if ((li->li_realvfs->vfs_flag & VFS_NODEVICES) && !vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) vfs_setmntopt(vfsp, MNTOPT_NODEVICES, NULL, VFS_NODISPLAY); /* * Permissive flags such as VFS_XATTR, as opposed to restrictive flags * such as VFS_RDONLY, are handled differently. An explicit * MNTOPT_NOXATTR should override the underlying filesystem's VFS_XATTR. */ if ((li->li_realvfs->vfs_flag & VFS_XATTR) && !vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL) && !vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) vfs_setmntopt(vfsp, MNTOPT_XATTR, NULL, VFS_NODISPLAY); if ((li->li_realvfs->vfs_flag & VFS_NBMAND) && !vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL) && !vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) vfs_setmntopt(vfsp, MNTOPT_NBMAND, NULL, VFS_NODISPLAY); li->li_refct = 0; vfsp->vfs_data = (caddr_t)li; vfsp->vfs_bcount = 0; vfsp->vfs_fstype = lofsfstype; vfsp->vfs_bsize = li->li_realvfs->vfs_bsize; vfsp->vfs_dev = li->li_realvfs->vfs_dev; vfsp->vfs_fsid.val[0] = li->li_realvfs->vfs_fsid.val[0]; vfsp->vfs_fsid.val[1] = li->li_realvfs->vfs_fsid.val[1]; if (vfs_optionisset(vfsp, MNTOPT_LOFS_NOSUB, NULL)) { li->li_flag |= LO_NOSUB; } /* * Propagate any VFS features */ vfs_propagate_features(li->li_realvfs, vfsp); /* * Setup the hashtable. If the root of this mount isn't a directory, * there's no point in allocating a large hashtable. A table with one * bucket is sufficient. */ if (realrootvp->v_type != VDIR) lsetup(li, 1); else lsetup(li, 0); /* * Make the root vnode */ srootvp = makelonode(realrootvp, li, 0); srootvp->v_flag |= VROOT; li->li_rootvp = srootvp; #ifdef LODEBUG lo_dprint(4, "lo_mount: vfs %p realvfs %p root %p realroot %p li %p\n", vfsp, li->li_realvfs, srootvp, realrootvp, li); #endif return (0); }
/* * Get/Make vfs structure for given real vfs */ static struct vfs * makelfsnode(struct vfs *vfsp, struct loinfo *li) { struct lfsnode *lfs; struct lfsnode *tlfs; /* * Don't grab any locks for the fast (common) case. */ if (vfsp == li->li_realvfs) return (li->li_mountvfs); ASSERT(li->li_refct > 0); mutex_enter(&li->li_lfslock); if ((lfs = lfsfind(vfsp, li)) == NULL) { mutex_exit(&li->li_lfslock); lfs = kmem_zalloc(sizeof (*lfs), KM_SLEEP); mutex_enter(&li->li_lfslock); if ((tlfs = lfsfind(vfsp, li)) != NULL) { kmem_free(lfs, sizeof (*lfs)); lfs = tlfs; goto found_lfs; } lfs->lfs_realvfs = vfsp; /* * Even though the lfsnode is strictly speaking a private * implementation detail of lofs, it should behave as a regular * vfs_t for the benefit of the rest of the kernel. */ VFS_INIT(&lfs->lfs_vfs, lo_vfsops, (caddr_t)li); lfs->lfs_vfs.vfs_fstype = li->li_mountvfs->vfs_fstype; lfs->lfs_vfs.vfs_flag = ((vfsp->vfs_flag | li->li_mflag) & ~li->li_dflag) & INHERIT_VFS_FLAG; lfs->lfs_vfs.vfs_bsize = vfsp->vfs_bsize; lfs->lfs_vfs.vfs_dev = vfsp->vfs_dev; lfs->lfs_vfs.vfs_fsid = vfsp->vfs_fsid; if (vfsp->vfs_mntpt != NULL) { lfs->lfs_vfs.vfs_mntpt = vfs_getmntpoint(vfsp); /* Leave a reference to the mountpoint */ } (void) VFS_ROOT(vfsp, &lfs->lfs_realrootvp); /* * We use 1 instead of 0 as the value to associate with * an idle lfs_vfs. This is to prevent VFS_RELE() * trying to kmem_free() our lfs_t (which is the wrong * size). */ VFS_HOLD(&lfs->lfs_vfs); lfs->lfs_next = li->li_lfs; li->li_lfs = lfs; vfs_propagate_features(vfsp, &lfs->lfs_vfs); } found_lfs: VFS_HOLD(&lfs->lfs_vfs); mutex_exit(&li->li_lfslock); return (&lfs->lfs_vfs); }