예제 #1
0
/*
 * Finish an invalidation sequence on `pmap`.
 *
 * For any pmap other than kernel_pmap this bumps the pm_invgen
 * generation counter and then drops the CPULOCK_EXCL bit from
 * pm_active_lock.  In all cases the critical section tagged "inval"
 * (entered by pmap_inval_init()) is exited last.
 */
static void
pmap_inval_done(pmap_t pmap)
{
    const int is_user_pmap = (pmap != &kernel_pmap);

    if (is_user_pmap) {
        /* Generation bump first, then release the exclusive bit. */
        atomic_add_acq_long(&pmap->pm_invgen, 1);
        atomic_clear_int(&pmap->pm_active_lock, CPULOCK_EXCL);
    }

    crit_exit_id("inval");
}
예제 #2
0
/*
 * Begin an invalidation sequence on `pmap`.
 *
 * Enters the critical section tagged "inval".  For any pmap other than
 * kernel_pmap it then spins until it manages to set CPULOCK_EXCL in
 * pm_active_lock (processing pending IPIs while it waits) and finally
 * bumps the pm_invgen generation counter.  The critical section is
 * left by pmap_inval_done().
 */
static void
pmap_inval_init(pmap_t pmap)
{
    cpulock_t seen;

    crit_enter_id("inval");

    /* Nothing to lock for the kernel pmap. */
    if (pmap == &kernel_pmap)
        return;

    for (;;) {
        seen = pmap->pm_active_lock;
        cpu_ccfence();

        /*
         * Only attempt the swap when the exclusive bit is currently
         * clear; otherwise somebody else holds it and we must wait.
         */
        if ((seen & CPULOCK_EXCL) == 0 &&
            atomic_cmpset_int(&pmap->pm_active_lock,
                              seen, seen | CPULOCK_EXCL)) {
            break;
        }

        /* Keep servicing IPIs while spinning. */
        lwkt_process_ipiq();
        cpu_pause();
    }

    atomic_add_acq_long(&pmap->pm_invgen, 1);
}
예제 #3
0
/*
 * Lookup vnode operation for fuse: resolve the path component described
 * by ap->a_cnp relative to the directory vnode ap->a_dvp and hand the
 * resulting vnode back through ap->a_vpp.
 *
 * Argument layout:
 *
 *	struct vop_lookup_args {
 *		struct vnodeop_desc *a_desc;
 *		struct vnode *a_dvp;
 *		struct vnode **a_vpp;
 *		struct componentname *a_cnp;
 *	};
 *
 * Outline:
 *   1. Sanity checks (dead fs, dvp not a directory, read-only mount).
 *   2. Early VEXEC access check when dvp is the filesystem root.
 *   3. ".." and "." are answered with FUSE_GETATTR on the known node id;
 *      any other name optionally consults the name cache and then
 *      sends FUSE_LOOKUP to the daemon.
 *   4. Depending on nameiop (DELETE/RENAME/CREATE/LOOKUP) the vnode is
 *      instantiated via fuse_vnode_get() or dvp itself is referenced.
 *   5. At "out" the dispatcher is destroyed and, for non-last
 *      components, an early VEXEC check of the next directory is made.
 *
 * Return values visible in this function: 0 on success, ENXIO for a
 * dead filesystem, ENOTDIR, EROFS, ENOENT, EINVAL (daemon answered with
 * FUSE_ROOT_ID), EISDIR (RENAME of "."), EJUSTRETURN (CREATE/RENAME of
 * a missing last component), or whatever the called helpers return.
 */
int
fuse_vnop_lookup(struct vop_lookup_args *ap)
{
	struct vnode *dvp = ap->a_dvp;
	struct vnode **vpp = ap->a_vpp;
	struct componentname *cnp = ap->a_cnp;
	struct thread *td = cnp->cn_thread;
	struct ucred *cred = cnp->cn_cred;

	int nameiop = cnp->cn_nameiop;
	int flags = cnp->cn_flags;
	int wantparent = flags & (LOCKPARENT | WANTPARENT);
	int islastcn = flags & ISLASTCN;
	struct mount *mp = vnode_mount(dvp);

	/* err: result of this function; lookup_err: result of the daemon call */
	int err = 0;
	int lookup_err = 0;
	struct vnode *vp = NULL;

	struct fuse_dispatcher fdi;
	enum fuse_opcode op;

	uint64_t nid;
	struct fuse_access_param facp;

	/*
	 * NOTE(review): "%*s" sets a minimum field width, not a maximum.
	 * If the intent is to print only the current component,
	 * "%.*s" would be needed -- confirm.
	 */
	FS_DEBUG2G("parent_inode=%ju - %*s\n",
	    (uintmax_t)VTOI(dvp), (int)cnp->cn_namelen, cnp->cn_nameptr);

	if (fuse_isdeadfs(dvp)) {
		*vpp = NULL;
		return ENXIO;
	}
	if (!vnode_isdir(dvp)) {
		return ENOTDIR;
	}
	/* Anything but a plain LOOKUP of the last component needs a writable mount. */
	if (islastcn && vfs_isrdonly(mp) && (nameiop != LOOKUP)) {
		return EROFS;
	}
	/*
	 * We do access check prior to doing anything else only in the case
	 * when we are at fs root (we'd like to say, "we are at the first
	 * component", but that's not exactly the same... nevermind).
	 * See further comments at further access checks.
	 */

	bzero(&facp, sizeof(facp));
	if (vnode_isvroot(dvp)) {	/* early permission check hack */
		if ((err = fuse_internal_access(dvp, VEXEC, &facp, td, cred))) {
			return err;
		}
	}
	if (flags & ISDOTDOT) {
		/*
		 * "..": use the parent node id stored in dvp's fuse data and
		 * fetch its attributes with FUSE_GETATTR (no name is sent).
		 */
		nid = VTOFUD(dvp)->parent_nid;
		if (nid == 0) {
			return ENOENT;
		}
		fdisp_init(&fdi, 0);
		op = FUSE_GETATTR;
		goto calldaemon;
	} else if (cnp->cn_namelen == 1 && *(cnp->cn_nameptr) == '.') {
		/* ".": same as above, but on dvp's own node id. */
		nid = VTOI(dvp);
		fdisp_init(&fdi, 0);
		op = FUSE_GETATTR;
		goto calldaemon;
	} else if (fuse_lookup_cache_enable) {
		/* Try the name cache first; only a miss falls through to the daemon. */
		err = cache_lookup(dvp, vpp, cnp, NULL, NULL);
		switch (err) {

		case -1:		/* positive match */
			atomic_add_acq_long(&fuse_lookup_cache_hits, 1);
			return 0;

		case 0:		/* no match in cache */
			atomic_add_acq_long(&fuse_lookup_cache_misses, 1);
			break;

		case ENOENT:		/* negative match */
			/* fall through */
		default:
			return err;
		}
	}
	/* Regular name: FUSE_LOOKUP in dvp, payload sized for name + NUL. */
	nid = VTOI(dvp);
	fdisp_init(&fdi, cnp->cn_namelen + 1);
	op = FUSE_LOOKUP;

calldaemon:
	fdisp_make(&fdi, op, mp, nid, td, cred);

	if (op == FUSE_LOOKUP) {
		/* The request body is the NUL-terminated component name. */
		memcpy(fdi.indata, cnp->cn_nameptr, cnp->cn_namelen);
		((char *)fdi.indata)[cnp->cn_namelen] = '\0';
	}
	lookup_err = fdisp_wait_answ(&fdi);

	if ((op == FUSE_LOOKUP) && !lookup_err) {	/* lookup call succeeded */
		/* From here on nid is the node id of the looked-up entry. */
		nid = ((struct fuse_entry_out *)fdi.answ)->nodeid;
		if (!nid) {
			/*
			 * A zero nodeid is the same as "not found",
			 * but it's also cacheable (which we still do
			 * not do as of writing this).
			 */
			lookup_err = ENOENT;
		} else if (nid == FUSE_ROOT_ID) {
			lookup_err = EINVAL;
		}
	}
	/*
	 * Only an ENOENT from a FUSE_LOOKUP with fdi.answ_stat set is
	 * processed further; every other failure is returned right away.
	 */
	if (lookup_err &&
	    (!fdi.answ_stat || lookup_err != ENOENT || op != FUSE_LOOKUP)) {
		fdisp_destroy(&fdi);
		return lookup_err;
	}
	/* lookup_err, if non-zero, must be ENOENT at this point */

	if (lookup_err) {

		if ((nameiop == CREATE || nameiop == RENAME) && islastcn
		     /* && directory dvp has not been removed */ ) {

			if (vfs_isrdonly(mp)) {
				err = EROFS;
				goto out;
			}
#if 0 /* THINK_ABOUT_THIS */
			if ((err = fuse_internal_access(dvp, VWRITE, cred, td, &facp))) {
				goto out;
			}
#endif

			/*
			 * Possibly record the position of a slot in the
			 * directory large enough for the new component name.
			 * This can be recorded in the vnode private data for
			 * dvp. Set the SAVENAME flag to hold onto the
			 * pathname for use later in VOP_CREATE or VOP_RENAME.
			 */
			cnp->cn_flags |= SAVENAME;

			/* The missing target of a CREATE/RENAME is reported as EJUSTRETURN. */
			err = EJUSTRETURN;
			goto out;
		}
		/* Consider inserting name into cache. */

		/*
		 * No, we can't use negative caching, as the fs
		 * changes are out of our control.
		 * False positives' falseness turns out just as things
		 * go by, but false negatives' falseness doesn't.
		 * (and aiding the caching mechanism with extra control
		 * mechanisms comes quite close to defeating the whole
		 * purpose of caching...)
		 */
#if 0
		if ((cnp->cn_flags & MAKEENTRY) && nameiop != CREATE) {
			FS_DEBUG("inserting NULL into cache\n");
			cache_enter(dvp, NULL, cnp);
		}
#endif
		err = ENOENT;
		goto out;

	} else {

		/* !lookup_err */

		struct fuse_entry_out *feo = NULL;
		struct fuse_attr *fattr = NULL;

		/* The attributes sit at a different place in the two reply types. */
		if (op == FUSE_GETATTR) {
			fattr = &((struct fuse_attr_out *)fdi.answ)->attr;
		} else {
			feo = (struct fuse_entry_out *)fdi.answ;
			fattr = &(feo->attr);
		}

		/*
		 * If deleting, and at end of pathname, return parameters
		 * which can be used to remove file.  If the wantparent flag
		 * isn't set, we return only the directory, otherwise we go on
		 * and lock the inode, being careful with ".".
		 */
		if (nameiop == DELETE && islastcn) {
			/*
			 * Check for write access on directory.
			 */
			facp.xuid = fattr->uid;
			facp.facc_flags |= FACCESS_STICKY;
			err = fuse_internal_access(dvp, VWRITE, &facp, td, cred);
			facp.facc_flags &= ~FACCESS_XQUERIES;

			if (err) {
				goto out;
			}
			if (nid == VTOI(dvp)) {
				/* The entry is dvp itself: just take another reference. */
				vref(dvp);
				*vpp = dvp;
			} else {
				err = fuse_vnode_get(dvp->v_mount, nid, dvp,
				    &vp, cnp, IFTOVT(fattr->mode));
				if (err)
					goto out;
				*vpp = vp;
			}

			/*
			 * Save the name for use in VOP_RMDIR and VOP_REMOVE
			 * later.
			 */
			cnp->cn_flags |= SAVENAME;
			goto out;

		}
		/*
		 * If rewriting (RENAME), return the inode and the
		 * information required to rewrite the present directory.
		 * Must get inode of directory entry to verify it's a
		 * regular file, or empty directory.
		 */
		if (nameiop == RENAME && wantparent && islastcn) {

#if 0 /* THINK_ABOUT_THIS */
			if ((err = fuse_internal_access(dvp, VWRITE, cred, td, &facp))) {
				goto out;
			}
#endif

			/*
			 * Check for "."
			 */
			if (nid == VTOI(dvp)) {
				err = EISDIR;
				goto out;
			}
			err = fuse_vnode_get(vnode_mount(dvp),
			    nid,
			    dvp,
			    &vp,
			    cnp,
			    IFTOVT(fattr->mode));
			if (err) {
				goto out;
			}
			*vpp = vp;
			/*
			 * Save the name for use in VOP_RENAME later.
			 */
			cnp->cn_flags |= SAVENAME;

			goto out;
		}
		if (flags & ISDOTDOT) {
			/*
			 * NOTE(review): this `mp` shadows the function-level
			 * `mp`; both are derived from dvp (v_mount here,
			 * vnode_mount() above) -- presumably the same mount.
			 */
			struct mount *mp;
			int ltype;

			/*
			 * Expanded copy of vn_vget_ino() so that
			 * fuse_vnode_get() can be used.
			 *
			 * The mount is busied, dvp is unlocked while the
			 * parent vnode is obtained, and dvp is relocked with
			 * its previous lock type afterwards.  Since dvp was
			 * unlocked in between, VI_DOOMED is re-checked after
			 * each relock.
			 */
			mp = dvp->v_mount;
			ltype = VOP_ISLOCKED(dvp);
			err = vfs_busy(mp, MBF_NOWAIT);
			if (err != 0) {
				/*
				 * Non-blocking busy failed: hold a reference
				 * on the mount, drop the dvp lock and retry
				 * without MBF_NOWAIT.
				 */
				vfs_ref(mp);
				VOP_UNLOCK(dvp, 0);
				err = vfs_busy(mp, 0);
				vn_lock(dvp, ltype | LK_RETRY);
				vfs_rel(mp);
				if (err)
					goto out;
				if ((dvp->v_iflag & VI_DOOMED) != 0) {
					err = ENOENT;
					vfs_unbusy(mp);
					goto out;
				}
			}
			VOP_UNLOCK(dvp, 0);
			err = fuse_vnode_get(vnode_mount(dvp),
			    nid,
			    NULL,
			    &vp,
			    cnp,
			    IFTOVT(fattr->mode));
			vfs_unbusy(mp);
			vn_lock(dvp, ltype | LK_RETRY);
			if ((dvp->v_iflag & VI_DOOMED) != 0) {
				/* dvp went away meanwhile: release what we got. */
				if (err == 0)
					vput(vp);
				err = ENOENT;
			}
			if (err)
				goto out;
			*vpp = vp;
		} else if (nid == VTOI(dvp)) {
			/* "." (or an entry resolving to dvp): reference dvp itself. */
			vref(dvp);
			*vpp = dvp;
		} else {
			err = fuse_vnode_get(vnode_mount(dvp),
			    nid,
			    dvp,
			    &vp,
			    cnp,
			    IFTOVT(fattr->mode));
			if (err) {
				goto out;
			}
			fuse_vnode_setparent(vp, dvp);
			*vpp = vp;
		}

		/* Hand the attributes from the reply to cache_attrs() for the result vnode. */
		if (op == FUSE_GETATTR) {
			cache_attrs(*vpp, (struct fuse_attr_out *)fdi.answ);
		} else {
			cache_attrs(*vpp, (struct fuse_entry_out *)fdi.answ);
		}

		/* Insert name into cache if appropriate. */

		/*
		 * Nooo, caching is evil. With caching, we can't avoid stale
		 * information taking over the playground (cached info is not
		 * just positive/negative, it does have qualitative aspects,
		 * too). And a (VOP/FUSE)_GETATTR is always thrown anyway, when
		 * walking down along cached path components, and that's not
		 * any cheaper than FUSE_LOOKUP. This might change with
		 * implementing kernel side attr caching, but... In Linux,
		 * lookup results are not cached, and the daemon is bombarded
		 * with FUSE_LOOKUPS on and on. This shows that by design, the
		 * daemon is expected to handle frequent lookup queries
		 * efficiently, do its caching in userspace, and so on.
		 *
		 * So just leave the name cache alone.
		 */

		/*
		 * Well, now I know, Linux caches lookups, but with a
		 * timeout... So it's the same thing as attribute caching:
		 * we can deal with it when we implement timeouts.
		 */
#if 0
		if (cnp->cn_flags & MAKEENTRY) {
			cache_enter(dvp, *vpp, cnp);
		}
#endif
	}
out:
	if (!lookup_err) {

		/* No lookup error; need to clean up. */

		if (err) {		/* Found inode; exit with no vnode. */
			/*
			 * The daemon answered a FUSE_LOOKUP successfully but
			 * we are not returning a vnode: send a forget for
			 * nid with a count of 1 (presumably to balance the
			 * lookup reference held by the daemon -- confirm).
			 */
			if (op == FUSE_LOOKUP) {
				fuse_internal_forget_send(vnode_mount(dvp), td, cred,
				    nid, 1);
			}
			fdisp_destroy(&fdi);
			return err;
		} else {
#ifndef NO_EARLY_PERM_CHECK_HACK
			if (!islastcn) {
				/*
				 * We have the attributes of the next item
				 * *now*, and it's a fact, and we do not
				 * have to do extra work for it (ie, beg the
				 * daemon), and it neither depends on such
				 * accidental things like attr caching. So
				 * the big idea: check credentials *now*,
				 * not at the beginning of the next call to
				 * lookup.
				 *
				 * The first item of the lookup chain (fs root)
				 * won't be checked then here, of course, as
				 * it's never "the next". But go and see that
				 * the root is taken care about at the very
				 * beginning of this function.
				 *
				 * Now, given we want to do the access check
				 * this way, one might ask: so then why not
				 * do the access check just after fetching
				 * the inode and its attributes from the
				 * daemon? Why bother with producing the
				 * corresponding vnode at all if something
				 * is not OK? We know what's the deal as
				 * soon as we get those attrs... There is
				 * one bit of info though not given us by
				 * the daemon: whether his response is
				 * authoritative or not... His response should
				 * be ignored if something is mounted over
				 * the dir in question. But that can be
				 * known only by having the vnode...
				 */
				int tmpvtype = vnode_vtype(*vpp);

				bzero(&facp, sizeof(facp));
				/* the early perm check hack */
				    facp.facc_flags |= FACCESS_VA_VALID;

				/* An intermediate component must be a directory or a symlink. */
				if ((tmpvtype != VDIR) && (tmpvtype != VLNK)) {
					err = ENOTDIR;
				}
				/* Skip the access check when something is mounted on *vpp. */
				if (!err && !vnode_mountedhere(*vpp)) {
					err = fuse_internal_access(*vpp, VEXEC, &facp, td, cred);
				}
				if (err) {
					if (tmpvtype == VLNK)
						FS_DEBUG("weird, permission error with a symlink?\n");
					/* Do not hand back a vnode the caller may not traverse. */
					vput(*vpp);
					*vpp = NULL;
				}
			}
#endif
		}
	}
	fdisp_destroy(&fdi);

	return err;
}
예제 #4
0
int
sys_vmm_guest_sync_addr(struct vmm_guest_sync_addr_args *uap)
{
    int error = 0;
    cpulock_t olock;
    cpulock_t nlock;
    cpumask_t mask;
    long val;
    struct proc *p = curproc;

    if (p->p_vmm == NULL)
        return ENOSYS;

    crit_enter_id("vmm_inval");

    /*
     * Acquire CPULOCK_EXCL, spin while we wait.  This will prevent
     * any other cpu trying to use related VMMs to wait for us.
     */
    KKASSERT(CPUMASK_TESTMASK(p->p_vmm_cpumask, mycpu->gd_cpumask) == 0);
    for (;;) {
        olock = p->p_vmm_cpulock & ~CPULOCK_EXCL;
        cpu_ccfence();
        nlock = olock | CPULOCK_EXCL;
        if (atomic_cmpset_int(&p->p_vmm_cpulock, olock, nlock))
            break;
        lwkt_process_ipiq();
        cpu_pause();
    }

    /*
     * Wait for other cpu's to exit VMM mode (for this vkernel).  No
     * new cpus will enter VMM mode while we hold the lock.  New waiters
     * may turn-up though so the wakeup() later on has to be
     * unconditional.
     *
     * We must test on p_vmm_cpulock's counter, not the mask, because
     * VMM entries will set the mask bit unconditionally first
     * (interlocking our IPI below) and then conditionally bump the
     * counter.
     */
    if (olock & CPULOCK_CNTMASK) {
        mask = p->p_vmm_cpumask;
        CPUMASK_ANDMASK(mask, mycpu->gd_other_cpus);
        lwkt_send_ipiq_mask(mask, vmm_exit_vmm, NULL);
        while (p->p_vmm_cpulock & CPULOCK_CNTMASK) {
            lwkt_process_ipiq();
            cpu_pause();
        }
    }

#ifndef _KERNEL_VIRTUAL
    /*
     * Ensure that any new entries into VMM mode using
     * vmm's managed under this process will issue a
     * INVEPT before resuming.
     */
    atomic_add_acq_long(&p->p_vmspace->vm_pmap.pm_invgen, 1);
#endif

    /*
     * Make the requested modification, wakeup any waiters.
     */
    if (uap->srcaddr) {
        copyin(uap->srcaddr, &val, sizeof(long));
        copyout(&val, uap->dstaddr, sizeof(long));
    }

    /*
     * VMMs on remote cpus will not be re-entered until we
     * clear the lock.
     */
    atomic_clear_int(&p->p_vmm_cpulock, CPULOCK_EXCL);
#if 0
    wakeup(&p->p_vmm_cpulock);
#endif

    crit_exit_id("vmm_inval");

    return error;
}