Пример #1
0
static int
swap_file_open(struct chip_swap *swap, const char *swap_file)
{
	struct nameidata nd;
	int flags, error;

	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, swap_file,
	    curthread);

	flags = FWRITE | FREAD | O_NOFOLLOW | O_CREAT | O_TRUNC;

	error = vn_open(&nd, &flags, CHIP_SWAP_CMODE, NULL);
	if (error) {
		nand_debug(NDBG_SIM,"Cannot create swap file %s", swap_file);
		NDFREE(&nd, NDF_ONLY_PNBUF);
		return (error);
	}

	swap->swap_cred = crhold(curthread->td_ucred);
	NDFREE(&nd, NDF_ONLY_PNBUF);

	/* We just unlock so we hold a reference */
	VOP_UNLOCK(nd.ni_vp, 0);

	swap->swap_vp = nd.ni_vp;

	return (0);
}
Пример #2
0
int
randomdev_write_file(const char *filename, void *buf, size_t length)
{
	struct nameidata nd;
	struct thread* td = curthread;
	int error;
	ssize_t resid;
	int flags;

	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, filename, td);
	flags = FWRITE | O_CREAT | O_TRUNC;
	error = vn_open(&nd, &flags, 0, NULL);
	if (error == 0) {
		NDFREE(&nd, NDF_ONLY_PNBUF);
		if (nd.ni_vp->v_type != VREG)
			error = ENOEXEC;
		else
			error = vn_rdwr(UIO_WRITE, nd.ni_vp, buf, length, 0, UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED, &resid, td);

		VOP_UNLOCK(nd.ni_vp, 0);
		vn_close(nd.ni_vp, FREAD, td->td_ucred, td);
	}

	return (error);
}
Пример #3
0
static int
vfs_mountroot_readconf(struct thread *td, struct sbuf *sb)
{
	static char buf[128];
	struct nameidata nd;
	off_t ofs;
	ssize_t resid;
	int error, flags, len;

	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, "/.mount.conf", td);
	flags = FREAD;
	error = vn_open(&nd, &flags, 0, NULL);
	if (error)
		return (error);

	NDFREE(&nd, NDF_ONLY_PNBUF);
	ofs = 0;
	len = sizeof(buf) - 1;
	while (1) {
		error = vn_rdwr(UIO_READ, nd.ni_vp, buf, len, ofs,
		    UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred,
		    NOCRED, &resid, td);
		if (error)
			break;
		if (resid == len)
			break;
		buf[len - resid] = 0;
		sbuf_printf(sb, "%s", buf);
		ofs += len - resid;
	}

	VOP_UNLOCK(nd.ni_vp, 0);
	vn_close(nd.ni_vp, FREAD, td->td_ucred, td);
	return (error);
}
Пример #4
0
int
linux_newstat(struct proc *p, struct linux_newstat_args *args)
{
    struct stat buf;
    struct nameidata nd;
    int error;
    caddr_t sg;

    sg = stackgap_init();
    CHECKALTEXIST(p, &sg, args->path);

#ifdef DEBUG
    printf("Linux-emul(%ld): newstat(%s, *)\n", (long)p->p_pid,
           args->path);
#endif

    NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
           args->path, p);
    error = namei(&nd);
    if (error)
        return (error);
    NDFREE(&nd, NDF_ONLY_PNBUF);

    error = vn_stat(nd.ni_vp, &buf, p);
    vput(nd.ni_vp);
    if (error)
        return (error);

    return (newstat_copyout(&buf, args->buf));
}
Пример #5
0
static void *
kobj_open_file_vnode(const char *file)
{
	struct thread *td = curthread;
	struct filedesc *fd;
	struct nameidata nd;
	int error, flags, vfslocked;

	fd = td->td_proc->p_fd;
	FILEDESC_XLOCK(fd);
	if (fd->fd_rdir == NULL) {
		fd->fd_rdir = rootvnode;
		vref(fd->fd_rdir);
	}
	if (fd->fd_cdir == NULL) {
		fd->fd_cdir = rootvnode;
		vref(fd->fd_cdir);
	}
	FILEDESC_XUNLOCK(fd);

	flags = FREAD | O_NOFOLLOW;
	NDINIT(&nd, LOOKUP, MPSAFE, UIO_SYSSPACE, file, td);
	error = vn_open_cred(&nd, &flags, 0, 0, curthread->td_ucred, NULL);
	if (error != 0)
		return (NULL);
	vfslocked = NDHASGIANT(&nd);
	NDFREE(&nd, NDF_ONLY_PNBUF);
	/* We just unlock so we hold a reference. */
	VOP_UNLOCK(nd.ni_vp, 0);
	VFS_UNLOCK_GIANT(vfslocked);
	return (nd.ni_vp);
}
Пример #6
0
int
alq_open_flags(struct alq **alqp, const char *file, struct ucred *cred, int cmode,
               int size, int flags)
{
    struct thread *td;
    struct nameidata nd;
    struct alq *alq;
    int oflags;
    int error;
    int vfslocked;

    KASSERT((size > 0), ("%s: size <= 0", __func__));

    *alqp = NULL;
    td = curthread;

    NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, UIO_SYSSPACE, file, td);
    oflags = FWRITE | O_NOFOLLOW | O_CREAT;

    error = vn_open_cred(&nd, &oflags, cmode, 0, cred, NULL);
    if (error)
        return (error);

    vfslocked = NDHASGIANT(&nd);
    NDFREE(&nd, NDF_ONLY_PNBUF);
    /* We just unlock so we hold a reference */
    VOP_UNLOCK(nd.ni_vp, 0);
    VFS_UNLOCK_GIANT(vfslocked);

    alq = bsd_malloc(sizeof(*alq), M_ALD, M_WAITOK|M_ZERO);
    alq->aq_vp = nd.ni_vp;
    alq->aq_cred = crhold(cred);

    mtx_init(&alq->aq_mtx, "ALD Queue", NULL, MTX_SPIN|MTX_QUIET);

    alq->aq_buflen = size;
    alq->aq_entmax = 0;
    alq->aq_entlen = 0;

    alq->aq_freebytes = alq->aq_buflen;
    alq->aq_entbuf = bsd_malloc(alq->aq_buflen, M_ALD, M_WAITOK|M_ZERO);
    alq->aq_writehead = alq->aq_writetail = 0;
    if (flags & ALQ_ORDERED)
        alq->aq_flags |= AQ_ORDERED;

    if ((error = ald_add(alq)) != 0) {
        alq_destroy(alq);
        return (error);
    }

    *alqp = alq;

    return (0);
}
/*
 * Look up a file name. Basically just initialize stuff and call namei().
 */
int
nfsrv_lookupfilename(struct nameidata *ndp, char *fname, NFSPROC_T *p)
{
	int error;

	NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, fname, p);
	error = namei(ndp);
	if (!error) {
		NDFREE(ndp, NDF_ONLY_PNBUF);
	}
	return (error);
}
Пример #8
0
static int
parse_mount_dev_present(const char *dev)
{
	struct nameidata nd;
	int error;

	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, dev, curthread);
	error = namei(&nd);
	if (!error)
		vput(nd.ni_vp);
	NDFREE(&nd, NDF_ONLY_PNBUF);
	return (error != 0) ? 0 : 1;
}
Пример #9
0
int
osi_lookupname(char *aname, enum uio_seg seg, int followlink,
	       struct vnode **vpp)
{
    struct nameidata n;
    int flags, error, glocked;

#ifdef AFS_FBSD50_ENV
    glocked = ISAFS_GLOCK();
    if (glocked)
	AFS_GUNLOCK();
#endif

    flags = 0;
    flags = LOCKLEAF;
    if (followlink)
	flags |= FOLLOW;
    else
	flags |= NOFOLLOW;
#ifdef AFS_FBSD80_ENV
    flags |= MPSAFE; /* namei must take GIANT if needed */
#endif
    NDINIT(&n, LOOKUP, flags, seg, aname, curproc);
    if ((error = namei(&n)) != 0) {
#ifdef AFS_FBSD50_ENV
	if (glocked)
	    AFS_GLOCK();
#endif
	return error;
    }
    *vpp = n.ni_vp;
    /* XXX should we do this?  Usually NOT (matt) */
#if defined(AFS_FBSD80_ENV)
    /*VOP_UNLOCK(n.ni_vp, 0);*/
#elif defined(AFS_FBSD50_ENV)
    VOP_UNLOCK(n.ni_vp, 0, curthread);
#else
    VOP_UNLOCK(n.ni_vp, 0, curproc);
#endif
    NDFREE(&n, NDF_ONLY_PNBUF);
#ifdef AFS_FBSD50_ENV
    if (glocked)
	AFS_GLOCK();
#endif
    return 0;
}
Пример #10
0
static void *
kobj_open_file_vnode(const char *file)
{
	struct thread *td = curthread;
	struct nameidata nd;
	int error, flags;

	pwd_ensure_dirs();

	flags = FREAD | O_NOFOLLOW;
	NDINIT(&nd, LOOKUP, 0, UIO_SYSSPACE, file, td);
	error = vn_open_cred(&nd, &flags, 0, 0, curthread->td_ucred, NULL);
	if (error != 0)
		return (NULL);
	NDFREE(&nd, NDF_ONLY_PNBUF);
	/* We just unlock so we hold a reference. */
	VOP_UNLOCK(nd.ni_vp, 0);
	return (nd.ni_vp);
}
Пример #11
0
int
lookupnameat(char *dirname, enum uio_seg seg, enum symfollow follow,
    vnode_t **dirvpp, vnode_t **compvpp, vnode_t *startvp)
{
	struct nameidata nd;
	int error, ltype;

	ASSERT(dirvpp == NULL);

	vref(startvp);
	ltype = VOP_ISLOCKED(startvp);
	VOP_UNLOCK(startvp, 0);
	NDINIT_ATVP(&nd, LOOKUP, LOCKLEAF | MPSAFE | follow, seg, dirname,
	    startvp, curthread);
	error = namei(&nd);
	*compvpp = nd.ni_vp;
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vn_lock(startvp, ltype | LK_RETRY);
	return (error);
}
Пример #12
0
int
osi_lookupname(char *aname, enum uio_seg seg, int followlink,
	       struct vnode **vpp)
{
    struct nameidata n;
    int flags, error, glocked;

    glocked = ISAFS_GLOCK();
    if (glocked)
	AFS_GUNLOCK();

#if __FreeBSD_version >= 1000021 /* MPSAFE is gone for good! */
    flags = 0;
#else
    flags = MPSAFE; /* namei must take Giant if needed */
#endif
    if (followlink)
	flags |= FOLLOW;
    else
	flags |= NOFOLLOW;
    NDINIT(&n, LOOKUP, flags, seg, aname, curthread);
    if ((error = namei(&n)) != 0) {
	if (glocked)
	    AFS_GLOCK();
	return error;
    }
    *vpp = n.ni_vp;
    /* XXX should we do this?  Usually NOT (matt) */
#if defined(AFS_FBSD80_ENV)
    /*VOP_UNLOCK(n.ni_vp, 0);*/
#else
    VOP_UNLOCK(n.ni_vp, 0, curthread);
#endif
    NDFREE(&n, NDF_ONLY_PNBUF);
    if (glocked)
	AFS_GLOCK();
    return 0;
}
Пример #13
0
/* ARGSUSED */
int
auditctl(struct thread *td, struct auditctl_args *uap)
{
	struct nameidata nd;
	struct ucred *cred;
	struct vnode *vp;
	int error = 0;
	int flags, vfslocked;

	if (jailed(td->td_ucred))
		return (ENOSYS);
	error = priv_check(td, PRIV_AUDIT_CONTROL);
	if (error)
		return (error);

	vp = NULL;
	cred = NULL;

	/*
	 * If a path is specified, open the replacement vnode, perform
	 * validity checks, and grab another reference to the current
	 * credential.
	 *
	 * On Darwin, a NULL path argument is also used to disable audit.
	 */
	if (uap->path == NULL)
		return (EINVAL);

	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
	    UIO_USERSPACE, uap->path, td);
	flags = AUDIT_OPEN_FLAGS;
	error = vn_open(&nd, &flags, 0, NULL);
	if (error)
		return (error);
	vfslocked = NDHASGIANT(&nd);
	vp = nd.ni_vp;
#ifdef MAC
	error = mac_system_check_auditctl(td->td_ucred, vp);
	VOP_UNLOCK(vp, 0);
	if (error) {
		vn_close(vp, AUDIT_CLOSE_FLAGS, td->td_ucred, td);
		VFS_UNLOCK_GIANT(vfslocked);
		return (error);
	}
#else
	VOP_UNLOCK(vp, 0);
#endif
	NDFREE(&nd, NDF_ONLY_PNBUF);
	if (vp->v_type != VREG) {
		vn_close(vp, AUDIT_CLOSE_FLAGS, td->td_ucred, td);
		VFS_UNLOCK_GIANT(vfslocked);
		return (EINVAL);
	}
	VFS_UNLOCK_GIANT(vfslocked);
	cred = td->td_ucred;
	crhold(cred);

	/*
	 * XXXAUDIT: Should audit_suspended actually be cleared by
	 * audit_worker?
	 */
	audit_suspended = 0;

	audit_rotate_vnode(cred, vp);

	return (error);
}
Пример #14
0
static int
coff_load_file(struct thread *td, char *name)
{
	struct proc *p = td->td_proc;
  	struct vmspace *vmspace = p->p_vmspace;
  	int error;
  	struct nameidata nd;
  	struct vnode *vp;
  	struct vattr attr;
  	struct filehdr *fhdr;
  	struct aouthdr *ahdr;
  	struct scnhdr *scns;
  	char *ptr = 0;
  	int nscns;
  	unsigned long text_offset = 0, text_address = 0, text_size = 0;
  	unsigned long data_offset = 0, data_address = 0, data_size = 0;
  	unsigned long bss_size = 0;
  	int i;

	NDINIT(&nd, LOOKUP, ISOPEN | LOCKLEAF | FOLLOW | SAVENAME,
	    UIO_SYSSPACE, name, td);

  	error = namei(&nd);
  	if (error)
    		return error;

  	vp = nd.ni_vp;
  	if (vp == NULL)
    		return ENOEXEC;

  	if (vp->v_writecount) {
    		error = ETXTBSY;
    		goto fail;
  	}

  	if ((error = VOP_GETATTR(vp, &attr, td->td_ucred, td)) != 0)
    		goto fail;

  	if ((vp->v_mount->mnt_flag & MNT_NOEXEC)
	    || ((attr.va_mode & 0111) == 0)
	    || (attr.va_type != VREG))
    		goto fail;

  	if (attr.va_size == 0) {
    		error = ENOEXEC;
    		goto fail;
  	}

  	if ((error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td)) != 0)
    		goto fail;

  	if ((error = VOP_OPEN(vp, FREAD, td->td_ucred, td, NULL)) != 0)
    		goto fail;

	/*
	 * Lose the lock on the vnode. It's no longer needed, and must not
	 * exist for the pagefault paging to work below.
	 */
	VOP_UNLOCK(vp, 0, td);

  	if ((error = vm_mmap(kernel_map,
			    (vm_offset_t *) &ptr,
			    PAGE_SIZE,
			    VM_PROT_READ,
		       	    VM_PROT_READ,
			    0,
			    OBJT_VNODE,
			    vp,
			    0)) != 0)
		goto unlocked_fail;

  	fhdr = (struct filehdr *)ptr;

  	if (fhdr->f_magic != I386_COFF) {
    		error = ENOEXEC;
    		goto dealloc_and_fail;
  	}

  	nscns = fhdr->f_nscns;

  	if ((nscns * sizeof(struct scnhdr)) > PAGE_SIZE) {
    		/*
     		 * XXX -- just fail.  I'm so lazy.
     		 */
    		error = ENOEXEC;
    		goto dealloc_and_fail;
  	}

  	ahdr = (struct aouthdr*)(ptr + sizeof(struct filehdr));

  	scns = (struct scnhdr*)(ptr + sizeof(struct filehdr)
			  + sizeof(struct aouthdr));

  	for (i = 0; i < nscns; i++) {
    		if (scns[i].s_flags & STYP_NOLOAD)
      			continue;
    		else if (scns[i].s_flags & STYP_TEXT) {
      			text_address = scns[i].s_vaddr;
      			text_size = scns[i].s_size;
      			text_offset = scns[i].s_scnptr;
    		}
		else if (scns[i].s_flags & STYP_DATA) {
      			data_address = scns[i].s_vaddr;
      			data_size = scns[i].s_size;
      			data_offset = scns[i].s_scnptr;
    		} else if (scns[i].s_flags & STYP_BSS) {
      			bss_size = scns[i].s_size;
    		}
  	}

  	if ((error = load_coff_section(vmspace, vp, text_offset,
				      (caddr_t)(void *)(uintptr_t)text_address,
				      text_size, text_size,
				      VM_PROT_READ | VM_PROT_EXECUTE)) != 0) {
    		goto dealloc_and_fail;
  	}
  	if ((error = load_coff_section(vmspace, vp, data_offset,
				      (caddr_t)(void *)(uintptr_t)data_address,
				      data_size + bss_size, data_size,
				      VM_PROT_ALL)) != 0) {
    		goto dealloc_and_fail;
  	}

  	error = 0;

 dealloc_and_fail:
	if (vm_map_remove(kernel_map,
			  (vm_offset_t) ptr,
			  (vm_offset_t) ptr + PAGE_SIZE))
    		panic("%s vm_map_remove failed", __func__);

 fail:
	VOP_UNLOCK(vp, 0, td);
 unlocked_fail:
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vrele(nd.ni_vp);
  	return error;
}
Пример #15
0
/*
 * Q_QUOTAON - set up a quota file for a particular filesystem.
 */
int
quotaon(struct thread *td, struct mount *mp, int type, void *fname)
{
    struct ufsmount *ump;
    struct vnode *vp, **vpp;
    struct vnode *mvp;
    struct dquot *dq;
    int error, flags;
    struct nameidata nd;

    error = priv_check(td, PRIV_UFS_QUOTAON);
    if (error != 0) {
        vfs_unbusy(mp);
        return (error);
    }

    if ((mp->mnt_flag & MNT_RDONLY) != 0) {
        vfs_unbusy(mp);
        return (EROFS);
    }

    ump = VFSTOUFS(mp);
    dq = NODQUOT;

    NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, fname, td);
    flags = FREAD | FWRITE;
    vfs_ref(mp);
    vfs_unbusy(mp);
    error = vn_open(&nd, &flags, 0, NULL);
    if (error != 0) {
        vfs_rel(mp);
        return (error);
    }
    NDFREE(&nd, NDF_ONLY_PNBUF);
    vp = nd.ni_vp;
    error = vfs_busy(mp, MBF_NOWAIT);
    vfs_rel(mp);
    if (error == 0) {
        if (vp->v_type != VREG) {
            error = EACCES;
            vfs_unbusy(mp);
        }
    }
    if (error != 0) {
        VOP_UNLOCK(vp, 0);
        (void) vn_close(vp, FREAD|FWRITE, td->td_ucred, td);
        return (error);
    }

    UFS_LOCK(ump);
    if ((ump->um_qflags[type] & (QTF_OPENING|QTF_CLOSING)) != 0) {
        UFS_UNLOCK(ump);
        VOP_UNLOCK(vp, 0);
        (void) vn_close(vp, FREAD|FWRITE, td->td_ucred, td);
        vfs_unbusy(mp);
        return (EALREADY);
    }
    ump->um_qflags[type] |= QTF_OPENING|QTF_CLOSING;
    UFS_UNLOCK(ump);
    if ((error = dqopen(vp, ump, type)) != 0) {
        VOP_UNLOCK(vp, 0);
        UFS_LOCK(ump);
        ump->um_qflags[type] &= ~(QTF_OPENING|QTF_CLOSING);
        UFS_UNLOCK(ump);
        (void) vn_close(vp, FREAD|FWRITE, td->td_ucred, td);
        vfs_unbusy(mp);
        return (error);
    }
    VOP_UNLOCK(vp, 0);
    MNT_ILOCK(mp);
    mp->mnt_flag |= MNT_QUOTA;
    MNT_IUNLOCK(mp);

    vpp = &ump->um_quotas[type];
    if (*vpp != vp)
        quotaoff1(td, mp, type);

    /*
     * When the directory vnode containing the quota file is
     * inactivated, due to the shared lookup of the quota file
     * vput()ing the dvp, the qsyncvp() call for the containing
     * directory would try to acquire the quota lock exclusive.
     * At the same time, lookup already locked the quota vnode
     * shared.  Mark the quota vnode lock as allowing recursion
     * and automatically converting shared locks to exclusive.
     *
     * Also mark quota vnode as system.
     */
    vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
    vp->v_vflag |= VV_SYSTEM;
    VN_LOCK_AREC(vp);
    VN_LOCK_DSHARE(vp);
    VOP_UNLOCK(vp, 0);
    *vpp = vp;
    /*
     * Save the credential of the process that turned on quotas.
     * Set up the time limits for this quota.
     */
    ump->um_cred[type] = crhold(td->td_ucred);
    ump->um_btime[type] = MAX_DQ_TIME;
    ump->um_itime[type] = MAX_IQ_TIME;
    if (dqget(NULLVP, 0, ump, type, &dq) == 0) {
        if (dq->dq_btime > 0)
            ump->um_btime[type] = dq->dq_btime;
        if (dq->dq_itime > 0)
            ump->um_itime[type] = dq->dq_itime;
        dqrele(NULLVP, dq);
    }
    /*
     * Allow the getdq from getinoquota below to read the quota
     * from file.
     */
    UFS_LOCK(ump);
    ump->um_qflags[type] &= ~QTF_CLOSING;
    UFS_UNLOCK(ump);
    /*
     * Search vnodes associated with this mount point,
     * adding references to quota file being opened.
     * NB: only need to add dquot's for inodes being modified.
     */
again:
    MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
        if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
            MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
            goto again;
        }
        if (vp->v_type == VNON || vp->v_writecount == 0) {
            VOP_UNLOCK(vp, 0);
            vrele(vp);
            continue;
        }
        error = getinoquota(VTOI(vp));
        VOP_UNLOCK(vp, 0);
        vrele(vp);
        if (error) {
            MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
            break;
        }
    }

    if (error)
        quotaoff_inchange(td, mp, type);
    UFS_LOCK(ump);
    ump->um_qflags[type] &= ~QTF_OPENING;
    KASSERT((ump->um_qflags[type] & QTF_CLOSING) == 0,
            ("quotaon: leaking flags"));
    UFS_UNLOCK(ump);

    vfs_unbusy(mp);
    return (error);
}
Пример #16
0
static int
udf_mount(struct mount *mp)
{
	struct vnode *devvp;	/* vnode of the mount device */
	struct thread *td;
	struct udf_mnt *imp = NULL;
	struct vfsoptlist *opts;
	char *fspec, *cs_disk, *cs_local;
	int error, len, *udf_flags;
	struct nameidata nd, *ndp = &nd;

	td = curthread;
	opts = mp->mnt_optnew;

	/*
	 * Unconditionally mount as read-only.
	 */
	MNT_ILOCK(mp);
	mp->mnt_flag |= MNT_RDONLY;
	MNT_IUNLOCK(mp);

	/*
	 * No root filesystem support.  Probably not a big deal, since the
	 * bootloader doesn't understand UDF.
	 */
	if (mp->mnt_flag & MNT_ROOTFS)
		return (ENOTSUP);

	fspec = NULL;
	error = vfs_getopt(opts, "from", (void **)&fspec, &len);
	if (!error && fspec[len - 1] != '\0')
		return (EINVAL);

	if (mp->mnt_flag & MNT_UPDATE) {
		return (0);
	}

	/* Check that the mount device exists */
	if (fspec == NULL)
		return (EINVAL);
	NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspec, td);
	if ((error = namei(ndp)))
		return (error);
	NDFREE(ndp, NDF_ONLY_PNBUF);
	devvp = ndp->ni_vp;

	if (vn_isdisk(devvp, &error) == 0) {
		vput(devvp);
		return (error);
	}

	/* Check the access rights on the mount device */
	error = VOP_ACCESS(devvp, VREAD, td->td_ucred, td);
	if (error)
		error = priv_check(td, PRIV_VFS_MOUNT_PERM);
	if (error) {
		vput(devvp);
		return (error);
	}

	if ((error = udf_mountfs(devvp, mp))) {
		vrele(devvp);
		return (error);
	}

	imp = VFSTOUDFFS(mp);

	udf_flags = NULL;
	error = vfs_getopt(opts, "flags", (void **)&udf_flags, &len);
	if (error || len != sizeof(int))
		return (EINVAL);
	imp->im_flags = *udf_flags;

	if (imp->im_flags & UDFMNT_KICONV && udf_iconv) {
		cs_disk = NULL;
		error = vfs_getopt(opts, "cs_disk", (void **)&cs_disk, &len);
		if (!error && cs_disk[len - 1] != '\0')
			return (EINVAL);
		cs_local = NULL;
		error = vfs_getopt(opts, "cs_local", (void **)&cs_local, &len);
		if (!error && cs_local[len - 1] != '\0')
			return (EINVAL);
		udf_iconv->open(cs_local, cs_disk, &imp->im_d2l);
#if 0
		udf_iconv->open(cs_disk, cs_local, &imp->im_l2d);
#endif
	}

	vfs_mountedfrom(mp, fspec);
	return 0;
};
Пример #17
0
/*ARGSUSED*/
int
coda_mount(struct mount *vfsp)
{
	struct vnode *dvp;
	struct cnode *cp;
	struct cdev *dev;
	struct coda_mntinfo *mi;
	struct vnode *rootvp;
	struct CodaFid rootfid = INVAL_FID;
	struct CodaFid ctlfid = CTL_FID;
	int error;
	struct nameidata ndp;
	ENTRY;
	char *from;

	if (vfs_filteropt(vfsp->mnt_optnew, coda_opts))
		return (EINVAL);
	from = vfs_getopts(vfsp->mnt_optnew, "from", &error);
	if (error)
		return (error);
	coda_vfsopstats_init();
	coda_vnodeopstats_init();
	MARK_ENTRY(CODA_MOUNT_STATS);
	if (CODA_MOUNTED(vfsp)) {
		MARK_INT_FAIL(CODA_MOUNT_STATS);
		return (EBUSY);
	}

	/*
	 * Validate mount device.  Similar to getmdev().
	 */
	NDINIT(&ndp, LOOKUP, FOLLOW, UIO_SYSSPACE, from, curthread);
	error = namei(&ndp);
	dvp = ndp.ni_vp;
	if (error) {
		MARK_INT_FAIL(CODA_MOUNT_STATS);
		return (error);
	}
	if (dvp->v_type != VCHR) {
		MARK_INT_FAIL(CODA_MOUNT_STATS);
		vrele(dvp);
		NDFREE(&ndp, NDF_ONLY_PNBUF);
		return (ENXIO);
	}
	dev = dvp->v_rdev;
	vrele(dvp);
	NDFREE(&ndp, NDF_ONLY_PNBUF);

	/*
	 * Initialize the mount record and link it to the vfs struct.
	 */
	mi = dev2coda_mntinfo(dev);
	if (!mi) {
		MARK_INT_FAIL(CODA_MOUNT_STATS);
		printf("Coda mount: %s is not a cfs device\n", from);
		return (ENXIO);
	}
	if (!VC_OPEN(&mi->mi_vcomm)) {
		MARK_INT_FAIL(CODA_MOUNT_STATS);
		return (ENODEV);
	}

	/*
	 * No initialization (here) of mi_vcomm!
	 */
	vfsp->mnt_data = mi;
	vfs_getnewfsid (vfsp);
	mi->mi_vfsp = vfsp;
	mi->mi_started = 0;			/* XXX See coda_root() */

	/*
	 * Make a root vnode to placate the Vnode interface, but don't
	 * actually make the CODA_ROOT call to venus until the first call to
	 * coda_root in case a server is down while venus is starting.
	 */
	cp = make_coda_node(&rootfid, vfsp, VDIR);
	rootvp = CTOV(cp);
	rootvp->v_vflag |= VV_ROOT;
	cp = make_coda_node(&ctlfid, vfsp, VREG);
	coda_ctlvp = CTOV(cp);

	/*
	 * Add vfs and rootvp to chain of vfs hanging off mntinfo.
	 */
	mi->mi_vfsp = vfsp;
	mi->mi_rootvp = rootvp;
	vfs_mountedfrom(vfsp, from);

	/*
	 * Error is currently guaranteed to be zero, but in case some code
	 * changes...
	 */
	CODADEBUG(1, myprintf(("coda_mount returned %d\n", error)););
Пример #18
0
static int
link_elf_load_file(linker_class_t cls, const char *filename,
    linker_file_t *result)
{
	struct nameidata nd;
	struct thread *td = curthread;	/* XXX */
	Elf_Ehdr *hdr;
	Elf_Shdr *shdr;
	Elf_Sym *es;
	int nbytes, i, j;
	vm_offset_t mapbase;
	size_t mapsize;
	int error = 0;
	ssize_t resid;
	int flags;
	elf_file_t ef;
	linker_file_t lf;
	int symtabindex;
	int symstrindex;
	int shstrindex;
	int nsym;
	int pb, rl, ra;
	int alignmask;

	shdr = NULL;
	lf = NULL;
	mapsize = 0;
	hdr = NULL;

	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, filename, td);
	flags = FREAD;
	error = vn_open(&nd, &flags, 0, NULL);
	if (error)
		return error;
	NDFREE(&nd, NDF_ONLY_PNBUF);
	if (nd.ni_vp->v_type != VREG) {
		error = ENOEXEC;
		goto out;
	}
#ifdef MAC
	error = mac_kld_check_load(td->td_ucred, nd.ni_vp);
	if (error) {
		goto out;
	}
#endif

	/* Read the elf header from the file. */
	hdr = malloc(sizeof(*hdr), M_LINKER, M_WAITOK);
	error = vn_rdwr(UIO_READ, nd.ni_vp, (void *)hdr, sizeof(*hdr), 0,
	    UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED,
	    &resid, td);
	if (error)
		goto out;
	if (resid != 0){
		error = ENOEXEC;
		goto out;
	}

	if (!IS_ELF(*hdr)) {
		error = ENOEXEC;
		goto out;
	}

	if (hdr->e_ident[EI_CLASS] != ELF_TARG_CLASS
	    || hdr->e_ident[EI_DATA] != ELF_TARG_DATA) {
		link_elf_error(filename, "Unsupported file layout");
		error = ENOEXEC;
		goto out;
	}
	if (hdr->e_ident[EI_VERSION] != EV_CURRENT
	    || hdr->e_version != EV_CURRENT) {
		link_elf_error(filename, "Unsupported file version");
		error = ENOEXEC;
		goto out;
	}
	if (hdr->e_type != ET_REL) {
		error = ENOSYS;
		goto out;
	}
	if (hdr->e_machine != ELF_TARG_MACH) {
		link_elf_error(filename, "Unsupported machine");
		error = ENOEXEC;
		goto out;
	}

	lf = linker_make_file(filename, &link_elf_class);
	if (!lf) {
		error = ENOMEM;
		goto out;
	}
	ef = (elf_file_t) lf;
	ef->nprogtab = 0;
	ef->e_shdr = 0;
	ef->nreltab = 0;
	ef->nrelatab = 0;

	/* Allocate and read in the section header */
	nbytes = hdr->e_shnum * hdr->e_shentsize;
	if (nbytes == 0 || hdr->e_shoff == 0 ||
	    hdr->e_shentsize != sizeof(Elf_Shdr)) {
		error = ENOEXEC;
		goto out;
	}
	shdr = malloc(nbytes, M_LINKER, M_WAITOK);
	ef->e_shdr = shdr;
	error = vn_rdwr(UIO_READ, nd.ni_vp, (caddr_t)shdr, nbytes, hdr->e_shoff,
	    UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED, &resid, td);
	if (error)
		goto out;
	if (resid) {
		error = ENOEXEC;
		goto out;
	}

	/* Scan the section header for information and table sizing. */
	nsym = 0;
	symtabindex = -1;
	symstrindex = -1;
	for (i = 0; i < hdr->e_shnum; i++) {
		if (shdr[i].sh_size == 0)
			continue;
		switch (shdr[i].sh_type) {
		case SHT_PROGBITS:
		case SHT_NOBITS:
			ef->nprogtab++;
			break;
		case SHT_SYMTAB:
			nsym++;
			symtabindex = i;
			symstrindex = shdr[i].sh_link;
			break;
		case SHT_REL:
			ef->nreltab++;
			break;
		case SHT_RELA:
			ef->nrelatab++;
			break;
		case SHT_STRTAB:
			break;
		}
	}
	if (ef->nprogtab == 0) {
		link_elf_error(filename, "file has no contents");
		error = ENOEXEC;
		goto out;
	}
	if (nsym != 1) {
		/* Only allow one symbol table for now */
		link_elf_error(filename, "file has no valid symbol table");
		error = ENOEXEC;
		goto out;
	}
	if (symstrindex < 0 || symstrindex > hdr->e_shnum ||
	    shdr[symstrindex].sh_type != SHT_STRTAB) {
		link_elf_error(filename, "file has invalid symbol strings");
		error = ENOEXEC;
		goto out;
	}

	/* Allocate space for tracking the load chunks */
	if (ef->nprogtab != 0)
		ef->progtab = malloc(ef->nprogtab * sizeof(*ef->progtab),
		    M_LINKER, M_WAITOK | M_ZERO);
	if (ef->nreltab != 0)
		ef->reltab = malloc(ef->nreltab * sizeof(*ef->reltab),
		    M_LINKER, M_WAITOK | M_ZERO);
	if (ef->nrelatab != 0)
		ef->relatab = malloc(ef->nrelatab * sizeof(*ef->relatab),
		    M_LINKER, M_WAITOK | M_ZERO);

	if (symtabindex == -1)
		panic("lost symbol table index");
	/* Allocate space for and load the symbol table */
	ef->ddbsymcnt = shdr[symtabindex].sh_size / sizeof(Elf_Sym);
	ef->ddbsymtab = malloc(shdr[symtabindex].sh_size, M_LINKER, M_WAITOK);
	error = vn_rdwr(UIO_READ, nd.ni_vp, (void *)ef->ddbsymtab,
	    shdr[symtabindex].sh_size, shdr[symtabindex].sh_offset,
	    UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED,
	    &resid, td);
	if (error)
		goto out;
	if (resid != 0){
		error = EINVAL;
		goto out;
	}

	if (symstrindex == -1)
		panic("lost symbol string index");
	/* Allocate space for and load the symbol strings */
	ef->ddbstrcnt = shdr[symstrindex].sh_size;
	ef->ddbstrtab = malloc(shdr[symstrindex].sh_size, M_LINKER, M_WAITOK);
	error = vn_rdwr(UIO_READ, nd.ni_vp, ef->ddbstrtab,
	    shdr[symstrindex].sh_size, shdr[symstrindex].sh_offset,
	    UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED,
	    &resid, td);
	if (error)
		goto out;
	if (resid != 0){
		error = EINVAL;
		goto out;
	}

	/* Do we have a string table for the section names?  */
	shstrindex = -1;
	if (hdr->e_shstrndx != 0 &&
	    shdr[hdr->e_shstrndx].sh_type == SHT_STRTAB) {
		shstrindex = hdr->e_shstrndx;
		ef->shstrcnt = shdr[shstrindex].sh_size;
		ef->shstrtab = malloc(shdr[shstrindex].sh_size, M_LINKER,
		    M_WAITOK);
		error = vn_rdwr(UIO_READ, nd.ni_vp, ef->shstrtab,
		    shdr[shstrindex].sh_size, shdr[shstrindex].sh_offset,
		    UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED,
		    &resid, td);
		if (error)
			goto out;
		if (resid != 0){
			error = EINVAL;
			goto out;
		}
	}

	/* Size up code/data(progbits) and bss(nobits). */
	alignmask = 0;
	for (i = 0; i < hdr->e_shnum; i++) {
		if (shdr[i].sh_size == 0)
			continue;
		switch (shdr[i].sh_type) {
		case SHT_PROGBITS:
		case SHT_NOBITS:
			alignmask = shdr[i].sh_addralign - 1;
			mapsize += alignmask;
			mapsize &= ~alignmask;
			mapsize += shdr[i].sh_size;
			break;
		}
	}

	/*
	 * We know how much space we need for the text/data/bss/etc.
	 * This stuff needs to be in a single chunk so that profiling etc
	 * can get the bounds and gdb can associate offsets with modules
	 */
	ef->object = vm_object_allocate(OBJT_DEFAULT,
	    round_page(mapsize) >> PAGE_SHIFT);
	if (ef->object == NULL) {
		error = ENOMEM;
		goto out;
	}
	ef->address = (caddr_t) vm_map_min(kernel_map);

	/*
	 * In order to satisfy amd64's architectural requirements on the
	 * location of code and data in the kernel's address space, request a
	 * mapping that is above the kernel.  
	 */
#ifdef __amd64__
	mapbase = KERNBASE;
#else
	mapbase = VM_MIN_KERNEL_ADDRESS;
#endif
	error = vm_map_find(kernel_map, ef->object, 0, &mapbase,
	    round_page(mapsize), 0, VMFS_OPTIMAL_SPACE, VM_PROT_ALL,
	    VM_PROT_ALL, 0);
	if (error) {
		vm_object_deallocate(ef->object);
		ef->object = 0;
		goto out;
	}

	/* Wire the pages */
	error = vm_map_wire(kernel_map, mapbase,
	    mapbase + round_page(mapsize),
	    VM_MAP_WIRE_SYSTEM|VM_MAP_WIRE_NOHOLES);
	if (error != KERN_SUCCESS) {
		error = ENOMEM;
		goto out;
	}

	/* Inform the kld system about the situation */
	lf->address = ef->address = (caddr_t)mapbase;
	lf->size = mapsize;

	/*
	 * Now load code/data(progbits), zero bss(nobits), allocate space for
	 * and load relocs
	 */
	pb = 0;
	rl = 0;
	ra = 0;
	alignmask = 0;
	for (i = 0; i < hdr->e_shnum; i++) {
		if (shdr[i].sh_size == 0)
			continue;
		switch (shdr[i].sh_type) {
		case SHT_PROGBITS:
		case SHT_NOBITS:
			alignmask = shdr[i].sh_addralign - 1;
			mapbase += alignmask;
			mapbase &= ~alignmask;
			if (ef->shstrtab && shdr[i].sh_name != 0)
				ef->progtab[pb].name =
				    ef->shstrtab + shdr[i].sh_name;
			else if (shdr[i].sh_type == SHT_PROGBITS)
				ef->progtab[pb].name = "<<PROGBITS>>";
			else
				ef->progtab[pb].name = "<<NOBITS>>";
			if (ef->progtab[pb].name != NULL && 
			    !strcmp(ef->progtab[pb].name, DPCPU_SETNAME))
				ef->progtab[pb].addr =
				    dpcpu_alloc(shdr[i].sh_size);
#ifdef VIMAGE
			else if (ef->progtab[pb].name != NULL &&
			    !strcmp(ef->progtab[pb].name, VNET_SETNAME))
				ef->progtab[pb].addr =
				    vnet_data_alloc(shdr[i].sh_size);
#endif
			else
				ef->progtab[pb].addr =
				    (void *)(uintptr_t)mapbase;
			if (ef->progtab[pb].addr == NULL) {
				error = ENOSPC;
				goto out;
			}
			ef->progtab[pb].size = shdr[i].sh_size;
			ef->progtab[pb].sec = i;
			if (shdr[i].sh_type == SHT_PROGBITS) {
				error = vn_rdwr(UIO_READ, nd.ni_vp,
				    ef->progtab[pb].addr,
				    shdr[i].sh_size, shdr[i].sh_offset,
				    UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred,
				    NOCRED, &resid, td);
				if (error)
					goto out;
				if (resid != 0){
					error = EINVAL;
					goto out;
				}
				/* Initialize the per-cpu or vnet area. */
				if (ef->progtab[pb].addr != (void *)mapbase &&
				    !strcmp(ef->progtab[pb].name, DPCPU_SETNAME))
					dpcpu_copy(ef->progtab[pb].addr,
					    shdr[i].sh_size);
#ifdef VIMAGE
				else if (ef->progtab[pb].addr !=
				    (void *)mapbase &&
				    !strcmp(ef->progtab[pb].name, VNET_SETNAME))
					vnet_data_copy(ef->progtab[pb].addr,
					    shdr[i].sh_size);
#endif
			} else
				bzero(ef->progtab[pb].addr, shdr[i].sh_size);

			/* Update all symbol values with the offset. */
			for (j = 0; j < ef->ddbsymcnt; j++) {
				es = &ef->ddbsymtab[j];
				if (es->st_shndx != i)
					continue;
				es->st_value += (Elf_Addr)ef->progtab[pb].addr;
			}
			mapbase += shdr[i].sh_size;
			pb++;
			break;
		case SHT_REL:
			ef->reltab[rl].rel = malloc(shdr[i].sh_size, M_LINKER,
			    M_WAITOK);
			ef->reltab[rl].nrel = shdr[i].sh_size / sizeof(Elf_Rel);
			ef->reltab[rl].sec = shdr[i].sh_info;
			error = vn_rdwr(UIO_READ, nd.ni_vp,
			    (void *)ef->reltab[rl].rel,
			    shdr[i].sh_size, shdr[i].sh_offset,
			    UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED,
			    &resid, td);
			if (error)
				goto out;
			if (resid != 0){
				error = EINVAL;
				goto out;
			}
			rl++;
			break;
		case SHT_RELA:
			ef->relatab[ra].rela = malloc(shdr[i].sh_size, M_LINKER,
			    M_WAITOK);
			ef->relatab[ra].nrela =
			    shdr[i].sh_size / sizeof(Elf_Rela);
			ef->relatab[ra].sec = shdr[i].sh_info;
			error = vn_rdwr(UIO_READ, nd.ni_vp,
			    (void *)ef->relatab[ra].rela,
			    shdr[i].sh_size, shdr[i].sh_offset,
			    UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED,
			    &resid, td);
			if (error)
				goto out;
			if (resid != 0){
				error = EINVAL;
				goto out;
			}
			ra++;
			break;
		}
	}
	if (pb != ef->nprogtab)
		panic("lost progbits");
	if (rl != ef->nreltab)
		panic("lost reltab");
	if (ra != ef->nrelatab)
		panic("lost relatab");
	if (mapbase != (vm_offset_t)ef->address + mapsize)
		panic("mapbase 0x%lx != address %p + mapsize 0x%lx (0x%lx)\n",
		    (u_long)mapbase, ef->address, (u_long)mapsize,
		    (u_long)(vm_offset_t)ef->address + mapsize);

	/* Local intra-module relocations */
	link_elf_reloc_local(lf);

	/* Pull in dependencies */
	VOP_UNLOCK(nd.ni_vp, 0);
	error = linker_load_dependencies(lf);
	vn_lock(nd.ni_vp, LK_EXCLUSIVE | LK_RETRY);
	if (error)
		goto out;

	/* External relocations */
	error = relocate_file(ef);
	if (error)
		goto out;

	/* Notify MD code that a module is being loaded. */
	error = elf_cpu_load_file(lf);
	if (error)
		goto out;

	*result = lf;

out:
	VOP_UNLOCK(nd.ni_vp, 0);
	vn_close(nd.ni_vp, FREAD, td->td_ucred, td);
	if (error && lf)
		linker_file_unload(lf, LINKER_UNLOAD_FORCE);
	if (hdr)
		free(hdr, M_LINKER);

	return error;
}
Пример #19
0
/*
 * VFS Operations.
 *
 * mount system call
 */
static int
ext2_mount(struct mount *mp)
{
	struct vfsoptlist *opts;
	struct vnode *devvp;
	struct thread *td;
	struct ext2mount *ump = NULL;
	struct m_ext2fs *fs;
	struct nameidata nd, *ndp = &nd;
	accmode_t accmode;
	char *path, *fspec;
	int error, flags, len;

	td = curthread;
	opts = mp->mnt_optnew;

	if (vfs_filteropt(opts, ext2_opts))
		return (EINVAL);

	vfs_getopt(opts, "fspath", (void **)&path, NULL);
	/* Double-check the length of path.. */
	if (strlen(path) >= MAXMNTLEN - 1)
		return (ENAMETOOLONG);

	fspec = NULL;
	error = vfs_getopt(opts, "from", (void **)&fspec, &len);
	if (!error && fspec[len - 1] != '\0')
		return (EINVAL);

	/*
	 * If updating, check whether changing from read-only to
	 * read/write; if there is no device name, that's all we do.
	 */
	if (mp->mnt_flag & MNT_UPDATE) {
		ump = VFSTOEXT2(mp);
		fs = ump->um_e2fs; 
		error = 0;
		if (fs->e2fs_ronly == 0 &&
		    vfs_flagopt(opts, "ro", NULL, 0)) {
			error = VFS_SYNC(mp, MNT_WAIT);
			if (error)
				return (error);
			flags = WRITECLOSE;
			if (mp->mnt_flag & MNT_FORCE)
				flags |= FORCECLOSE;
			error = ext2_flushfiles(mp, flags, td);
			if ( error == 0 && fs->e2fs_wasvalid && ext2_cgupdate(ump, MNT_WAIT) == 0) {
				fs->e2fs->e2fs_state |= E2FS_ISCLEAN;
				ext2_sbupdate(ump, MNT_WAIT);
			}
			fs->e2fs_ronly = 1;
			vfs_flagopt(opts, "ro", &mp->mnt_flag, MNT_RDONLY);
			DROP_GIANT();
			g_topology_lock();
			g_access(ump->um_cp, 0, -1, 0);
			g_topology_unlock();
			PICKUP_GIANT();
		}
		if (!error && (mp->mnt_flag & MNT_RELOAD))
			error = ext2_reload(mp, td);
		if (error)
			return (error);
		devvp = ump->um_devvp;
		if (fs->e2fs_ronly && !vfs_flagopt(opts, "ro", NULL, 0)) {
			if (ext2_check_sb_compat(fs->e2fs, devvp->v_rdev, 0))
				return (EPERM);

			/*
			 * If upgrade to read-write by non-root, then verify
			 * that user has necessary permissions on the device.
			 */
			vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
			error = VOP_ACCESS(devvp, VREAD | VWRITE,
			    td->td_ucred, td);
			if (error)
				error = priv_check(td, PRIV_VFS_MOUNT_PERM);
			if (error) {
				VOP_UNLOCK(devvp, 0);
				return (error);
			}
			VOP_UNLOCK(devvp, 0);
			DROP_GIANT();
			g_topology_lock();
			error = g_access(ump->um_cp, 0, 1, 0);
			g_topology_unlock();
			PICKUP_GIANT();
			if (error)
				return (error);

			if ((fs->e2fs->e2fs_state & E2FS_ISCLEAN) == 0 ||
			    (fs->e2fs->e2fs_state & E2FS_ERRORS)) {
				if (mp->mnt_flag & MNT_FORCE) {
					printf(
"WARNING: %s was not properly dismounted\n", fs->e2fs_fsmnt);
				} else {
					printf(
"WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
					    fs->e2fs_fsmnt);
					return (EPERM);
				}
			}
			fs->e2fs->e2fs_state &= ~E2FS_ISCLEAN;
			(void)ext2_cgupdate(ump, MNT_WAIT);
			fs->e2fs_ronly = 0;
			MNT_ILOCK(mp);
			mp->mnt_flag &= ~MNT_RDONLY;
			MNT_IUNLOCK(mp);
		}
		if (vfs_flagopt(opts, "export", NULL, 0)) {
			/* Process export requests in vfs_mount.c. */
			return (error);
		}
	}

	/*
	 * Not an update, or updating the name: look up the name
	 * and verify that it refers to a sensible disk device.
	 */
	if (fspec == NULL)
		return (EINVAL);
	NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspec, td);
	if ((error = namei(ndp)) != 0)
		return (error);
	NDFREE(ndp, NDF_ONLY_PNBUF);
	devvp = ndp->ni_vp;

	if (!vn_isdisk(devvp, &error)) {
		vput(devvp);
		return (error);
	}

	/*
	 * If mount by non-root, then verify that user has necessary
	 * permissions on the device.
	 *
	 * XXXRW: VOP_ACCESS() enough?
	 */
	accmode = VREAD;
	if ((mp->mnt_flag & MNT_RDONLY) == 0)
		accmode |= VWRITE;
	error = VOP_ACCESS(devvp, accmode, td->td_ucred, td);
	if (error)
		error = priv_check(td, PRIV_VFS_MOUNT_PERM);
	if (error) {
		vput(devvp);
		return (error);
	}

	if ((mp->mnt_flag & MNT_UPDATE) == 0) {
		error = ext2_mountfs(devvp, mp);
	} else {
		if (devvp != ump->um_devvp) {
			vput(devvp);
			return (EINVAL);	/* needs translation */
		} else
			vput(devvp);
	}
	if (error) {
		vrele(devvp);
		return (error);
	}
	ump = VFSTOEXT2(mp);
	fs = ump->um_e2fs;

	/*
	 * Note that this strncpy() is ok because of a check at the start
	 * of ext2_mount().
	 */
	strncpy(fs->e2fs_fsmnt, path, MAXMNTLEN);
	fs->e2fs_fsmnt[MAXMNTLEN - 1] = '\0';
	vfs_mountedfrom(mp, fspec);
	return (0);
}
Пример #20
0
static int
sysctl_integriforce_so(SYSCTL_HANDLER_ARGS)
{
    integriforce_so_check_t *integriforce_so;
    secadm_prison_entry_t *entry;
    secadm_rule_t r, *rule;
    struct nameidata nd;
    struct vattr vap;
    secadm_key_t key;
    int err;

    if (!(req->newptr) || req->newlen != sizeof(integriforce_so_check_t))
        return (EINVAL);

    if (!(req->oldptr) || req->oldlen != sizeof(integriforce_so_check_t))
        return (EINVAL);

    integriforce_so = malloc(sizeof(integriforce_so_check_t), M_SECADM, M_WAITOK);

    err = SYSCTL_IN(req, integriforce_so, sizeof(integriforce_so_check_t));
    if (err) {
        free(integriforce_so, M_SECADM);
        return (err);
    }

    NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, integriforce_so->isc_path, req->td);
    err = namei(&nd);
    if (err) {
        free(integriforce_so, M_SECADM);
        NDFREE(&nd, 0);
        return (err);
    }

    if ((err = vn_lock(nd.ni_vp, LK_SHARED | LK_RETRY)) != 0) {
        free(integriforce_so, M_SECADM);
        NDFREE(&nd, 0);
        return (err);
    }

    err = VOP_GETATTR(nd.ni_vp, &vap, req->td->td_ucred);
    if (err) {
        free(integriforce_so, M_SECADM);
        NDFREE(&nd, 0);
        return (err);
    }

    VOP_UNLOCK(nd.ni_vp, 0);

    key.sk_jid = req->td->td_ucred->cr_prison->pr_id;
    key.sk_type = secadm_integriforce_rule;
    key.sk_fileid = vap.va_fileid;
    strncpy(key.sk_mntonname,
            nd.ni_vp->v_mount->mnt_stat.f_mntonname, MNAMELEN);
    r.sr_key = fnv_32_buf(&key, sizeof(secadm_key_t), FNV1_32_INIT);

    entry = get_prison_list_entry(
                req->td->td_ucred->cr_prison->pr_id);

    PE_RLOCK(entry);
    rule = RB_FIND(secadm_rules_tree, &(entry->sp_rules), &r);

    if (rule) {
        integriforce_so->isc_result =
            do_integriforce_check(rule, &vap, nd.ni_vp,
                                  req->td->td_ucred);
    }

    PE_RUNLOCK(entry);

    SYSCTL_OUT(req, integriforce_so, sizeof(integriforce_so_check_t));
    free(integriforce_so, M_SECADM);

    NDFREE(&nd, 0);

    return (0);
}
Пример #21
0
static int
link_elf_ctf_get(linker_file_t lf, linker_ctf_t *lc)
{
#ifdef DDB_CTF
	Elf_Ehdr *hdr = NULL;
	Elf_Shdr *shdr = NULL;
	caddr_t ctftab = NULL;
	caddr_t raw = NULL;
	caddr_t shstrtab = NULL;
	elf_file_t ef = (elf_file_t) lf;
	int flags;
	int i;
	int nbytes;
	ssize_t resid;
	int vfslocked;
	size_t sz;
	struct nameidata nd;
	struct thread *td = curthread;
	uint8_t ctf_hdr[CTF_HDR_SIZE];
#endif
	int error = 0;

	if (lf == NULL || lc == NULL)
		return (EINVAL);

	/* Set the defaults for no CTF present. That's not a crime! */
	bzero(lc, sizeof(*lc));

#ifdef DDB_CTF
	/*
	 * First check if we've tried to load CTF data previously and the
	 * CTF ELF section wasn't found. We flag that condition by setting
	 * ctfcnt to -1. See below.
	 */
	if (ef->ctfcnt < 0)
		return (EFTYPE);

	/* Now check if we've already loaded the CTF data.. */
	if (ef->ctfcnt > 0) {
		/* We only need to load once. */
		lc->ctftab = ef->ctftab;
		lc->ctfcnt = ef->ctfcnt;
		lc->symtab = ef->ddbsymtab;
		lc->strtab = ef->ddbstrtab;
		lc->strcnt = ef->ddbstrcnt;
		lc->nsym   = ef->ddbsymcnt;
		lc->ctfoffp = (uint32_t **) &ef->ctfoff;
		lc->typoffp = (uint32_t **) &ef->typoff;
		lc->typlenp = &ef->typlen;
		return (0);
	}

	/*
	 * We need to try reading the CTF data. Flag no CTF data present
	 * by default and if we actually succeed in reading it, we'll
	 * update ctfcnt to the number of bytes read.
	 */
	ef->ctfcnt = -1;

	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, UIO_SYSSPACE, lf->pathname, td);
	flags = FREAD;
	error = vn_open(&nd, &flags, 0, NULL);
	if (error)
		return (error);
	vfslocked = NDHASGIANT(&nd);
	NDFREE(&nd, NDF_ONLY_PNBUF);

	/* Allocate memory for the FLF header. */
	if ((hdr = malloc(sizeof(*hdr), M_LINKER, M_WAITOK)) == NULL) {
		error = ENOMEM;
		goto out;
	}

	/* Read the ELF header. */
	if ((error = vn_rdwr(UIO_READ, nd.ni_vp, hdr, sizeof(*hdr),
	    0, UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED, &resid,
	    td)) != 0)
		goto out;

	/* Sanity check. */
	if (!IS_ELF(*hdr)) {
		error = ENOEXEC;
		goto out;
	}

	nbytes = hdr->e_shnum * hdr->e_shentsize;
	if (nbytes == 0 || hdr->e_shoff == 0 ||
	    hdr->e_shentsize != sizeof(Elf_Shdr)) {
		error = ENOEXEC;
		goto out;
	}

	/* Allocate memory for all the section headers */
	if ((shdr = malloc(nbytes, M_LINKER, M_WAITOK)) == NULL) {
		error = ENOMEM;
		goto out;
	}

	/* Read all the section headers */
	if ((error = vn_rdwr(UIO_READ, nd.ni_vp, (caddr_t)shdr, nbytes,
	    hdr->e_shoff, UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED,
	    &resid, td)) != 0)
		goto out;

	/*
	 * We need to search for the CTF section by name, so if the
	 * section names aren't present, then we can't locate the
	 * .SUNW_ctf section containing the CTF data.
	 */
	if (hdr->e_shstrndx == 0 || shdr[hdr->e_shstrndx].sh_type != SHT_STRTAB) {
		printf("%s(%d): module %s e_shstrndx is %d, sh_type is %d\n",
		    __func__, __LINE__, lf->pathname, hdr->e_shstrndx,
		    shdr[hdr->e_shstrndx].sh_type);
		error = EFTYPE;
		goto out;
	}

	/* Allocate memory to buffer the section header strings. */
	if ((shstrtab = malloc(shdr[hdr->e_shstrndx].sh_size, M_LINKER,
	    M_WAITOK)) == NULL) {
		error = ENOMEM;
		goto out;
	}

	/* Read the section header strings. */
	if ((error = vn_rdwr(UIO_READ, nd.ni_vp, shstrtab,
	    shdr[hdr->e_shstrndx].sh_size, shdr[hdr->e_shstrndx].sh_offset,
	    UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED, &resid,
	    td)) != 0)
		goto out;

	/* Search for the section containing the CTF data. */
	for (i = 0; i < hdr->e_shnum; i++)
		if (strcmp(".SUNW_ctf", shstrtab + shdr[i].sh_name) == 0)
			break;

	/* Check if the CTF section wasn't found. */
	if (i >= hdr->e_shnum) {
		printf("%s(%d): module %s has no .SUNW_ctf section\n",
		    __func__, __LINE__, lf->pathname);
		error = EFTYPE;
		goto out;
	}

	/* Read the CTF header. */
	if ((error = vn_rdwr(UIO_READ, nd.ni_vp, ctf_hdr, sizeof(ctf_hdr),
	    shdr[i].sh_offset, UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred,
	    NOCRED, &resid, td)) != 0)
		goto out;

	/* Check the CTF magic number. (XXX check for big endian!) */
	if (ctf_hdr[0] != 0xf1 || ctf_hdr[1] != 0xcf) {
		printf("%s(%d): module %s has invalid format\n",
		    __func__, __LINE__, lf->pathname);
		error = EFTYPE;
		goto out;
	}

	/* Check if version 2. */
	if (ctf_hdr[2] != 2) {
		printf("%s(%d): module %s CTF format version is %d "
		    "(2 expected)\n",
		    __func__, __LINE__, lf->pathname, ctf_hdr[2]);
		error = EFTYPE;
		goto out;
	}

	/* Check if the data is compressed. */
	if ((ctf_hdr[3] & 0x1) != 0) {
		uint32_t *u32 = (uint32_t *) ctf_hdr;

		/*
		 * The last two fields in the CTF header are the offset
		 * from the end of the header to the start of the string
		 * data and the length of that string data. se this
		 * information to determine the decompressed CTF data
		 * buffer required.
		 */
		sz = u32[CTF_HDR_STRTAB_U32] + u32[CTF_HDR_STRLEN_U32] +
		    sizeof(ctf_hdr);

		/*
		 * Allocate memory for the compressed CTF data, including
		 * the header (which isn't compressed).
		 */
		if ((raw = malloc(shdr[i].sh_size, M_LINKER, M_WAITOK)) == NULL) {
			error = ENOMEM;
			goto out;
		}
	} else {
		/*
		 * The CTF data is not compressed, so the ELF section
		 * size is the same as the buffer size required.
		 */
		sz = shdr[i].sh_size;
	}

	/*
	 * Allocate memory to buffer the CTF data in it's decompressed
	 * form.
	 */
	if ((ctftab = malloc(sz, M_LINKER, M_WAITOK)) == NULL) {
		error = ENOMEM;
		goto out;
	}

	/*
	 * Read the CTF data into the raw buffer if compressed, or
	 * directly into the CTF buffer otherwise.
	 */
	if ((error = vn_rdwr(UIO_READ, nd.ni_vp, raw == NULL ? ctftab : raw,
	    shdr[i].sh_size, shdr[i].sh_offset, UIO_SYSSPACE, IO_NODELOCKED,
	    td->td_ucred, NOCRED, &resid, td)) != 0)
		goto out;

	/* Check if decompression is required. */
	if (raw != NULL) {
		z_stream zs;
		int ret;

		/*
		 * The header isn't compressed, so copy that into the
		 * CTF buffer first.
		 */
		bcopy(ctf_hdr, ctftab, sizeof(ctf_hdr));

		/* Initialise the zlib structure. */
		bzero(&zs, sizeof(zs));
		zs.zalloc = z_alloc;
		zs.zfree = z_free;

		if (inflateInit(&zs) != Z_OK) {
			error = EIO;
			goto out;
		}

		zs.avail_in = shdr[i].sh_size - sizeof(ctf_hdr);
		zs.next_in = ((uint8_t *) raw) + sizeof(ctf_hdr);
		zs.avail_out = sz - sizeof(ctf_hdr);
		zs.next_out = ((uint8_t *) ctftab) + sizeof(ctf_hdr);
		if ((ret = inflate(&zs, Z_FINISH)) != Z_STREAM_END) {
			printf("%s(%d): zlib inflate returned %d\n", __func__, __LINE__, ret);
			error = EIO;
			goto out;
		}
	}

	/* Got the CTF data! */
	ef->ctftab = ctftab;
	ef->ctfcnt = shdr[i].sh_size;

	/* We'll retain the memory allocated for the CTF data. */
	ctftab = NULL;

	/* Let the caller use the CTF data read. */
	lc->ctftab = ef->ctftab;
	lc->ctfcnt = ef->ctfcnt;
	lc->symtab = ef->ddbsymtab;
	lc->strtab = ef->ddbstrtab;
	lc->strcnt = ef->ddbstrcnt;
	lc->nsym   = ef->ddbsymcnt;
	lc->ctfoffp = (uint32_t **) &ef->ctfoff;
	lc->typoffp = (uint32_t **) &ef->typoff;
	lc->typlenp = &ef->typlen;

out:
	VOP_UNLOCK(nd.ni_vp, 0);
	vn_close(nd.ni_vp, FREAD, td->td_ucred, td);
	VFS_UNLOCK_GIANT(vfslocked);

	if (hdr != NULL)
		free(hdr, M_LINKER);
	if (shdr != NULL)
		free(shdr, M_LINKER);
	if (shstrtab != NULL)
		free(shstrtab, M_LINKER);
	if (ctftab != NULL)
		free(ctftab, M_LINKER);
	if (raw != NULL)
		free(raw, M_LINKER);
#else
	error = EOPNOTSUPP;
#endif

	return (error);
}
Пример #22
0
static int
vfs_mountroot_shuffle(struct thread *td, struct mount *mpdevfs)
{
	struct nameidata nd;
	struct mount *mporoot, *mpnroot;
	struct vnode *vp, *vporoot, *vpdevfs;
	char *fspath;
	int error;

	mpnroot = TAILQ_NEXT(mpdevfs, mnt_list);

	/* Shuffle the mountlist. */
	mtx_lock(&mountlist_mtx);
	mporoot = TAILQ_FIRST(&mountlist);
	TAILQ_REMOVE(&mountlist, mpdevfs, mnt_list);
	if (mporoot != mpdevfs) {
		TAILQ_REMOVE(&mountlist, mpnroot, mnt_list);
		TAILQ_INSERT_HEAD(&mountlist, mpnroot, mnt_list);
	}
	TAILQ_INSERT_TAIL(&mountlist, mpdevfs, mnt_list);
	mtx_unlock(&mountlist_mtx);

	cache_purgevfs(mporoot);
	if (mporoot != mpdevfs)
		cache_purgevfs(mpdevfs);

	VFS_ROOT(mporoot, LK_EXCLUSIVE, &vporoot);

	VI_LOCK(vporoot);
	vporoot->v_iflag &= ~VI_MOUNT;
	VI_UNLOCK(vporoot);
	vporoot->v_mountedhere = NULL;
	mporoot->mnt_flag &= ~MNT_ROOTFS;
	mporoot->mnt_vnodecovered = NULL;
	vput(vporoot);

	/* Set up the new rootvnode, and purge the cache */
	mpnroot->mnt_vnodecovered = NULL;
	set_rootvnode();
	cache_purgevfs(rootvnode->v_mount);

	if (mporoot != mpdevfs) {
		/* Remount old root under /.mount or /mnt */
		fspath = "/.mount";
		NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
		    fspath, td);
		error = namei(&nd);
		if (error) {
			NDFREE(&nd, NDF_ONLY_PNBUF);
			fspath = "/mnt";
			NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
			    fspath, td);
			error = namei(&nd);
		}
		if (!error) {
			vp = nd.ni_vp;
			error = (vp->v_type == VDIR) ? 0 : ENOTDIR;
			if (!error)
				error = vinvalbuf(vp, V_SAVE, 0, 0);
			if (!error) {
				cache_purge(vp);
				mporoot->mnt_vnodecovered = vp;
				vp->v_mountedhere = mporoot;
				strlcpy(mporoot->mnt_stat.f_mntonname,
				    fspath, MNAMELEN);
				VOP_UNLOCK(vp, 0);
			} else
				vput(vp);
		}
		NDFREE(&nd, NDF_ONLY_PNBUF);

		if (error && bootverbose)
			printf("mountroot: unable to remount previous root "
			    "under /.mount or /mnt (error %d).\n", error);
	}

	/* Remount devfs under /dev */
	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, "/dev", td);
	error = namei(&nd);
	if (!error) {
		vp = nd.ni_vp;
		error = (vp->v_type == VDIR) ? 0 : ENOTDIR;
		if (!error)
			error = vinvalbuf(vp, V_SAVE, 0, 0);
		if (!error) {
			vpdevfs = mpdevfs->mnt_vnodecovered;
			if (vpdevfs != NULL) {
				cache_purge(vpdevfs);
				vpdevfs->v_mountedhere = NULL;
				vrele(vpdevfs);
			}
			mpdevfs->mnt_vnodecovered = vp;
			vp->v_mountedhere = mpdevfs;
			VOP_UNLOCK(vp, 0);
		} else
			vput(vp);
	}
	if (error && bootverbose)
		printf("mountroot: unable to remount devfs under /dev "
		    "(error %d).\n", error);
	NDFREE(&nd, NDF_ONLY_PNBUF);

	if (mporoot == mpdevfs) {
		vfs_unbusy(mpdevfs);
		/* Unlink the no longer needed /dev/dev -> / symlink */
		error = kern_unlink(td, "/dev/dev", UIO_SYSSPACE);
		if (error && bootverbose)
			printf("mountroot: unable to unlink /dev/dev "
			    "(error %d)\n", error);
	}

	return (0);
}
Пример #23
0
/*
 * Mount system call
 */
static int
reiserfs_mount(struct mount *mp)
{
	size_t size;
	int error, len;
	accmode_t accmode;
	char *path, *fspec;
	struct vnode *devvp;
	struct vfsoptlist *opts;
	struct reiserfs_mount *rmp;
	struct reiserfs_sb_info *sbi;
	struct nameidata nd, *ndp = &nd;
	struct thread *td;

	td = curthread;
	if (!(mp->mnt_flag & MNT_RDONLY))
		return EROFS;

	/* Get the new options passed to mount */
	opts = mp->mnt_optnew;

	/* `fspath' contains the mount point (eg. /mnt/linux); REQUIRED */
	vfs_getopt(opts, "fspath", (void **)&path, NULL);
	reiserfs_log(LOG_INFO, "mount point is `%s'\n", path);

	/* `from' contains the device name (eg. /dev/ad0s1); REQUIRED */
	fspec = NULL;
	error = vfs_getopt(opts, "from", (void **)&fspec, &len);
	if (!error && fspec[len - 1] != '\0')
		return (EINVAL);
	reiserfs_log(LOG_INFO, "device is `%s'\n", fspec);

	/* Handle MNT_UPDATE (mp->mnt_flag) */
	if (mp->mnt_flag & MNT_UPDATE) {
		/* For now, only NFS export is supported. */
		if (vfs_flagopt(opts, "export", NULL, 0))
			return (0);
	}

	/* Not an update, or updating the name: look up the name
	 * and verify that it refers to a sensible disk device. */
	if (fspec == NULL)
		return (EINVAL);

	NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspec, td);
	if ((error = namei(ndp)) != 0)
		return (error);
	NDFREE(ndp, NDF_ONLY_PNBUF);
	devvp = ndp->ni_vp;

	if (!vn_isdisk(devvp, &error)) {
		vput(devvp);
		return (error);
	}

	/* If mount by non-root, then verify that user has necessary
	 * permissions on the device. */
	accmode = VREAD;
	if ((mp->mnt_flag & MNT_RDONLY) == 0)
		accmode |= VWRITE;
	error = VOP_ACCESS(devvp, accmode, td->td_ucred, td);
	if (error)
		error = priv_check(td, PRIV_VFS_MOUNT_PERM);
	if (error) {
		vput(devvp);
		return (error);
	}

	if ((mp->mnt_flag & MNT_UPDATE) == 0) {
		error = reiserfs_mountfs(devvp, mp, td);
	} else {
		/* TODO Handle MNT_UPDATE */
		vput(devvp);
		return (EOPNOTSUPP);
	}

	if (error) {
		vrele(devvp);
		return (error);
	}

	rmp = VFSTOREISERFS(mp);
	sbi = rmp->rm_reiserfs;

	/*
	 * Note that this strncpy() is ok because of a check at the start
	 * of reiserfs_mount().
	 */
	reiserfs_log(LOG_DEBUG, "prepare statfs data\n");
	(void)copystr(fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size);
	bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
	(void)reiserfs_statfs(mp, &mp->mnt_stat);

	reiserfs_log(LOG_DEBUG, "done\n");
	return (0);
}
Пример #24
0
static int
ntfs_mount(struct mount *mp)
{
	int err = 0, error;
	struct vnode *devvp;
	struct nameidata ndp;
	struct thread *td;
	char *from;

	td = curthread;
	if (vfs_filteropt(mp->mnt_optnew, ntfs_opts))
		return (EINVAL);

	/* Force mount as read-only. */
	MNT_ILOCK(mp);
	mp->mnt_flag |= MNT_RDONLY;
	MNT_IUNLOCK(mp);

	from = vfs_getopts(mp->mnt_optnew, "from", &error);
	if (error)	
		return (error);

	/*
	 * If updating, check whether changing from read-only to
	 * read/write.
	 */
	if (mp->mnt_flag & MNT_UPDATE) {
		if (vfs_flagopt(mp->mnt_optnew, "export", NULL, 0)) {
			/* Process export requests in vfs_mount.c */
			return (0);
		} else {
			printf("ntfs_mount(): MNT_UPDATE not supported\n");
			return (EINVAL);
		}
	}

	/*
	 * Not an update, or updating the name: look up the name
	 * and verify that it refers to a sensible block device.
	 */
	NDINIT(&ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, from, td);
	err = namei(&ndp);
	if (err)
		return (err);
	NDFREE(&ndp, NDF_ONLY_PNBUF);
	devvp = ndp.ni_vp;

	if (!vn_isdisk(devvp, &err))  {
		vput(devvp);
		return (err);
	}

	/*
	 * If mount by non-root, then verify that user has necessary
	 * permissions on the device.
	 */
	err = VOP_ACCESS(devvp, VREAD, td->td_ucred, td);
	if (err)
		err = priv_check(td, PRIV_VFS_MOUNT_PERM);
	if (err) {
		vput(devvp);
		return (err);
	}


	/*
	 * Since this is a new mount, we want the names for the device and
	 * the mount point copied in.  If an error occurs, the mountpoint is
	 * discarded by the upper level code.  Note that vfs_mount() handles
	 * copying the mountpoint f_mntonname for us, so we don't have to do
	 * it here unless we want to set it to something other than "path"
	 * for some rason.
	 */

	err = ntfs_mountfs(devvp, mp, td);
	if (err == 0) {

		/* Save "mounted from" info for mount point. */
		vfs_mountedfrom(mp, from);
	} else
		vrele(devvp);
	return (err);
}
Пример #25
0
VMBlockVFSMount(struct mount *mp,        // IN: mount(2) parameters
                struct thread *td)       // IN: caller's thread context
#endif
{
   struct VMBlockMount *xmp;
   struct nameidata nd, *ndp = &nd;
   struct vnode *lowerrootvp, *vp;
   char *target;
   char *pathname;
   int len, error = 0;

   VMBLOCKDEBUG("VMBlockVFSMount(mp = %p)\n", (void *)mp);

   /*
    * TODO:  Strip out extraneous export & other misc cruft.
    */

   /*
    * Disallow the following:
    *   1.  Mounting over the system root.
    *   2.  Mount updates/remounts.  (Reconsider for rw->ro, ro->rw?)
    *   3.  Mounting VMBlock on top of a VMBlock.
    */
   if ((mp->mnt_flag & MNT_ROOTFS) ||
       (mp->mnt_flag & MNT_UPDATE) ||
       (mp->mnt_vnodecovered->v_op == &VMBlockVnodeOps)) {
      return EOPNOTSUPP;
   }

   /*
    * XXX Should only be unlocked if mnt_flag & MNT_UPDATE.
    */
   ASSERT_VOP_UNLOCKED(mp->mnt_vnodecovered, "Covered vnode already locked!");

   /*
    * Look up path to lower layer (VMBlock source / DnD staging area).
    * (E.g., in the command "mount /tmp/VMwareDnD /var/run/vmblock",
    * /tmp/VMwareDnD is the staging area.)
    */
   error = vfs_getopt(mp->mnt_optnew, "target", (void **)&target, &len);
   if (error || target[len - 1] != '\0') {
      return EINVAL;
   }

   pathname = uma_zalloc(VMBlockPathnameZone, M_WAITOK);
   if (pathname == NULL) {
      return ENOMEM;
   }

   if (strlcpy(pathname, target, MAXPATHLEN) >= MAXPATHLEN) {
      uma_zfree(VMBlockPathnameZone, pathname);
      return ENAMETOOLONG;
   }

   /*
    * Find lower node and lock if not already locked.
    */

   NDINIT(ndp, LOOKUP, FOLLOW|LOCKLEAF, UIO_SYSSPACE, target, compat_td);
   error = namei(ndp);
   if (error) {
      NDFREE(ndp, 0);
      uma_zfree(VMBlockPathnameZone, pathname);
      return error;
   }
   NDFREE(ndp, NDF_ONLY_PNBUF);

   /*
    * Check multi VMBlock mount to avoid `lock against myself' panic.
    */
   lowerrootvp = ndp->ni_vp;
   if (lowerrootvp == VPTOVMB(mp->mnt_vnodecovered)->lowerVnode) {
      VMBLOCKDEBUG("VMBlockVFSMount: multi vmblock mount?\n");
      vput(lowerrootvp);
      uma_zfree(VMBlockPathnameZone, pathname);
      return EDEADLK;
   }

   xmp = malloc(sizeof *xmp, M_VMBLOCKFSMNT, M_WAITOK);

   /*
    * Record pointer (mountVFS) to the staging area's file system.  Follow up
    * by grabbing a VMBlockNode for our layer's root.
    */
   xmp->mountVFS = lowerrootvp->v_mount;
   error = VMBlockNodeGet(mp, lowerrootvp, &vp, pathname);

   /*
    * Make sure the node alias worked
    */
   if (error) {
      COMPAT_VOP_UNLOCK(vp, 0, compat_td);
      vrele(lowerrootvp);
      free(xmp, M_VMBLOCKFSMNT);   /* XXX */
      uma_zfree(VMBlockPathnameZone, pathname);
      return error;
   }

   /*
    * Record a reference to the new filesystem's root vnode & mark it as such.
    */
   xmp->rootVnode = vp;
   xmp->rootVnode->v_vflag |= VV_ROOT;

   /*
    * Unlock the node (either the lower or the alias)
    */
   COMPAT_VOP_UNLOCK(vp, 0, compat_td);

   /*
    * If the staging area is a local filesystem, reflect that here, too.  (We
    * could potentially allow NFS staging areas.)
    */
   MNT_ILOCK(mp);
   mp->mnt_flag |= lowerrootvp->v_mount->mnt_flag & MNT_LOCAL;
#if __FreeBSD_version >= 600000 && __FreeBSD_version < 1000000
   mp->mnt_kern_flag |= lowerrootvp->v_mount->mnt_kern_flag & MNTK_MPSAFE;
#endif
   MNT_IUNLOCK(mp);

   mp->mnt_data = (qaddr_t) xmp;

   vfs_getnewfsid(mp);
   vfs_mountedfrom(mp, target);

   VMBLOCKDEBUG("VMBlockVFSMount: lower %s, alias at %s\n",
      mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname);
   return 0;
}
Пример #26
0
static int
fuse_getdevice(const char *fspec, struct thread *td, struct cdev **fdevp)
{
	struct nameidata nd, *ndp = &nd;
	struct vnode *devvp;
	struct cdev *fdev;
	int err;

	/*
         * Not an update, or updating the name: look up the name
         * and verify that it refers to a sensible disk device.
         */

	NDINIT(ndp, LOOKUP, FOLLOW, UIO_SYSSPACE, fspec, td);
	if ((err = namei(ndp)) != 0)
		return err;
	NDFREE(ndp, NDF_ONLY_PNBUF);
	devvp = ndp->ni_vp;

	if (devvp->v_type != VCHR) {
		vrele(devvp);
		return ENXIO;
	}
	fdev = devvp->v_rdev;
	dev_ref(fdev);

	if (fuse_enforce_dev_perms) {
		/*
	         * Check if mounter can open the fuse device.
	         *
	         * This has significance only if we are doing a secondary mount
	         * which doesn't involve actually opening fuse devices, but we
	         * still want to enforce the permissions of the device (in
	         * order to keep control over the circle of fuse users).
	         *
	         * (In case of primary mounts, we are either the superuser so
	         * we can do anything anyway, or we can mount only if the
	         * device is already opened by us, ie. we are permitted to open
	         * the device.)
	         */
#if 0
#ifdef MAC
		err = mac_check_vnode_open(td->td_ucred, devvp, VREAD | VWRITE);
		if (!err)
#endif
#endif /* 0 */
			err = VOP_ACCESS(devvp, VREAD | VWRITE, td->td_ucred, td);
		if (err) {
			vrele(devvp);
			dev_rel(fdev);
			return err;
		}
	}
	/*
         * according to coda code, no extra lock is needed --
         * although in sys/vnode.h this field is marked "v"
         */
	vrele(devvp);

	if (!fdev->si_devsw ||
	    strcmp("fuse", fdev->si_devsw->d_name)) {
		dev_rel(fdev);
		return ENXIO;
	}
	*fdevp = fdev;

	return 0;
}
Пример #27
0
/*
 * Mount null layer
 */
static int
nullfs_mount(struct mount *mp)
{
	int error = 0;
	struct vnode *lowerrootvp, *vp;
	struct vnode *nullm_rootvp;
	struct null_mount *xmp;
	struct thread *td = curthread;
	char *target;
	int isvnunlocked = 0, len;
	struct nameidata nd, *ndp = &nd;

	NULLFSDEBUG("nullfs_mount(mp = %p)\n", (void *)mp);

	if (!prison_allow(td->td_ucred, PR_ALLOW_MOUNT_NULLFS))
		return (EPERM);
	if (mp->mnt_flag & MNT_ROOTFS)
		return (EOPNOTSUPP);

	/*
	 * Update is a no-op
	 */
	if (mp->mnt_flag & MNT_UPDATE) {
		/*
		 * Only support update mounts for NFS export.
		 */
		if (vfs_flagopt(mp->mnt_optnew, "export", NULL, 0))
			return (0);
		else
			return (EOPNOTSUPP);
	}

	/*
	 * Get argument
	 */
	error = vfs_getopt(mp->mnt_optnew, "target", (void **)&target, &len);
	if (error || target[len - 1] != '\0')
		return (EINVAL);

	/*
	 * Unlock lower node to avoid possible deadlock.
	 */
	if ((mp->mnt_vnodecovered->v_op == &null_vnodeops) &&
	    VOP_ISLOCKED(mp->mnt_vnodecovered) == LK_EXCLUSIVE) {
		VOP_UNLOCK(mp->mnt_vnodecovered, 0);
		isvnunlocked = 1;
	}
	/*
	 * Find lower node
	 */
	NDINIT(ndp, LOOKUP, FOLLOW|LOCKLEAF, UIO_SYSSPACE, target, curthread);
	error = namei(ndp);

	/*
	 * Re-lock vnode.
	 * XXXKIB This is deadlock-prone as well.
	 */
	if (isvnunlocked)
		vn_lock(mp->mnt_vnodecovered, LK_EXCLUSIVE | LK_RETRY);

	if (error)
		return (error);
	NDFREE(ndp, NDF_ONLY_PNBUF);

	/*
	 * Sanity check on lower vnode
	 */
	lowerrootvp = ndp->ni_vp;

	/*
	 * Check multi null mount to avoid `lock against myself' panic.
	 */
	if (lowerrootvp == VTONULL(mp->mnt_vnodecovered)->null_lowervp) {
		NULLFSDEBUG("nullfs_mount: multi null mount?\n");
		vput(lowerrootvp);
		return (EDEADLK);
	}

	xmp = (struct null_mount *) malloc(sizeof(struct null_mount),
	    M_NULLFSMNT, M_WAITOK | M_ZERO);

	/*
	 * Save reference to underlying FS
	 */
	xmp->nullm_vfs = lowerrootvp->v_mount;

	/*
	 * Save reference.  Each mount also holds
	 * a reference on the root vnode.
	 */
	error = null_nodeget(mp, lowerrootvp, &vp);
	/*
	 * Make sure the node alias worked
	 */
	if (error) {
		free(xmp, M_NULLFSMNT);
		return (error);
	}

	/*
	 * Keep a held reference to the root vnode.
	 * It is vrele'd in nullfs_unmount.
	 */
	nullm_rootvp = vp;
	nullm_rootvp->v_vflag |= VV_ROOT;
	xmp->nullm_rootvp = nullm_rootvp;

	/*
	 * Unlock the node (either the lower or the alias)
	 */
	VOP_UNLOCK(vp, 0);

	if (NULLVPTOLOWERVP(nullm_rootvp)->v_mount->mnt_flag & MNT_LOCAL) {
		MNT_ILOCK(mp);
		mp->mnt_flag |= MNT_LOCAL;
		MNT_IUNLOCK(mp);
	}

	xmp->nullm_flags |= NULLM_CACHE;
	if (vfs_getopt(mp->mnt_optnew, "nocache", NULL, NULL) == 0)
		xmp->nullm_flags &= ~NULLM_CACHE;

	MNT_ILOCK(mp);
	if ((xmp->nullm_flags & NULLM_CACHE) != 0) {
		mp->mnt_kern_flag |= lowerrootvp->v_mount->mnt_kern_flag &
		    (MNTK_SHARED_WRITES | MNTK_LOOKUP_SHARED |
		    MNTK_EXTENDED_SHARED);
	}
	mp->mnt_kern_flag |= MNTK_LOOKUP_EXCL_DOTDOT;
	MNT_IUNLOCK(mp);
	mp->mnt_data = xmp;
	vfs_getnewfsid(mp);
	if ((xmp->nullm_flags & NULLM_CACHE) != 0) {
		MNT_ILOCK(xmp->nullm_vfs);
		TAILQ_INSERT_TAIL(&xmp->nullm_vfs->mnt_uppers, mp,
		    mnt_upper_link);
		MNT_IUNLOCK(xmp->nullm_vfs);
	}

	vfs_mountedfrom(mp, target);

	NULLFSDEBUG("nullfs_mount: lower %s, alias at %s\n",
		mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname);
	return (0);
}
Пример #28
0
/*
 * mp - path - addr in user space of mount point (ie /usr or whatever)
 * data - addr in user space of mount params including the name of the block
 * special file to treat as a filesystem.
 */
static int
msdosfs_mount(struct mount *mp)
{
	struct vnode *devvp;	  /* vnode for blk device to mount */
	struct thread *td;
	/* msdosfs specific mount control block */
	struct msdosfsmount *pmp = NULL;
	struct nameidata ndp;
	int error, flags;
	accmode_t accmode;
	char *from;

	td = curthread;
	if (vfs_filteropt(mp->mnt_optnew, msdosfs_opts))
		return (EINVAL);

	/*
	 * If updating, check whether changing from read-only to
	 * read/write; if there is no device name, that's all we do.
	 */
	if (mp->mnt_flag & MNT_UPDATE) {
		pmp = VFSTOMSDOSFS(mp);
		if (vfs_flagopt(mp->mnt_optnew, "export", NULL, 0)) {
			/*
			 * Forbid export requests if filesystem has
			 * MSDOSFS_LARGEFS flag set.
			 */
			if ((pmp->pm_flags & MSDOSFS_LARGEFS) != 0) {
				vfs_mount_error(mp,
				    "MSDOSFS_LARGEFS flag set, cannot export");
				return (EOPNOTSUPP);
			}
		}
		if (!(pmp->pm_flags & MSDOSFSMNT_RONLY) &&
		    vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
			error = VFS_SYNC(mp, MNT_WAIT);
			if (error)
				return (error);
			flags = WRITECLOSE;
			if (mp->mnt_flag & MNT_FORCE)
				flags |= FORCECLOSE;
			error = vflush(mp, 0, flags, td);
			if (error)
				return (error);

			/*
			 * Now the volume is clean.  Mark it so while the
			 * device is still rw.
			 */
			error = markvoldirty(pmp, 0);
			if (error) {
				(void)markvoldirty(pmp, 1);
				return (error);
			}

			/* Downgrade the device from rw to ro. */
			DROP_GIANT();
			g_topology_lock();
			error = g_access(pmp->pm_cp, 0, -1, 0);
			g_topology_unlock();
			PICKUP_GIANT();
			if (error) {
				(void)markvoldirty(pmp, 1);
				return (error);
			}

			/*
			 * Backing out after an error was painful in the
			 * above.  Now we are committed to succeeding.
			 */
			pmp->pm_fmod = 0;
			pmp->pm_flags |= MSDOSFSMNT_RONLY;
			MNT_ILOCK(mp);
			mp->mnt_flag |= MNT_RDONLY;
			MNT_IUNLOCK(mp);
		} else if ((pmp->pm_flags & MSDOSFSMNT_RONLY) &&
		    !vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
			/*
			 * If upgrade to read-write by non-root, then verify
			 * that user has necessary permissions on the device.
			 */
			devvp = pmp->pm_devvp;
			vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
			error = VOP_ACCESS(devvp, VREAD | VWRITE,
			    td->td_ucred, td);
			if (error)
				error = priv_check(td, PRIV_VFS_MOUNT_PERM);
			if (error) {
				VOP_UNLOCK(devvp, 0);
				return (error);
			}
			VOP_UNLOCK(devvp, 0);
			DROP_GIANT();
			g_topology_lock();
			error = g_access(pmp->pm_cp, 0, 1, 0);
			g_topology_unlock();
			PICKUP_GIANT();
			if (error)
				return (error);

			pmp->pm_fmod = 1;
			pmp->pm_flags &= ~MSDOSFSMNT_RONLY;
			MNT_ILOCK(mp);
			mp->mnt_flag &= ~MNT_RDONLY;
			MNT_IUNLOCK(mp);

			/* Now that the volume is modifiable, mark it dirty. */
			error = markvoldirty(pmp, 1);
			if (error)
				return (error); 
		}
	}
	/*
	 * Not an update, or updating the name: look up the name
	 * and verify that it refers to a sensible disk device.
	 */
	if (vfs_getopt(mp->mnt_optnew, "from", (void **)&from, NULL))
		return (EINVAL);
	NDINIT(&ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, from, td);
	error = namei(&ndp);
	if (error)
		return (error);
	devvp = ndp.ni_vp;
	NDFREE(&ndp, NDF_ONLY_PNBUF);

	if (!vn_isdisk(devvp, &error)) {
		vput(devvp);
		return (error);
	}
	/*
	 * If mount by non-root, then verify that user has necessary
	 * permissions on the device.
	 */
	accmode = VREAD;
	if ((mp->mnt_flag & MNT_RDONLY) == 0)
		accmode |= VWRITE;
	error = VOP_ACCESS(devvp, accmode, td->td_ucred, td);
	if (error)
		error = priv_check(td, PRIV_VFS_MOUNT_PERM);
	if (error) {
		vput(devvp);
		return (error);
	}
	if ((mp->mnt_flag & MNT_UPDATE) == 0) {
		error = mountmsdosfs(devvp, mp);
#ifdef MSDOSFS_DEBUG		/* only needed for the printf below */
		pmp = VFSTOMSDOSFS(mp);
#endif
	} else {
		vput(devvp);
		if (devvp != pmp->pm_devvp)
			return (EINVAL);	/* XXX needs translation */
	}
	if (error) {
		vrele(devvp);
		return (error);
	}

	error = update_mp(mp, td);
	if (error) {
		if ((mp->mnt_flag & MNT_UPDATE) == 0)
			msdosfs_unmount(mp, MNT_FORCE);
		return error;
	}

	if (devvp->v_type == VCHR && devvp->v_rdev != NULL)
		devvp->v_rdev->si_mountpt = mp;
	vfs_mountedfrom(mp, from);
#ifdef MSDOSFS_DEBUG
	printf("msdosfs_mount(): mp %p, pmp %p, inusemap %p\n", mp, pmp, pmp->pm_inusemap);
#endif
	return (0);
}
Пример #29
0
static int
cd9660_mount(struct mount *mp, struct thread *td)
{
	struct vnode *devvp;
	char *fspec;
	int error;
	mode_t accessmode;
	struct nameidata ndp;
	struct iso_mnt *imp = 0;

	/*
	 * Unconditionally mount as read-only.
	 */
	MNT_ILOCK(mp);
	mp->mnt_flag |= MNT_RDONLY;
	MNT_IUNLOCK(mp);

	fspec = vfs_getopts(mp->mnt_optnew, "from", &error);
	if (error)
		return (error);

	imp = VFSTOISOFS(mp);

	if (mp->mnt_flag & MNT_UPDATE) {
		if (vfs_flagopt(mp->mnt_optnew, "export", NULL, 0))
			return (0);
	}
	/*
	 * Not an update, or updating the name: look up the name
	 * and verify that it refers to a sensible block device.
	 */
	NDINIT(&ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspec, td);
	if ((error = namei(&ndp)))
		return (error);
	NDFREE(&ndp, NDF_ONLY_PNBUF);
	devvp = ndp.ni_vp;

	if (!vn_isdisk(devvp, &error)) {
		vput(devvp);
		return (error);
	}

	/*
	 * Verify that user has necessary permissions on the device,
	 * or has superuser abilities
	 */
	accessmode = VREAD;
	error = VOP_ACCESS(devvp, accessmode, td->td_ucred, td);
	if (error)
		error = priv_check(td, PRIV_VFS_MOUNT_PERM);
	if (error) {
		vput(devvp);
		return (error);
	}

	if ((mp->mnt_flag & MNT_UPDATE) == 0) {
		error = iso_mountfs(devvp, mp, td);
		if (error)
			vrele(devvp);
	} else {
		if (devvp != imp->im_devvp)
			error = EINVAL;	/* needs translation */
		vput(devvp);
	}
	if (error)
		return (error);
	vfs_mountedfrom(mp, fspec);
	return (0);
}
Пример #30
0
/*
 * In-kernel implementation of execve().  All arguments are assumed to be
 * userspace pointers from the passed thread.
 */
static int
do_execve(struct thread *td, struct image_args *args, struct mac *mac_p)
{
	struct proc *p = td->td_proc;
	struct nameidata nd;
	struct ucred *oldcred;
	struct uidinfo *euip = NULL;
	register_t *stack_base;
	int error, i;
	struct image_params image_params, *imgp;
	struct vattr attr;
	int (*img_first)(struct image_params *);
	struct pargs *oldargs = NULL, *newargs = NULL;
	struct sigacts *oldsigacts = NULL, *newsigacts = NULL;
#ifdef KTRACE
	struct vnode *tracevp = NULL;
	struct ucred *tracecred = NULL;
#endif
	struct vnode *oldtextvp = NULL, *newtextvp;
	int credential_changing;
	int textset;
#ifdef MAC
	struct label *interpvplabel = NULL;
	int will_transition;
#endif
#ifdef HWPMC_HOOKS
	struct pmckern_procexec pe;
#endif
	static const char fexecv_proc_title[] = "(fexecv)";

	imgp = &image_params;

	/*
	 * Lock the process and set the P_INEXEC flag to indicate that
	 * it should be left alone until we're done here.  This is
	 * necessary to avoid race conditions - e.g. in ptrace() -
	 * that might allow a local user to illicitly obtain elevated
	 * privileges.
	 */
	PROC_LOCK(p);
	KASSERT((p->p_flag & P_INEXEC) == 0,
	    ("%s(): process already has P_INEXEC flag", __func__));
	p->p_flag |= P_INEXEC;
	PROC_UNLOCK(p);

	/*
	 * Initialize part of the common data
	 */
	bzero(imgp, sizeof(*imgp));
	imgp->proc = p;
	imgp->attr = &attr;
	imgp->args = args;
	oldcred = p->p_ucred;

#ifdef MAC
	error = mac_execve_enter(imgp, mac_p);
	if (error)
		goto exec_fail;
#endif

	/*
	 * Translate the file name. namei() returns a vnode pointer
	 *	in ni_vp among other things.
	 *
	 * XXXAUDIT: It would be desirable to also audit the name of the
	 * interpreter if this is an interpreted binary.
	 */
	if (args->fname != NULL) {
		NDINIT(&nd, LOOKUP, ISOPEN | LOCKLEAF | FOLLOW | SAVENAME
		    | AUDITVNODE1, UIO_SYSSPACE, args->fname, td);
	}

	SDT_PROBE1(proc, , , exec, args->fname);

interpret:
	if (args->fname != NULL) {
#ifdef CAPABILITY_MODE
		/*
		 * While capability mode can't reach this point via direct
		 * path arguments to execve(), we also don't allow
		 * interpreters to be used in capability mode (for now).
		 * Catch indirect lookups and return a permissions error.
		 */
		if (IN_CAPABILITY_MODE(td)) {
			error = ECAPMODE;
			goto exec_fail;
		}
#endif
		error = namei(&nd);
		if (error)
			goto exec_fail;

		newtextvp = nd.ni_vp;
		imgp->vp = newtextvp;
	} else {
		AUDIT_ARG_FD(args->fd);
		/*
		 * Descriptors opened only with O_EXEC or O_RDONLY are allowed.
		 */
		error = fgetvp_exec(td, args->fd, &cap_fexecve_rights, &newtextvp);
		if (error)
			goto exec_fail;
		vn_lock(newtextvp, LK_EXCLUSIVE | LK_RETRY);
		AUDIT_ARG_VNODE1(newtextvp);
		imgp->vp = newtextvp;
	}

	/*
	 * Check file permissions (also 'opens' file)
	 */
	error = exec_check_permissions(imgp);
	if (error)
		goto exec_fail_dealloc;

	imgp->object = imgp->vp->v_object;
	if (imgp->object != NULL)
		vm_object_reference(imgp->object);

	/*
	 * Set VV_TEXT now so no one can write to the executable while we're
	 * activating it.
	 *
	 * Remember if this was set before and unset it in case this is not
	 * actually an executable image.
	 */
	textset = VOP_IS_TEXT(imgp->vp);
	VOP_SET_TEXT(imgp->vp);

	error = exec_map_first_page(imgp);
	if (error)
		goto exec_fail_dealloc;

	imgp->proc->p_osrel = 0;
	imgp->proc->p_fctl0 = 0;

	/*
	 * Implement image setuid/setgid.
	 *
	 * Determine new credentials before attempting image activators
	 * so that it can be used by process_exec handlers to determine
	 * credential/setid changes.
	 *
	 * Don't honor setuid/setgid if the filesystem prohibits it or if
	 * the process is being traced.
	 *
	 * We disable setuid/setgid/etc in capability mode on the basis
	 * that most setugid applications are not written with that
	 * environment in mind, and will therefore almost certainly operate
	 * incorrectly. In principle there's no reason that setugid
	 * applications might not be useful in capability mode, so we may want
	 * to reconsider this conservative design choice in the future.
	 *
	 * XXXMAC: For the time being, use NOSUID to also prohibit
	 * transitions on the file system.
	 */
	credential_changing = 0;
	credential_changing |= (attr.va_mode & S_ISUID) &&
	    oldcred->cr_uid != attr.va_uid;
	credential_changing |= (attr.va_mode & S_ISGID) &&
	    oldcred->cr_gid != attr.va_gid;
#ifdef MAC
	will_transition = mac_vnode_execve_will_transition(oldcred, imgp->vp,
	    interpvplabel, imgp);
	credential_changing |= will_transition;
#endif

	/* Don't inherit PROC_PDEATHSIG_CTL value if setuid/setgid. */
	if (credential_changing)
		imgp->proc->p_pdeathsig = 0;

	if (credential_changing &&
#ifdef CAPABILITY_MODE
	    ((oldcred->cr_flags & CRED_FLAG_CAPMODE) == 0) &&
#endif
	    (imgp->vp->v_mount->mnt_flag & MNT_NOSUID) == 0 &&
	    (p->p_flag & P_TRACED) == 0) {
		imgp->credential_setid = true;
		VOP_UNLOCK(imgp->vp, 0);
		imgp->newcred = crdup(oldcred);
		if (attr.va_mode & S_ISUID) {
			euip = uifind(attr.va_uid);
			change_euid(imgp->newcred, euip);
		}
		vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY);
		if (attr.va_mode & S_ISGID)
			change_egid(imgp->newcred, attr.va_gid);
		/*
		 * Implement correct POSIX saved-id behavior.
		 *
		 * XXXMAC: Note that the current logic will save the
		 * uid and gid if a MAC domain transition occurs, even
		 * though maybe it shouldn't.
		 */
		change_svuid(imgp->newcred, imgp->newcred->cr_uid);
		change_svgid(imgp->newcred, imgp->newcred->cr_gid);
	} else {
		/*
		 * Implement correct POSIX saved-id behavior.
		 *
		 * XXX: It's not clear that the existing behavior is
		 * POSIX-compliant.  A number of sources indicate that the
		 * saved uid/gid should only be updated if the new ruid is
		 * not equal to the old ruid, or the new euid is not equal
		 * to the old euid and the new euid is not equal to the old
		 * ruid.  The FreeBSD code always updates the saved uid/gid.
		 * Also, this code uses the new (replaced) euid and egid as
		 * the source, which may or may not be the right ones to use.
		 */
		if (oldcred->cr_svuid != oldcred->cr_uid ||
		    oldcred->cr_svgid != oldcred->cr_gid) {
			VOP_UNLOCK(imgp->vp, 0);
			imgp->newcred = crdup(oldcred);
			vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY);
			change_svuid(imgp->newcred, imgp->newcred->cr_uid);
			change_svgid(imgp->newcred, imgp->newcred->cr_gid);
		}
	}
	/* The new credentials are installed into the process later. */

	/*
	 * Do the best to calculate the full path to the image file.
	 */
	if (args->fname != NULL && args->fname[0] == '/')
		imgp->execpath = args->fname;
	else {
		VOP_UNLOCK(imgp->vp, 0);
		if (vn_fullpath(td, imgp->vp, &imgp->execpath,
		    &imgp->freepath) != 0)
			imgp->execpath = args->fname;
		vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY);
	}

	/*
	 *	If the current process has a special image activator it
	 *	wants to try first, call it.   For example, emulating shell
	 *	scripts differently.
	 */
	error = -1;
	if ((img_first = imgp->proc->p_sysent->sv_imgact_try) != NULL)
		error = img_first(imgp);

	/*
	 *	Loop through the list of image activators, calling each one.
	 *	An activator returns -1 if there is no match, 0 on success,
	 *	and an error otherwise.
	 */
	for (i = 0; error == -1 && execsw[i]; ++i) {
		if (execsw[i]->ex_imgact == NULL ||
		    execsw[i]->ex_imgact == img_first) {
			continue;
		}
		error = (*execsw[i]->ex_imgact)(imgp);
	}

	if (error) {
		if (error == -1) {
			if (textset == 0)
				VOP_UNSET_TEXT(imgp->vp);
			error = ENOEXEC;
		}
		goto exec_fail_dealloc;
	}

	/*
	 * Special interpreter operation, cleanup and loop up to try to
	 * activate the interpreter.
	 */
	if (imgp->interpreted) {
		exec_unmap_first_page(imgp);
		/*
		 * VV_TEXT needs to be unset for scripts.  There is a short
		 * period before we determine that something is a script where
		 * VV_TEXT will be set. The vnode lock is held over this
		 * entire period so nothing should illegitimately be blocked.
		 */
		VOP_UNSET_TEXT(imgp->vp);
		/* free name buffer and old vnode */
		if (args->fname != NULL)
			NDFREE(&nd, NDF_ONLY_PNBUF);
#ifdef MAC
		mac_execve_interpreter_enter(newtextvp, &interpvplabel);
#endif
		if (imgp->opened) {
			VOP_CLOSE(newtextvp, FREAD, td->td_ucred, td);
			imgp->opened = 0;
		}
		vput(newtextvp);
		vm_object_deallocate(imgp->object);
		imgp->object = NULL;
		imgp->credential_setid = false;
		if (imgp->newcred != NULL) {
			crfree(imgp->newcred);
			imgp->newcred = NULL;
		}
		imgp->execpath = NULL;
		free(imgp->freepath, M_TEMP);
		imgp->freepath = NULL;
		/* set new name to that of the interpreter */
		NDINIT(&nd, LOOKUP, LOCKLEAF | FOLLOW | SAVENAME,
		    UIO_SYSSPACE, imgp->interpreter_name, td);
		args->fname = imgp->interpreter_name;
		goto interpret;
	}

	/*
	 * NB: We unlock the vnode here because it is believed that none
	 * of the sv_copyout_strings/sv_fixup operations require the vnode.
	 */
	VOP_UNLOCK(imgp->vp, 0);

	if (disallow_high_osrel &&
	    P_OSREL_MAJOR(p->p_osrel) > P_OSREL_MAJOR(__FreeBSD_version)) {
		error = ENOEXEC;
		uprintf("Osrel %d for image %s too high\n", p->p_osrel,
		    imgp->execpath != NULL ? imgp->execpath : "<unresolved>");
		vn_lock(imgp->vp, LK_SHARED | LK_RETRY);
		goto exec_fail_dealloc;
	}

	/* ABI enforces the use of Capsicum. Switch into capabilities mode. */
	if (SV_PROC_FLAG(p, SV_CAPSICUM))
		sys_cap_enter(td, NULL);

	/*
	 * Copy out strings (args and env) and initialize stack base.
	 */
	stack_base = (*p->p_sysent->sv_copyout_strings)(imgp);

	/*
	 * Stack setup.
	 */
	error = (*p->p_sysent->sv_fixup)(&stack_base, imgp);
	if (error != 0) {
		vn_lock(imgp->vp, LK_SHARED | LK_RETRY);
		goto exec_fail_dealloc;
	}

	if (args->fdp != NULL) {
		/* Install a brand new file descriptor table. */
		fdinstall_remapped(td, args->fdp);
		args->fdp = NULL;
	} else {
		/*
		 * Keep on using the existing file descriptor table. For
		 * security and other reasons, the file descriptor table
		 * cannot be shared after an exec.
		 */
		fdunshare(td);
		/* close files on exec */
		fdcloseexec(td);
	}

	/*
	 * Malloc things before we need locks.
	 */
	i = exec_args_get_begin_envv(imgp->args) - imgp->args->begin_argv;
	/* Cache arguments if they fit inside our allowance */
	if (ps_arg_cache_limit >= i + sizeof(struct pargs)) {
		newargs = pargs_alloc(i);
		bcopy(imgp->args->begin_argv, newargs->ar_args, i);
	}

	/*
	 * For security and other reasons, signal handlers cannot
	 * be shared after an exec. The new process gets a copy of the old
	 * handlers. In execsigs(), the new process will have its signals
	 * reset.
	 */
	if (sigacts_shared(p->p_sigacts)) {
		oldsigacts = p->p_sigacts;
		newsigacts = sigacts_alloc();
		sigacts_copy(newsigacts, oldsigacts);
	}

	vn_lock(imgp->vp, LK_SHARED | LK_RETRY);

	PROC_LOCK(p);
	if (oldsigacts)
		p->p_sigacts = newsigacts;
	/* Stop profiling */
	stopprofclock(p);

	/* reset caught signals */
	execsigs(p);

	/* name this process - nameiexec(p, ndp) */
	bzero(p->p_comm, sizeof(p->p_comm));
	if (args->fname)
		bcopy(nd.ni_cnd.cn_nameptr, p->p_comm,
		    min(nd.ni_cnd.cn_namelen, MAXCOMLEN));
	else if (vn_commname(newtextvp, p->p_comm, sizeof(p->p_comm)) != 0)
		bcopy(fexecv_proc_title, p->p_comm, sizeof(fexecv_proc_title));
	bcopy(p->p_comm, td->td_name, sizeof(td->td_name));
#ifdef KTR
	sched_clear_tdname(td);
#endif

	/*
	 * mark as execed, wakeup the process that vforked (if any) and tell
	 * it that it now has its own resources back
	 */
	p->p_flag |= P_EXEC;
	if ((p->p_flag2 & P2_NOTRACE_EXEC) == 0)
		p->p_flag2 &= ~P2_NOTRACE;
	if (p->p_flag & P_PPWAIT) {
		p->p_flag &= ~(P_PPWAIT | P_PPTRACE);
		cv_broadcast(&p->p_pwait);
		/* STOPs are no longer ignored, arrange for AST */
		signotify(td);
	}

	/*
	 * Implement image setuid/setgid installation.
	 */
	if (imgp->credential_setid) {
		/*
		 * Turn off syscall tracing for set-id programs, except for
		 * root.  Record any set-id flags first to make sure that
		 * we do not regain any tracing during a possible block.
		 */
		setsugid(p);

#ifdef KTRACE
		if (p->p_tracecred != NULL &&
		    priv_check_cred(p->p_tracecred, PRIV_DEBUG_DIFFCRED))
			ktrprocexec(p, &tracecred, &tracevp);
#endif
		/*
		 * Close any file descriptors 0..2 that reference procfs,
		 * then make sure file descriptors 0..2 are in use.
		 *
		 * Both fdsetugidsafety() and fdcheckstd() may call functions
		 * taking sleepable locks, so temporarily drop our locks.
		 */
		PROC_UNLOCK(p);
		VOP_UNLOCK(imgp->vp, 0);
		fdsetugidsafety(td);
		error = fdcheckstd(td);
		vn_lock(imgp->vp, LK_SHARED | LK_RETRY);
		if (error != 0)
			goto exec_fail_dealloc;
		PROC_LOCK(p);
#ifdef MAC
		if (will_transition) {
			mac_vnode_execve_transition(oldcred, imgp->newcred,
			    imgp->vp, interpvplabel, imgp);
		}
#endif
	} else {
		if (oldcred->cr_uid == oldcred->cr_ruid &&
		    oldcred->cr_gid == oldcred->cr_rgid)
			p->p_flag &= ~P_SUGID;
	}
	/*
	 * Set the new credentials.
	 */
	if (imgp->newcred != NULL) {
		proc_set_cred(p, imgp->newcred);
		crfree(oldcred);
		oldcred = NULL;
	}

	/*
	 * Store the vp for use in procfs.  This vnode was referenced by namei
	 * or fgetvp_exec.
	 */
	oldtextvp = p->p_textvp;
	p->p_textvp = newtextvp;

#ifdef KDTRACE_HOOKS
	/*
	 * Tell the DTrace fasttrap provider about the exec if it
	 * has declared an interest.
	 */
	if (dtrace_fasttrap_exec)
		dtrace_fasttrap_exec(p);
#endif

	/*
	 * Notify others that we exec'd, and clear the P_INEXEC flag
	 * as we're now a bona fide freshly-execed process.
	 */
	KNOTE_LOCKED(p->p_klist, NOTE_EXEC);
	p->p_flag &= ~P_INEXEC;

	/* clear "fork but no exec" flag, as we _are_ execing */
	p->p_acflag &= ~AFORK;

	/*
	 * Free any previous argument cache and replace it with
	 * the new argument cache, if any.
	 */
	oldargs = p->p_args;
	p->p_args = newargs;
	newargs = NULL;

	PROC_UNLOCK(p);

#ifdef	HWPMC_HOOKS
	/*
	 * Check if system-wide sampling is in effect or if the
	 * current process is using PMCs.  If so, do exec() time
	 * processing.  This processing needs to happen AFTER the
	 * P_INEXEC flag is cleared.
	 */
	if (PMC_SYSTEM_SAMPLING_ACTIVE() || PMC_PROC_IS_USING_PMCS(p)) {
		VOP_UNLOCK(imgp->vp, 0);
		pe.pm_credentialschanged = credential_changing;
		pe.pm_entryaddr = imgp->entry_addr;

		PMC_CALL_HOOK_X(td, PMC_FN_PROCESS_EXEC, (void *) &pe);
		vn_lock(imgp->vp, LK_SHARED | LK_RETRY);
	}
#endif

	/* Set values passed into the program in registers. */
	(*p->p_sysent->sv_setregs)(td, imgp, (u_long)(uintptr_t)stack_base);

	vfs_mark_atime(imgp->vp, td->td_ucred);

	SDT_PROBE1(proc, , , exec__success, args->fname);

exec_fail_dealloc:
	if (imgp->firstpage != NULL)
		exec_unmap_first_page(imgp);

	if (imgp->vp != NULL) {
		if (args->fname)
			NDFREE(&nd, NDF_ONLY_PNBUF);
		if (imgp->opened)
			VOP_CLOSE(imgp->vp, FREAD, td->td_ucred, td);
		if (error != 0)
			vput(imgp->vp);
		else
			VOP_UNLOCK(imgp->vp, 0);
	}

	if (imgp->object != NULL)
		vm_object_deallocate(imgp->object);

	free(imgp->freepath, M_TEMP);

	if (error == 0) {
		if (p->p_ptevents & PTRACE_EXEC) {
			PROC_LOCK(p);
			if (p->p_ptevents & PTRACE_EXEC)
				td->td_dbgflags |= TDB_EXEC;
			PROC_UNLOCK(p);
		}

		/*
		 * Stop the process here if its stop event mask has
		 * the S_EXEC bit set.
		 */
		STOPEVENT(p, S_EXEC, 0);
	} else {
exec_fail:
		/* we're done here, clear P_INEXEC */
		PROC_LOCK(p);
		p->p_flag &= ~P_INEXEC;
		PROC_UNLOCK(p);

		SDT_PROBE1(proc, , , exec__failure, error);
	}

	if (imgp->newcred != NULL && oldcred != NULL)
		crfree(imgp->newcred);

#ifdef MAC
	mac_execve_exit(imgp);
	mac_execve_interpreter_exit(interpvplabel);
#endif
	exec_free_args(args);

	/*
	 * Handle deferred decrement of ref counts.
	 */
	if (oldtextvp != NULL)
		vrele(oldtextvp);
#ifdef KTRACE
	if (tracevp != NULL)
		vrele(tracevp);
	if (tracecred != NULL)
		crfree(tracecred);
#endif
	pargs_drop(oldargs);
	pargs_drop(newargs);
	if (oldsigacts != NULL)
		sigacts_free(oldsigacts);
	if (euip != NULL)
		uifree(euip);

	if (error && imgp->vmspace_destroyed) {
		/* sorry, no more process anymore. exit gracefully */
		exit1(td, 0, SIGABRT);
		/* NOT REACHED */
	}

#ifdef KTRACE
	if (error == 0)
		ktrprocctor(p);
#endif

	/*
	 * We don't want cpu_set_syscall_retval() to overwrite any of
	 * the register values put in place by exec_setregs().
	 * Implementations of cpu_set_syscall_retval() will leave
	 * registers unmodified when returning EJUSTRETURN.
	 */
	return (error == 0 ? EJUSTRETURN : error);
}