Example 1
/* ARGSUSED */
static int
soo_read(struct fileproc *fp, struct uio *uio, __unused int flags,
#if !CONFIG_MACF_SOCKET
	__unused
#endif
	vfs_context_t ctx)
{
	struct socket *so;
	int stat;
#if CONFIG_MACF_SOCKET
	int error;
#endif

	int (*fsoreceive)(struct socket *so2, struct sockaddr **paddr,
	    struct uio *uio2, struct mbuf **mp0, struct mbuf **controlp,
	    int *flagsp);

	if ((so = (struct socket *)fp->f_fglob->fg_data) == NULL) {
		/* This is not a valid open file descriptor */
		return (EBADF);
	}

#if CONFIG_MACF_SOCKET
	error = mac_socket_check_receive(vfs_context_ucred(ctx), so);
	if (error)
		return (error);
#endif /* CONFIG_MACF_SOCKET */

	fsoreceive = so->so_proto->pr_usrreqs->pru_soreceive;

	stat = (*fsoreceive)(so, 0, uio, 0, 0, 0);
	return (stat);
}
Example 2
static void
sd_log(vfs_context_t ctx, const char *fmt, ...) 
{
	int resid, log_error, len;
	char logbuf[100];
	va_list arglist;

	/* If the log isn't open yet, open it */
	if (sd_logvp == NULLVP) {
		if (sd_openlog(ctx) != 0) {
			/* Couldn't open, we fail out */
			return;
		}
	}

	va_start(arglist, fmt);
	len = vsnprintf(logbuf, sizeof(logbuf), fmt, arglist);
	log_error = vn_rdwr(UIO_WRITE, sd_logvp, (caddr_t)logbuf, len, sd_log_offset,
			UIO_SYSSPACE, IO_UNIT | IO_NOAUTH, vfs_context_ucred(ctx), &resid, vfs_context_proc(ctx));
	if (log_error == EIO || log_error == 0) {
		sd_log_offset += (len - resid);
	}

	va_end(arglist);

}
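
A note on the example above (our observation, not in the original source): vsnprintf() returns the length the formatted string would have had, so when the message does not fit, len can exceed sizeof(logbuf) and the vn_rdwr() call would read past the end of the buffer. A minimal hedged fix is to clamp len before writing:

	/* Sketch only: clamp the vsnprintf() result so vn_rdwr() never
	 * reads past logbuf; all names come from sd_log() above. */
	len = vsnprintf(logbuf, sizeof(logbuf), fmt, arglist);
	if (len >= (int)sizeof(logbuf))
		len = (int)sizeof(logbuf) - 1;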
Example 3
int
kern_read_file(struct kern_direct_file_io_ref_t * ref, off_t offset, void * addr, size_t len, int ioflag)
{
    return (vn_rdwr(UIO_READ, ref->vp,
			addr, len, offset,
			UIO_SYSSPACE, ioflag|IO_SYNC|IO_NODELOCKED|IO_UNIT, 
                        vfs_context_ucred(ref->ctx), (int *) 0,
			vfs_context_proc(ref->ctx)));
}
Example 4
int
kern_write_file(struct kern_direct_file_io_ref_t * ref, off_t offset, caddr_t addr, vm_size_t len)
{
    return (vn_rdwr(UIO_WRITE, ref->vp,
			addr, len, offset,
			UIO_SYSSPACE, IO_SYNC|IO_NODELOCKED|IO_UNIT, 
                        vfs_context_ucred(ref->ctx), (int *) 0,
			vfs_context_proc(ref->ctx)));
}
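
Examples 3 and 4 together spell out the vn_rdwr() calling convention: direction (UIO_READ or UIO_WRITE), target vnode, kernel buffer and length, byte offset, address space, ioflags, the credential and proc pulled out of the vfs_context, and an optional residual-count out-parameter (NULL here, meaning short transfers go undetected). A hedged caller sketch follows; copy_header, src, dst and the 512-byte header size are hypothetical:

	/* Sketch: copy the first 512 bytes of one file to another using
	 * the two wrappers above; assumes both refs were opened elsewhere. */
	static int
	copy_header(struct kern_direct_file_io_ref_t *src,
	            struct kern_direct_file_io_ref_t *dst)
	{
		char	hdr[512];
		int	error;

		error = kern_read_file(src, 0, hdr, sizeof(hdr), 0);
		if (error)
			return (error);
		return (kern_write_file(dst, 0, (caddr_t)hdr, sizeof(hdr)));
	}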
Example 5
int
soo_select(struct fileproc *fp, int which, void *wql, vfs_context_t ctx)
{
	struct socket *so = (struct socket *)fp->f_fglob->fg_data;
	int retnum = 0;
	proc_t procp;

	if (so == NULL || so == (struct socket *)-1)
		return (0);

	procp = vfs_context_proc(ctx);

#if CONFIG_MACF_SOCKET
	if (mac_socket_check_select(vfs_context_ucred(ctx), so, which) != 0)
		return (0);
#endif /* CONFIG_MACF_SOCKET */


	socket_lock(so, 1);
	switch (which) {

	case FREAD:
		so->so_rcv.sb_flags |= SB_SEL;
		if (soreadable(so)) {
			retnum = 1;
			so->so_rcv.sb_flags &= ~SB_SEL;
			goto done;
		}
		selrecord(procp, &so->so_rcv.sb_sel, wql);
		break;

	case FWRITE:
		so->so_snd.sb_flags |= SB_SEL;
		if (sowriteable(so)) {
			retnum = 1;
			so->so_snd.sb_flags &= ~SB_SEL;
			goto done;
		}
		selrecord(procp, &so->so_snd.sb_sel, wql);
		break;

	case 0:
		so->so_rcv.sb_flags |= SB_SEL;
		if (so->so_oobmark || (so->so_state & SS_RCVATMARK)) {
			retnum = 1;
			so->so_rcv.sb_flags &= ~SB_SEL;
			goto done;
		}
		selrecord(procp, &so->so_rcv.sb_sel, wql);
		break;
	}

done:
	socket_unlock(so, 1);
	return (retnum);
}
Example 6
int
vm_record_file_write(vnode_t vp, uint64_t offset, char *buf, int size)
{
	int error = 0;
	vfs_context_t ctx;

	ctx = vfs_context_kernel();
		
	error = vn_rdwr(UIO_WRITE, vp, (caddr_t)buf, size, offset,
		UIO_SYSSPACE, IO_NODELOCKED, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));

	return (error);
}
Example 7
int
kern_file_read(struct cfs_kern_file *fp, void *buf, size_t nbytes, loff_t *pos)
{
        struct proc *p = current_proc();
        int     resid;
        int     error;

        assert(buf != NULL);
        assert(fp != NULL && fp->f_vp != NULL);

        error = vn_rdwr(UIO_READ, fp->f_vp, buf, nbytes, *pos, 
                        UIO_SYSSPACE32, 0, vfs_context_ucred(fp->f_ctxt), &resid, p);
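        /*
         * Note the mixed error convention in this wrapper: vn_rdwr()
         * failures come back as positive errno values, a zero-byte read
         * is mapped to Linux-style -EINVAL, and success returns the
         * positive byte count.
         */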
        if ((error) || (nbytes == resid)) {
                if (!error)
                        error = -EINVAL;
                return error;
        }
        *pos += nbytes - resid;

        return (int)(nbytes - resid);
}
Example 8
/*
 * Compute the same user access value as getattrlist(2)
 */
u_int32_t getuseraccess(znode_t *zp, vfs_context_t ctx)
{
	vnode_t	*vp;
	u_int32_t	user_access = 0;
    zfs_acl_phys_t acl_phys;
    int error;
	/* Only take the expensive vnode_authorize path when we have an ACL */

    error = sa_lookup(zp->z_sa_hdl, SA_ZPL_ZNODE_ACL(zp->z_zfsvfs),
                      &acl_phys, sizeof (acl_phys));

	if (error || acl_phys.z_acl_count == 0) {
		kauth_cred_t	cred = vfs_context_ucred(ctx);
		uint64_t		obj_uid;
		uint64_t    	obj_mode;

		/* User id 0 (root) always gets access. */
		if (!vfs_context_suser(ctx)) {
			return (R_OK | W_OK | X_OK);
		}

        sa_lookup(zp->z_sa_hdl, SA_ZPL_UID(zp->z_zfsvfs),
                  &obj_uid, sizeof (obj_uid));
        sa_lookup(zp->z_sa_hdl, SA_ZPL_MODE(zp->z_zfsvfs),
                  &obj_mode, sizeof (obj_mode));

		//obj_uid = pzp->zp_uid;
		obj_mode = obj_mode & MODEMASK;
		if (obj_uid == UNKNOWNUID) {
			obj_uid = kauth_cred_getuid(cred);
		}
		if ((obj_uid == kauth_cred_getuid(cred)) ||
		    (obj_uid == UNKNOWNUID)) {
			return (((u_int32_t)obj_mode & S_IRWXU) >> 6);
		}
		/* Otherwise, settle for 'others' access. */
		return ((u_int32_t)obj_mode & S_IRWXO);
	}
Example 9
/* ARGSUSED */
static int
soo_write(struct fileproc *fp, struct uio *uio, __unused int flags,
	vfs_context_t ctx)
{
	struct socket *so;
	int stat;
	int (*fsosend)(struct socket *so2, struct sockaddr *addr,
	    struct uio *uio2, struct mbuf *top, struct mbuf *control,
	    int flags2);
	proc_t procp;

#if CONFIG_MACF_SOCKET
	int error;
#endif

	if ((so = (struct socket *)fp->f_fglob->fg_data) == NULL) {
		/* This is not a valid open file descriptor */
		return (EBADF);
	}

#if CONFIG_MACF_SOCKET
	/* JMM - have to fetch the socket's remote addr */
	error = mac_socket_check_send(vfs_context_ucred(ctx), so, NULL);
	if (error)
		return (error);
#endif /* CONFIG_MACF_SOCKET */

	fsosend = so->so_proto->pr_usrreqs->pru_sosend;

	stat = (*fsosend)(so, 0, uio, 0, 0, 0);

	/* Generation of SIGPIPE can be controlled per socket */
	procp = vfs_context_proc(ctx);
	if (stat == EPIPE && !(so->so_flags & SOF_NOSIGPIPE))
		psignal(procp, SIGPIPE);

	return (stat);
}
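
The SOF_NOSIGPIPE flag consulted above is the kernel-side mirror of the SO_NOSIGPIPE socket option, so a process can opt out of the signal from userspace. A standard BSD-sockets sketch (not part of the kernel source; s is an open socket descriptor):

	/* Ask for EPIPE instead of SIGPIPE on writes to a closed peer. */
	int one = 1;
	setsockopt(s, SOL_SOCKET, SO_NOSIGPIPE, &one, sizeof(one));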
Example 10
static int
nsmb_dev_open_nolock(dev_t dev, int oflags, int devtype, struct proc *p)
{
#pragma unused(oflags, devtype, p)
	struct smb_dev *sdp;
	kauth_cred_t cred = vfs_context_ucred(vfs_context_current());

	sdp = SMB_GETDEV(dev);
	if (sdp && (sdp->sd_flags & NSMBFL_OPEN))
		return (EBUSY);
	if (!sdp || minor(dev) == 0) {
		int	avail_minor;

		for (avail_minor = 1; avail_minor < SMBMINORS; avail_minor++)
			if (!SMB_GETDEV(avail_minor))
				break;
		if (avail_minor >= SMBMINORS) {
			SMBERROR("Too many minor devices, %d >= %d !", avail_minor, SMBMINORS);
			return (ENOMEM);
		}
        SMB_MALLOC(sdp, struct smb_dev *, sizeof(*sdp), M_NSMBDEV, M_WAITOK);
		bzero(sdp, sizeof(*sdp));
		dev = makedev(smb_major, avail_minor);
		sdp->sd_devfs = devfs_make_node(dev, DEVFS_CHAR,
						kauth_cred_getuid(cred),
						kauth_cred_getgid(cred),
						0700, "nsmb%x", avail_minor);
		if (!sdp->sd_devfs) {
			SMBERROR("devfs_make_node failed %d\n", avail_minor);
			SMB_FREE(sdp, M_NSMBDEV);
			return (ENOMEM);
		}
		if (avail_minor > smb_minor_hiwat)
			smb_minor_hiwat = avail_minor;
		SMB_GETDEV(dev) = sdp;
		return (EBUSY);
	}
Example 11
/*
 * Convert a pathname into a pointer to a locked inode.
 *
 * The FOLLOW flag is set when symbolic links are to be followed
 * when they occur at the end of the name translation process.
 * Symbolic links are always followed for all other pathname
 * components other than the last.
 *
 * The segflg defines whether the name is to be copied from user
 * space or kernel space.
 *
 * Overall outline of namei:
 *
 *	copy in name
 *	get starting directory
 *	while (!done && !error) {
 *		call lookup to search path.
 *		if symbolic link, massage name in buffer and continue
 *	}
 *
 * Returns:	0			Success
 *		ENOENT			No such file or directory
 *		ELOOP			Too many levels of symbolic links
 *		ENAMETOOLONG		Filename too long
 *		copyinstr:EFAULT	Bad address
 *		copyinstr:ENAMETOOLONG	Filename too long
 *		lookup:EBADF		Bad file descriptor
 *		lookup:EROFS
 *		lookup:EACCES
 *		lookup:EPERM
 *		lookup:ERECYCLE	 vnode was recycled from underneath us in lookup.
 *						 This means we should re-drive lookup from this point.
 *		lookup: ???
 *		VNOP_READLINK:???
 */
int
namei(struct nameidata *ndp)
{
	struct filedesc *fdp;	/* pointer to file descriptor state */
	struct vnode *dp;	/* the directory we are searching */
	struct vnode *usedvp = ndp->ni_dvp;  /* store pointer to vp in case we must loop due to
										   	heavy vnode pressure */
	u_long cnpflags = ndp->ni_cnd.cn_flags; /* store in case we have to restore after loop */
	int error;
	struct componentname *cnp = &ndp->ni_cnd;
	vfs_context_t ctx = cnp->cn_context;
	proc_t p = vfs_context_proc(ctx);
#if CONFIG_AUDIT
/* XXX ut should be from context */
	uthread_t ut = (struct uthread *)get_bsdthread_info(current_thread());
#endif

	fdp = p->p_fd;

#if DIAGNOSTIC
	if (!vfs_context_ucred(ctx) || !p)
		panic ("namei: bad cred/proc");
	if (cnp->cn_nameiop & (~OPMASK))
		panic ("namei: nameiop contaminated with flags");
	if (cnp->cn_flags & OPMASK)
		panic ("namei: flags contaminated with nameiops");
#endif

	/*
	 * A compound VNOP found something that needs further processing:
	 * either a trigger vnode, a covered directory, or a symlink.
	 */
	if (ndp->ni_flag & NAMEI_CONTLOOKUP) {
		int rdonly, vbusyflags, keep_going, wantparent;

		rdonly = cnp->cn_flags & RDONLY;
		vbusyflags = ((cnp->cn_flags & CN_NBMOUNTLOOK) != 0) ? LK_NOWAIT : 0;
		keep_going = 0;
		wantparent = cnp->cn_flags & (LOCKPARENT | WANTPARENT);

		ndp->ni_flag &= ~(NAMEI_CONTLOOKUP);

		error = lookup_handle_found_vnode(ndp, &ndp->ni_cnd, rdonly, vbusyflags, 
				&keep_going, ndp->ni_ncgeneration, wantparent, 0, ctx);
		if (error)
			goto out_drop;
		if (keep_going) {
			if ((cnp->cn_flags & ISSYMLINK) == 0) {
				panic("We need to keep going on a continued lookup, but for vp type %d (tag %d)\n", ndp->ni_vp->v_type, ndp->ni_vp->v_tag);
			}
			goto continue_symlink;
		}

		return 0;

	}

vnode_recycled:

	/*
	 * Get a buffer for the name to be translated, and copy the
	 * name into the buffer.
	 */
	if ((cnp->cn_flags & HASBUF) == 0) {
		cnp->cn_pnbuf = ndp->ni_pathbuf;
		cnp->cn_pnlen = PATHBUFLEN;
	}
#if LP64_DEBUG
	if ((UIO_SEG_IS_USER_SPACE(ndp->ni_segflg) == 0)
		&& (ndp->ni_segflg != UIO_SYSSPACE)
		&& (ndp->ni_segflg != UIO_SYSSPACE32)) {
		panic("%s :%d - invalid ni_segflg\n", __FILE__, __LINE__); 
	}
#endif /* LP64_DEBUG */

retry_copy:
	if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
		error = copyinstr(ndp->ni_dirp, cnp->cn_pnbuf,
			    cnp->cn_pnlen, (size_t *)&ndp->ni_pathlen);
	} else {
		error = copystr(CAST_DOWN(void *, ndp->ni_dirp), cnp->cn_pnbuf,
			    cnp->cn_pnlen, (size_t *)&ndp->ni_pathlen);
	}
	if (error == ENAMETOOLONG && !(cnp->cn_flags & HASBUF)) {
		MALLOC_ZONE(cnp->cn_pnbuf, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK);
		if (cnp->cn_pnbuf == NULL) {
			error = ENOMEM;
			goto error_out;
		}

		cnp->cn_flags |= HASBUF;
		cnp->cn_pnlen = MAXPATHLEN;
		
		goto retry_copy;
	}
	if (error)
	        goto error_out;

#if CONFIG_VOLFS
 	/*
	 * Check for legacy volfs style pathnames.
	 *
	 * For compatibility reasons we currently allow these paths,
	 * but future versions of the OS may not support them.
	 */
	if (ndp->ni_pathlen >= VOLFS_MIN_PATH_LEN &&
	    cnp->cn_pnbuf[0] == '/' &&
	    cnp->cn_pnbuf[1] == '.' &&
	    cnp->cn_pnbuf[2] == 'v' &&
	    cnp->cn_pnbuf[3] == 'o' &&
	    cnp->cn_pnbuf[4] == 'l' &&
	    cnp->cn_pnbuf[5] == '/' ) {
		char * realpath;
		int realpath_err;
		/* Attempt to resolve a legacy volfs style pathname. */
		MALLOC_ZONE(realpath, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK);
		if (realpath) {
			/* 
			 * We only error out on the ENAMETOOLONG cases where we know that
			 * vfs_getrealpath translation succeeded but the path could not fit into
			 * MAXPATHLEN characters.  In other failure cases, we may be dealing with a path
			 * that legitimately looks like /.vol/1234/567 and is not meant to be translated
			 */
			if ((realpath_err= vfs_getrealpath(&cnp->cn_pnbuf[6], realpath, MAXPATHLEN, ctx))) {
				FREE_ZONE(realpath, MAXPATHLEN, M_NAMEI);
				if (realpath_err == ENOSPC || realpath_err == ENAMETOOLONG){
					error = ENAMETOOLONG;
					goto error_out;
				}
			} else {
				if (cnp->cn_flags & HASBUF) {
					FREE_ZONE(cnp->cn_pnbuf, cnp->cn_pnlen, M_NAMEI);
				}
				cnp->cn_pnbuf = realpath;
				cnp->cn_pnlen = MAXPATHLEN;
				ndp->ni_pathlen = strlen(realpath) + 1;
				cnp->cn_flags |= HASBUF | CN_VOLFSPATH;
			}
		}
	}
#endif /* CONFIG_VOLFS */

#if CONFIG_AUDIT
	/* If we are auditing the kernel pathname, save the user pathname */
	if (cnp->cn_flags & AUDITVNPATH1)
		AUDIT_ARG(upath, ut->uu_cdir, cnp->cn_pnbuf, ARG_UPATH1); 
	if (cnp->cn_flags & AUDITVNPATH2)
		AUDIT_ARG(upath, ut->uu_cdir, cnp->cn_pnbuf, ARG_UPATH2); 
#endif /* CONFIG_AUDIT */

	/*
	 * Do not allow empty pathnames
	 */
	if (*cnp->cn_pnbuf == '\0') {
		error = ENOENT;
		goto error_out;
	}
	ndp->ni_loopcnt = 0;

	/*
	 * determine the starting point for the translation.
	 */
	if ((ndp->ni_rootdir = fdp->fd_rdir) == NULLVP) {
	        if ( !(fdp->fd_flags & FD_CHROOT))
		        ndp->ni_rootdir = rootvnode;
	}
	cnp->cn_nameptr = cnp->cn_pnbuf;

	ndp->ni_usedvp = NULLVP;

	if (*(cnp->cn_nameptr) == '/') {
	        while (*(cnp->cn_nameptr) == '/') {
		        cnp->cn_nameptr++;
			ndp->ni_pathlen--;
		}
		dp = ndp->ni_rootdir;
	} else if (cnp->cn_flags & USEDVP) {
	        dp = ndp->ni_dvp;
		ndp->ni_usedvp = dp;
	} else
	        dp = vfs_context_cwd(ctx);

	if (dp == NULLVP || (dp->v_lflag & VL_DEAD)) {
	        error = ENOENT;
		goto error_out;
	}
	ndp->ni_dvp = NULLVP;
	ndp->ni_vp  = NULLVP;

	for (;;) {
		ndp->ni_startdir = dp;

		if ( (error = lookup(ndp)) ) {
			goto error_out;
		}
		/*
		 * Check for symbolic link
		 */
		if ((cnp->cn_flags & ISSYMLINK) == 0) {
			return (0);
		}

continue_symlink:
		/* Gives us a new path to process, and a starting dir */
		error = lookup_handle_symlink(ndp, &dp, ctx);
		if (error != 0) {
			break;
		}
	}
	/*
	 * only come here if we fail to handle a SYMLINK...
	 * if either ni_dvp or ni_vp is non-NULL, then
	 * we need to drop the iocount that was picked
	 * up in the lookup routine
	 */
out_drop:
	if (ndp->ni_dvp)
	        vnode_put(ndp->ni_dvp);
	if (ndp->ni_vp)
	        vnode_put(ndp->ni_vp);
 error_out:
	if ( (cnp->cn_flags & HASBUF) ) {
		cnp->cn_flags &= ~HASBUF;
		FREE_ZONE(cnp->cn_pnbuf, cnp->cn_pnlen, M_NAMEI);
	}
	cnp->cn_pnbuf = NULL;
	ndp->ni_vp = NULLVP;
	ndp->ni_dvp = NULLVP;
	if (error == ERECYCLE){
		/* vnode was recycled underneath us. re-drive lookup to start at 
		   the beginning again, since recycling invalidated last lookup*/
		ndp->ni_cnd.cn_flags = cnpflags;
		ndp->ni_dvp = usedvp;
		goto vnode_recycled;
	}


	return (error);
}
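
In practice callers rarely fill in a nameidata by hand; they go through the NDINIT() macro, as Example 20 below does before vn_open(). A minimal hedged sketch of the plain-lookup pattern (the path literal is hypothetical):

	/* Sketch: resolve a kernel-space path to a vnode, then release
	 * the lookup state and the iocount that namei() took. */
	struct nameidata nd;
	vfs_context_t ctx = vfs_context_current();
	int error;

	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE,
	       CAST_USER_ADDR_T("/some/path"), ctx);
	error = namei(&nd);
	if (error == 0) {
		nameidone(&nd);
		vnode_put(nd.ni_vp);
	}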
Example 12
__private_extern__
int
fuse_internal_access(vnode_t                   vp,
                     int                       action,
                     vfs_context_t             context,
                     struct fuse_access_param *facp)
{
    int err = 0;
    int default_error = 0;
    uint32_t mask = 0;
    int dataflags;
    mount_t mp;
    struct fuse_dispatcher fdi;
    struct fuse_access_in *fai;
    struct fuse_data      *data;

    fuse_trace_printf_func();

    mp = vnode_mount(vp);

    data = fuse_get_mpdata(mp);
    dataflags = data->dataflags;

    /* Allow for now; let checks be handled inline later. */
    if (fuse_isdeferpermissions_mp(mp)) {
        return 0;
    }

    if (facp->facc_flags & FACCESS_FROM_VNOP) {
        default_error = ENOTSUP;
    }

    /*
     * (action & KAUTH_VNODE_GENERIC_WRITE_BITS) on a read-only file system
     * would have been handled by higher layers.
     */

    if (!fuse_implemented(data, FSESS_NOIMPLBIT(ACCESS))) {
        return default_error;
    }

    /* Unless explicitly permitted, deny everyone except the fs owner. */
    if (!vnode_isvroot(vp) && !(facp->facc_flags & FACCESS_NOCHECKSPY)) {
        if (!(dataflags & FSESS_ALLOW_OTHER)) {
            int denied = fuse_match_cred(data->daemoncred,
                                         vfs_context_ucred(context));
            if (denied) {
                return EPERM;
            }
        }
        facp->facc_flags |= FACCESS_NOCHECKSPY;
    }

    if (!(facp->facc_flags & FACCESS_DO_ACCESS)) {
        return default_error;
    }

    if (vnode_isdir(vp)) {
        if (action & (KAUTH_VNODE_LIST_DIRECTORY   |
                      KAUTH_VNODE_READ_EXTATTRIBUTES)) {
            mask |= R_OK;
        }
        if (action & (KAUTH_VNODE_ADD_FILE         |
                      KAUTH_VNODE_ADD_SUBDIRECTORY |
                      KAUTH_VNODE_DELETE_CHILD)) {
            mask |= W_OK;
        }
        if (action & KAUTH_VNODE_SEARCH) {
            mask |= X_OK;
        }
    } else {
        if (action & (KAUTH_VNODE_READ_DATA | KAUTH_VNODE_READ_EXTATTRIBUTES)) {
            mask |= R_OK;
        }
        if (action & (KAUTH_VNODE_WRITE_DATA | KAUTH_VNODE_APPEND_DATA)) {
            mask |= W_OK;
        }
        if (action & KAUTH_VNODE_EXECUTE) {
            mask |= X_OK;
        }
    }

    if (action & (KAUTH_VNODE_WRITE_ATTRIBUTES    |
                  KAUTH_VNODE_WRITE_EXTATTRIBUTES |
                  KAUTH_VNODE_WRITE_SECURITY)) {
        mask |= W_OK;
    }

    bzero(&fdi, sizeof(fdi));

    fdisp_init(&fdi, sizeof(*fai));
    fdisp_make_vp(&fdi, FUSE_ACCESS, vp, context);

    fai = fdi.indata;
    fai->mask = F_OK;
    fai->mask |= mask;

    if (!(err = fdisp_wait_answ(&fdi))) {
        fuse_ticket_drop(fdi.tick);
    }

    if (err == ENOSYS) {
        /*
         * Make sure we don't come in here again.
         */
        vfs_clearauthopaque(mp);
        fuse_clear_implemented(data, FSESS_NOIMPLBIT(ACCESS));
        err = default_error;
    }

    if (err == ENOENT) {

        const char *vname = NULL;

#if M_MACFUSE_ENABLE_UNSUPPORTED
        vname = vnode_getname(vp);
#endif /* M_MACFUSE_ENABLE_UNSUPPORTED */

        IOLog("MacFUSE: disappearing vnode %p (name=%s type=%d action=%x)\n",
              vp, (vname) ? vname : "?", vnode_vtype(vp), action);

#if M_MACFUSE_ENABLE_UNSUPPORTED
        if (vname) {
            vnode_putname(vname);
        }
#endif /* M_MACFUSE_ENABLE_UNSUPPORTED */

        /*
         * On 10.4, I think I can get Finder to lock because of /.Trashes/<uid>
         * unless I use REVOKE_NONE here.
         */
         
#if M_MACFUSE_ENABLE_INTERIM_FSNODE_LOCK && !M_MACFUSE_ENABLE_HUGE_LOCK
        fuse_biglock_unlock(data->biglock);
#endif
        fuse_internal_vnode_disappear(vp, context, REVOKE_SOFT);
#if M_MACFUSE_ENABLE_INTERIM_FSNODE_LOCK && !M_MACFUSE_ENABLE_HUGE_LOCK
        fuse_biglock_lock(data->biglock);
#endif
    }

    return err;
}
Example 13
static int
zfs_vfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t context)
{
	char	*osname = NULL;
	size_t  osnamelen = 0;
	int		error = 0;
	int		canwrite;
	/*
	 * Get the objset name (the "special" mount argument).
	 * The filesystem that we mount as root is defined in the
	 * "zfs-bootfs" property. 
	 */
	if (data) {
		user_addr_t fspec = USER_ADDR_NULL;
#ifndef __APPLE__
		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, ddi_root_node(),
		    DDI_PROP_DONTPASS, "zfs-bootfs", &zfs_bootpath) !=
		    DDI_SUCCESS)
			return (EIO);

		error = parse_bootpath(zfs_bootpath, rootfs.bo_name);
		ddi_prop_free(zfs_bootpath);
#endif
		osname = kmem_alloc(MAXPATHLEN, KM_SLEEP);

		if (vfs_context_is64bit(context)) {
			if ( (error = copyin(data, (caddr_t)&fspec, sizeof(fspec))) )
				goto out;	
		} else {
#ifdef ZFS_LEOPARD_ONLY
			char *tmp;
#else
			user32_addr_t tmp;
#endif
			if ( (error = copyin(data, (caddr_t)&tmp, sizeof(tmp))) )
				goto out;	
			/* munge into LP64 addr */
			fspec = CAST_USER_ADDR_T(tmp);
		}
		if ( (error = copyinstr(fspec, osname, MAXPATHLEN, &osnamelen)) )
			goto out;
	}

#if 0
	if (mvp->v_type != VDIR)
		return (ENOTDIR);

	mutex_enter(&mvp->v_lock);
	if ((uap->flags & MS_REMOUNT) == 0 &&
	    (uap->flags & MS_OVERLAY) == 0 &&
	    (mvp->v_count != 1 || (mvp->v_flag & VROOT))) {
		mutex_exit(&mvp->v_lock);
		return (EBUSY);
	}
	mutex_exit(&mvp->v_lock);

	/*
	 * ZFS does not support passing unparsed data in via MS_DATA.
	 * Users should use the MS_OPTIONSTR interface; this means
	 * that all option parsing is already done and the options struct
	 * can be interrogated.
	 */
	if ((uap->flags & MS_DATA) && uap->datalen > 0)
		return (EINVAL);

	/*
	 * Get the objset name (the "special" mount argument).
	 */
	if (error = pn_get(uap->spec, fromspace, &spn))
		return (error);

	osname = spn.pn_path;
#endif
	/*
	 * Check for mount privilege?
	 *
	 * If we don't have privilege then see if
	 * we have local permission to allow it
	 */
#ifndef __APPLE__
	error = secpolicy_fs_mount(cr, mvp, vfsp);
	if (error) {
		error = dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr);
		if (error == 0) {
			vattr_t		vattr;

			/*
			 * Make sure user is the owner of the mount point
			 * or has sufficient privileges.
			 */

			vattr.va_mask = AT_UID;

			if (error = VOP_GETATTR(mvp, &vattr, 0, cr)) {
				goto out;
			}

			if (error = secpolicy_vnode_owner(cr, vattr.va_uid)) {
				goto out;
			}

			if (error = VOP_ACCESS(mvp, VWRITE, 0, cr)) {
				goto out;
			}

			secpolicy_fs_mount_clearopts(cr, vfsp);
		} else {
			goto out;
		}
	}
#endif

	error = zfs_domount(mp, 0, osname, context);
	if (error)
		printf("zfs_vfs_mount: error %d\n", error);
	if (error == 0) {
		zfsvfs_t *zfsvfs = NULL;

		/* Make the Finder treat sub file systems just like a folder */
		if (strpbrk(osname, "/"))
			vfs_setflags(mp, (u_int64_t)((unsigned int)MNT_DONTBROWSE));

		/* Indicate to VFS that we support ACLs. */
		vfs_setextendedsecurity(mp);

		/* Advisory locking should be handled at the VFS layer */
		vfs_setlocklocal(mp);

		/*
		 * Mac OS X needs a file system modify time
		 *
		 * We use the mtime of the "com.apple.system.mtime" 
		 * extended attribute, which is associated with the
		 * file system root directory.
		 *
		 * Here we need to take a ref on z_mtime_vp to keep it around.
		 * If the attribute isn't there, attempt to create it.
		 */
		zfsvfs = vfs_fsprivate(mp);
		if (zfsvfs->z_mtime_vp == NULL) {
			struct vnode * rvp;
			struct vnode *xdvp = NULLVP;
			struct vnode *xvp = NULLVP;
			znode_t *rootzp;
			timestruc_t modify_time;
			cred_t  *cr;
			timestruc_t  now;
			int flag;
			int result;

			if (zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp) != 0) {
				goto out;
			}
			rvp = ZTOV(rootzp);
			cr = (cred_t *)vfs_context_ucred(context);

			/* Grab the hidden attribute directory vnode. */
			result = zfs_get_xattrdir(rootzp, &xdvp, cr, CREATE_XATTR_DIR);
			vnode_put(rvp);	/* all done with root vnode */
			rvp = NULL;
			if (result) {
				goto out;
			}

			/*
			 * HACK - workaround missing vnode_setnoflush() KPI...
			 *
			 * We tag zfsvfs so that zfs_attach_vnode() can then set
			 * vnfs_marksystem when the vnode gets created.
			 */
			zfsvfs->z_last_unmount_time = 0xBADC0DE;
			zfsvfs->z_last_mtime_synced = VTOZ(xdvp)->z_id;
			flag = vfs_isrdonly(mp) ? 0 : ZEXISTS;
			/* Lookup or create the named attribute. */
			if ( zfs_obtain_xattr(VTOZ(xdvp), ZFS_MTIME_XATTR,
			                      S_IRUSR | S_IWUSR, cr, &xvp,
			                      flag) ) {
				zfsvfs->z_last_unmount_time = 0;
				zfsvfs->z_last_mtime_synced = 0;
				vnode_put(xdvp);
				goto out;
			}
			gethrestime(&now);
			ZFS_TIME_ENCODE(&now, VTOZ(xvp)->z_phys->zp_mtime);
			vnode_put(xdvp);
			vnode_ref(xvp);

			zfsvfs->z_mtime_vp = xvp;
			ZFS_TIME_DECODE(&modify_time, VTOZ(xvp)->z_phys->zp_mtime);
			zfsvfs->z_last_unmount_time = modify_time.tv_sec;
			zfsvfs->z_last_mtime_synced = modify_time.tv_sec;

			/*
			 * Keep this referenced vnode from impeding an unmount.
			 *
			 * XXX vnode_setnoflush() is MIA from KPI (see workaround above).
			 */
#if 0
			vnode_setnoflush(xvp);
#endif
			vnode_put(xvp);
		}
	}
out:
	if (osname) {
		kmem_free(osname, MAXPATHLEN);
	}
	return (error);
}
Example 14
static int
zfs_domount(struct mount *mp, dev_t mount_dev, char *osname, vfs_context_t ctx)
{
	uint64_t readonly;
	int error = 0;
	int mode;
	zfsvfs_t *zfsvfs;
	znode_t *zp = NULL;
	struct timeval tv;

	ASSERT(mp);
	ASSERT(osname);

	/*
	 * Initialize the zfs-specific filesystem structure.
	 * Should probably make this a kmem cache, shuffle fields,
	 * and just bzero up to z_hold_mtx[].
	 */
	zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP);
	zfsvfs->z_vfs = mp;
	zfsvfs->z_parent = zfsvfs;
	zfsvfs->z_assign = TXG_NOWAIT;
	zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE;
	zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE;

	mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
	list_create(&zfsvfs->z_all_znodes, sizeof (znode_t),
	    offsetof(znode_t, z_link_node));
	rw_init(&zfsvfs->z_unmount_lock, NULL, RW_DEFAULT, NULL);
	rw_init(&zfsvfs->z_unmount_inactive_lock, NULL, RW_DEFAULT, NULL);
#ifndef __APPLE__
	/* Initialize the generic filesystem structure. */
	vfsp->vfs_bcount = 0;
	vfsp->vfs_data = NULL;

	if (zfs_create_unique_device(&mount_dev) == -1) {
		error = ENODEV;
		goto out;
	}
	ASSERT(vfs_devismounted(mount_dev) == 0);
#endif

	vfs_setfsprivate(mp, zfsvfs);

	if (error = dsl_prop_get_integer(osname, "readonly", &readonly, NULL))
		goto out;

	if (readonly) {
		mode = DS_MODE_PRIMARY | DS_MODE_READONLY;
		vfs_setflags(mp, (u_int64_t)((unsigned int)MNT_RDONLY));
	} else {
		mode = DS_MODE_PRIMARY;
	}
	error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os);
	if (error == EROFS) {
		mode = DS_MODE_PRIMARY | DS_MODE_READONLY;
		error = dmu_objset_open(osname, DMU_OST_ZFS, mode,
		    &zfsvfs->z_os);
	}

	if (error)
		goto out;

	if (error = zfs_init_fs(zfsvfs, &zp, (cred_t *) vfs_context_ucred(ctx)))
		goto out;

	/* The call to zfs_init_fs leaves the vnode held, release it here. */
	vnode_put(ZTOV(zp));

	if (dmu_objset_is_snapshot(zfsvfs->z_os)) {
		uint64_t xattr;

		ASSERT(mode & DS_MODE_READONLY);
#if 0
		atime_changed_cb(zfsvfs, B_FALSE);
		readonly_changed_cb(zfsvfs, B_TRUE);
		if (error = dsl_prop_get_integer(osname, "xattr", &xattr, NULL))
			goto out;
		xattr_changed_cb(zfsvfs, xattr);
#endif
		zfsvfs->z_issnap = B_TRUE;
	} else {
		
		if (!vfs_isrdonly(mp))
			zfs_unlinked_drain(zfsvfs);

#ifndef __APPLE__
		/*
		 * Parse and replay the intent log.
		 *
		 * Because of ziltest, this must be done after
		 * zfs_unlinked_drain().  (Further note: ziltest doesn't
		 * use readonly mounts, where zfs_unlinked_drain() isn't
		 * called.)  This is because ziltest causes spa_sync()
		 * to think it's committed, but actually it is not, so
		 * the intent log contains many txg's worth of changes.
		 *
		 * In particular, if object N is in the unlinked set in
		 * the last txg to actually sync, then it could be
		 * actually freed in a later txg and then reallocated in
		 * a yet later txg.  This would write a "create object
		 * N" record to the intent log.  Normally, this would be
		 * fine because the spa_sync() would have written out
		 * the fact that object N is free, before we could write
		 * the "create object N" intent log record.
		 *
		 * But when we are in ziltest mode, we advance the "open
		 * txg" without actually spa_sync()-ing the changes to
		 * disk.  So we would see that object N is still
		 * allocated and in the unlinked set, and there is an
		 * intent log record saying to allocate it.
		 */
		zil_replay(zfsvfs->z_os, zfsvfs, &zfsvfs->z_assign,
		    zfs_replay_vector);

		if (!zil_disable)
			zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data);
#endif
	}

#if 0
	if (!zfsvfs->z_issnap)
		zfsctl_create(zfsvfs);
#endif

	/*
	 * Record the mount time (for Spotlight)
	 */
	microtime(&tv);
	zfsvfs->z_mount_time = tv.tv_sec;
	
out:
	if (error) {
		if (zfsvfs->z_os)
			dmu_objset_close(zfsvfs->z_os);
		mutex_destroy(&zfsvfs->z_znodes_lock);
		list_destroy(&zfsvfs->z_all_znodes);
		rw_destroy(&zfsvfs->z_unmount_lock);
		rw_destroy(&zfsvfs->z_unmount_inactive_lock);
		kmem_free(zfsvfs, sizeof (zfsvfs_t));
	} else {
		OSIncrementAtomic(&zfs_active_fs_count);
		(void) copystr(osname, vfs_statfs(mp)->f_mntfromname, MNAMELEN - 1, 0);
		vfs_getnewfsid(mp);
	}

	return (error);
}
Example 15
/*
 * XXX Internally, we use VM_PROT_* somewhat interchangeably, but the correct
 * XXX usage is PROT_* from an interface perspective.  Thus the values of
 * XXX VM_PROT_* and PROT_* need to correspond.
 */
int
mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval)
{
	/*
	 *	Map in special device (must be SHARED) or file
	 */
	struct fileproc *fp;
	register struct		vnode *vp;
	int			flags;
	int			prot, file_prot;
	int			err=0;
	vm_map_t		user_map;
	kern_return_t		result;
	mach_vm_offset_t	user_addr;
	mach_vm_size_t		user_size;
	vm_object_offset_t	pageoff;
	vm_object_offset_t	file_pos;
	int			alloc_flags=0;
	boolean_t		docow;
	vm_prot_t		maxprot;
	void 			*handle;
	vm_pager_t		pager;
	int 			mapanon=0;
	int 			fpref=0;
	int error =0;
	int fd = uap->fd;

	user_addr = (mach_vm_offset_t)uap->addr;
	user_size = (mach_vm_size_t) uap->len;

	AUDIT_ARG(addr, user_addr);
	AUDIT_ARG(len, user_size);
	AUDIT_ARG(fd, uap->fd);

	prot = (uap->prot & VM_PROT_ALL);
#if 3777787
	/*
	 * Since the hardware currently does not support writing without
	 * read-before-write, or execution-without-read, if the request is
	 * for write or execute access, we must imply read access as well;
	 * otherwise programs expecting this to work will fail to operate.
	 */
	if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
		prot |= VM_PROT_READ;
#endif	/* radar 3777787 */

	flags = uap->flags;
	vp = NULLVP;

	/*
	 * The vm code does not have prototypes & the compiler doesn't do
	 * the right thing when you cast a 64-bit value and pass it in a
	 * function call. So here it is.
	 */
	file_pos = (vm_object_offset_t)uap->pos;


	/* make sure mapping fits into numeric range etc */
	if (file_pos + user_size > (vm_object_offset_t)-PAGE_SIZE_64)
		return (EINVAL);

	/*
	 * Align the file position to a page boundary,
	 * and save its page offset component.
	 */
	pageoff = (file_pos & PAGE_MASK);
	file_pos -= (vm_object_offset_t)pageoff;


	/* Adjust size for rounding (on both ends). */
	user_size += pageoff;			/* low end... */
	user_size = mach_vm_round_page(user_size);	/* hi end */


	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (flags & MAP_FIXED) {
		/*
		 * The specified address must have the same remainder
		 * as the file offset taken modulo PAGE_SIZE, so it
		 * should be aligned after adjustment by pageoff.
		 */
		user_addr -= pageoff;
		if (user_addr & PAGE_MASK)
			return (EINVAL);
	}
#ifdef notyet
/* Do not have APIs to get this info; need to wait till then */
	/*
	 * XXX for non-fixed mappings where no hint is provided or
	 * the hint would fall in the potential heap space,
	 * place it after the end of the largest possible heap.
	 *
	 * There should really be a pmap call to determine a reasonable
	 * location.
	 */
	else if (addr < mach_vm_round_page(p->p_vmspace->vm_daddr + MAXDSIZ))
		addr = mach_vm_round_page(p->p_vmspace->vm_daddr + MAXDSIZ);

#endif

	alloc_flags = 0;

	if (flags & MAP_ANON) {
		/*
		 * Mapping blank space is trivial.  Use positive fds as the alias
		 * value for memory tracking. 
		 */
		if (fd != -1) {
			/*
			 * Use "fd" to pass (some) Mach VM allocation flags,
			 * (see the VM_FLAGS_* definitions).
			 */
			alloc_flags = fd & (VM_FLAGS_ALIAS_MASK |
					    VM_FLAGS_PURGABLE);
			if (alloc_flags != fd) {
				/* reject if there are any extra flags */
				return EINVAL;
			}
		}
			
		handle = NULL;
		maxprot = VM_PROT_ALL;
		file_pos = 0;
		mapanon = 1;
	} else {
		struct vnode_attr va;
		vfs_context_t ctx = vfs_context_current();

		/*
		 * Mapping file, get fp for validation. Obtain vnode and make
		 * sure it is of appropriate type.
		 */
		err = fp_lookup(p, fd, &fp, 0);
		if (err)
			return(err);
		fpref = 1;
		if(fp->f_fglob->fg_type == DTYPE_PSXSHM) {
			uap->addr = (user_addr_t)user_addr;
			uap->len = (user_size_t)user_size;
			uap->prot = prot;
			uap->flags = flags;
			uap->pos = file_pos;
			error = pshm_mmap(p, uap, retval, fp, (off_t)pageoff);
			goto bad;
		}

		if (fp->f_fglob->fg_type != DTYPE_VNODE) {
			error = EINVAL;
			goto bad;
		}
		vp = (struct vnode *)fp->f_fglob->fg_data;
		error = vnode_getwithref(vp);
		if(error != 0)
			goto bad;

		if (vp->v_type != VREG && vp->v_type != VCHR) {
			(void)vnode_put(vp);
			error = EINVAL;
			goto bad;
		}

		AUDIT_ARG(vnpath, vp, ARG_VNODE1);
		
		/*
		 * POSIX: mmap needs to update access time for mapped files
		 */
		if ((vnode_vfsvisflags(vp) & MNT_NOATIME) == 0) {
			VATTR_INIT(&va);
			nanotime(&va.va_access_time);
			VATTR_SET_ACTIVE(&va, va_access_time);
			vnode_setattr(vp, &va, ctx);
		}
		
		/*
		 * XXX hack to handle use of /dev/zero to map anon memory (ala
		 * SunOS).
		 */
		if (vp->v_type == VCHR || vp->v_type == VSTR) {
			(void)vnode_put(vp);
			error = ENODEV;
			goto bad;
		} else {
			/*
			 * Ensure that file and memory protections are
			 * compatible.  Note that we only worry about
			 * writability if mapping is shared; in this case,
			 * current and max prot are dictated by the open file.
			 * XXX use the vnode instead?  Problem is: what
			 * credentials do we use for determination? What if
			 * proc does a setuid?
			 */
			maxprot = VM_PROT_EXECUTE;	/* ??? */
			if (fp->f_fglob->fg_flag & FREAD)
				maxprot |= VM_PROT_READ;
			else if (prot & PROT_READ) {
				(void)vnode_put(vp);
				error = EACCES;
				goto bad;
			}
			/*
			 * If we are sharing potential changes (either via
			 * MAP_SHARED or via the implicit sharing of character
			 * device mappings), and we are trying to get write
			 * permission although we opened it without asking
			 * for it, bail out. 
			 */

			if ((flags & MAP_SHARED) != 0) {
				if ((fp->f_fglob->fg_flag & FWRITE) != 0) {
					/*
					 * check for write access
					 *
					 * Note that we already made this check when granting FWRITE
					 * against the file, so it seems redundant here.
					 */
					error = vnode_authorize(vp, NULL, KAUTH_VNODE_CHECKIMMUTABLE, ctx);

					/* if not granted for any reason, but we wanted it, bad */
					if ((prot & PROT_WRITE) && (error != 0)) {
						vnode_put(vp);
						goto bad;
					}

					/* if writable, remember */
					if (error == 0)
						maxprot |= VM_PROT_WRITE;
				} else if ((prot & PROT_WRITE) != 0) {
					(void)vnode_put(vp);
					error = EACCES;
					goto bad;
				}
			} else
				maxprot |= VM_PROT_WRITE;

			handle = (void *)vp;
#if CONFIG_MACF
			error = mac_file_check_mmap(vfs_context_ucred(ctx),
			    fp->f_fglob, prot, flags, &maxprot);
			if (error) {
				(void)vnode_put(vp);
				goto bad;
			}
#endif /* MAC */
		}
	}

	if (user_size == 0)  {
		if (!mapanon)
			(void)vnode_put(vp);
		error = 0;
		goto bad;
	}

	/*
	 *	We bend a little - round the start and end addresses
	 *	to the nearest page boundary.
	 */
	user_size = mach_vm_round_page(user_size);

	if (file_pos & PAGE_MASK_64) {
		if (!mapanon)
			(void)vnode_put(vp);
		error = EINVAL;
		goto bad;
	}

	user_map = current_map();

	if ((flags & MAP_FIXED) == 0) {
		alloc_flags |= VM_FLAGS_ANYWHERE;
		user_addr = mach_vm_round_page(user_addr);
	} else {
		if (user_addr != mach_vm_trunc_page(user_addr)) {
		        if (!mapanon)
			        (void)vnode_put(vp);
			error = EINVAL;
			goto bad;
		}
		/*
		 * mmap(MAP_FIXED) will replace any existing mappings in the
		 * specified range, if the new mapping is successful.
		 * If we just deallocate the specified address range here,
		 * another thread might jump in and allocate memory in that
		 * range before we get a chance to establish the new mapping,
		 * and we won't have a chance to restore the old mappings.
		 * So we use VM_FLAGS_OVERWRITE to let Mach VM know that it
		 * has to deallocate the existing mappings and establish the
		 * new ones atomically.
		 */
		alloc_flags |= VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
	}

	if (flags & MAP_NOCACHE)
		alloc_flags |= VM_FLAGS_NO_CACHE;

	/*
	 * Lookup/allocate object.
	 */
	if (handle == NULL) {
		pager = NULL;
#ifdef notyet
/* Hmm .. */
#if defined(VM_PROT_READ_IS_EXEC)
		if (prot & VM_PROT_READ)
			prot |= VM_PROT_EXECUTE;
		if (maxprot & VM_PROT_READ)
			maxprot |= VM_PROT_EXECUTE;
#endif
#endif

#if 3777787
		if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
			prot |= VM_PROT_READ;
		if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
			maxprot |= VM_PROT_READ;
#endif	/* radar 3777787 */

		result = vm_map_enter_mem_object(user_map,
						 &user_addr, user_size,
						 0, alloc_flags,
						 IPC_PORT_NULL, 0, FALSE,
						 prot, maxprot,
						 (flags & MAP_SHARED) ?
						 VM_INHERIT_SHARE : 
						 VM_INHERIT_DEFAULT);
		if (result != KERN_SUCCESS)
			goto out;
	} else {
		pager = (vm_pager_t)ubc_getpager(vp);
		
		if (pager == NULL) {
			(void)vnode_put(vp);
			error = ENOMEM;
			goto bad;
		}

		/*
		 *  Set credentials:
		 *	FIXME: if we're writing the file we need a way to
		 *      ensure that someone doesn't replace our R/W creds
		 * 	with ones that only work for read.
		 */

		ubc_setthreadcred(vp, p, current_thread());
		docow = FALSE;
		if ((flags & (MAP_ANON|MAP_SHARED)) == 0) {
			docow = TRUE;
		}

#ifdef notyet
/* Hmm .. */
#if defined(VM_PROT_READ_IS_EXEC)
		if (prot & VM_PROT_READ)
			prot |= VM_PROT_EXECUTE;
		if (maxprot & VM_PROT_READ)
			maxprot |= VM_PROT_EXECUTE;
#endif
#endif /* notyet */

#if 3777787
		if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
			prot |= VM_PROT_READ;
		if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
			maxprot |= VM_PROT_READ;
#endif	/* radar 3777787 */

		result = vm_map_enter_mem_object(user_map,
						 &user_addr, user_size,
						 0, alloc_flags,
						 (ipc_port_t)pager, file_pos,
						 docow, prot, maxprot, 
						 (flags & MAP_SHARED) ?
						 VM_INHERIT_SHARE : 
						 VM_INHERIT_DEFAULT);

		if (result != KERN_SUCCESS) {
			(void)vnode_put(vp);
			goto out;
		}

		file_prot = prot & (PROT_READ | PROT_WRITE | PROT_EXEC);
		if (docow) {
			/* private mapping: won't write to the file */
			file_prot &= ~PROT_WRITE;
		}
		(void) ubc_map(vp, file_prot);
	}

	if (!mapanon)
		(void)vnode_put(vp);

out:
	switch (result) {
	case KERN_SUCCESS:
		*retval = user_addr + pageoff;
		error = 0;
		break;
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		error =  ENOMEM;
		break;
	case KERN_PROTECTION_FAILURE:
		error =  EACCES;
		break;
	default:
		error =  EINVAL;
		break;
	}
bad:
	if (fpref)
		fp_drop(p, fd, fp, 0);

	KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_mmap) | DBG_FUNC_NONE), fd, (uint32_t)(*retval), (uint32_t)user_size, error, 0);
	KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO2, SYS_mmap) | DBG_FUNC_NONE), (uint32_t)(*retval >> 32), (uint32_t)(user_size >> 32),
			      (uint32_t)(file_pos >> 32), (uint32_t)file_pos, 0);

	return(error);
}
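
For orientation, the userspace call that exercises the code path above (standard POSIX usage, not from the kernel source; fd is assumed to be an open descriptor for a regular file):

	#include <sys/mman.h>
	#include <stdio.h>

	/* Map the first page of fd read-only and shared; on failure errno
	 * carries EACCES/EINVAL/ENOMEM from the kernel paths above. */
	void *p = mmap(NULL, 4096, PROT_READ, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED)
		perror("mmap");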
Example 16
IOReturn FileNVRAM::read_buffer(char** aBuffer, uint64_t* aLength, vfs_context_t aCtx)
{
	IOReturn error = 0;

	struct vnode * vp;
	struct vnode_attr va;

	if (aCtx)
	{
		if ((error = vnode_open(FILE_NVRAM_PATH, (O_RDONLY | FREAD | O_NOFOLLOW), S_IRUSR, VNODE_LOOKUP_NOFOLLOW, &vp, aCtx)))
		{
			printf("failed opening vnode at path %s, errno %d\n", FILE_NVRAM_PATH, error);
			
			return error;
		}
		else
		{
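			/*
			 * Works only because vnode_isreg() returns 1 for a
			 * regular file and VREG happens to equal 1.
			 */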
			if ((error = vnode_isreg(vp)) == VREG)
			{
				VATTR_INIT(&va);
				VATTR_WANTED(&va, va_data_size);	/* size in bytes of the fork managed by current vnode */

				// Determine size of vnode
				if ((error = vnode_getattr(vp, &va, aCtx)))
				{
					printf("FileNVRAM.kext: Error, failed to determine file size of %s, errno %d.\n", FILE_NVRAM_PATH, error);
				}
				else
				{
					if (aLength)
					{
						*aLength = va.va_data_size;
					}
					
					*aBuffer = (char *)IOMalloc((size_t)va.va_data_size);
					int len = (int)va.va_data_size;
					
					if ((error = vn_rdwr(UIO_READ, vp, *aBuffer, len, 0, UIO_SYSSPACE, IO_NOCACHE|IO_NODELOCKED|IO_UNIT, vfs_context_ucred(aCtx), (int *) 0, vfs_context_proc(aCtx))))
					{
						printf("FileNVRAM.kext: Error, writing to vnode(%s) failed with error %d!\n", FILE_NVRAM_PATH, error);
					}
				}
				
				if ((error = vnode_close(vp, 0, aCtx)))
				{
					printf("FileNVRAM.kext: Error, vnode_close(%s) failed with error %d!\n", FILE_NVRAM_PATH, error);
				}
			}
			else
			{
				printf("FileNVRAM.kext: Error, vnode_isreg(%s) failed with error %d!\n", FILE_NVRAM_PATH, error);
			}
		}
	}
	else
	{
		printf("FileNVRAM.kext: aCtx == NULL!\n");
		error = 0xFFFF; // EINVAL;
	}
	
	return error;
}
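
One thing the interface above leaves implicit: *aBuffer is allocated with IOMalloc() inside read_buffer(), so ownership passes to the caller. A hedged usage sketch (names hypothetical; assumed to run inside another method of the same class):

	// Sketch: read the NVRAM backing file, then release the buffer.
	char*         buf = NULL;
	uint64_t      len = 0;
	vfs_context_t ctx = vfs_context_create(NULL);

	if (read_buffer(&buf, &len, ctx) == 0 && buf)
	{
		// ... consume len bytes of buf ...
		IOFree(buf, (size_t)len);
	}
	vfs_context_rele(ctx);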
Example 17
void commonattrpack(attrinfo_t *aip, zfsvfs_t *zfsvfs, znode_t *zp,
                    const char *name, ino64_t objnum, enum vtype vtype,
                    boolean_t user64)
{
	attrgroup_t commonattr = aip->ai_attrlist->commonattr;
	void *attrbufptr = *aip->ai_attrbufpp;
	void *varbufptr = *aip->ai_varbufpp;
	struct mount *mp = zfsvfs->z_vfs;
	cred_t  *cr = (cred_t *)vfs_context_ucred(aip->ai_context);
	finderinfo_t finderinfo;

    /*
     * We should probably combine all the sa_lookup into a bulk
     * lookup operand.
     */

	finderinfo.fi_flags = 0;

	if (ATTR_CMN_NAME & commonattr) {
		nameattrpack(aip, name, strlen(name));
		attrbufptr = *aip->ai_attrbufpp;
		varbufptr = *aip->ai_varbufpp;
	}
	if (ATTR_CMN_DEVID & commonattr) {
		*((dev_t *)attrbufptr) = vfs_statfs(mp)->f_fsid.val[0];
		attrbufptr = ((dev_t *)attrbufptr) + 1;
	}
	if (ATTR_CMN_FSID & commonattr) {
		*((fsid_t *)attrbufptr) = vfs_statfs(mp)->f_fsid;
		attrbufptr = ((fsid_t *)attrbufptr) + 1;
	}
	if (ATTR_CMN_OBJTYPE & commonattr) {
		*((fsobj_type_t *)attrbufptr) = vtype;
		attrbufptr = ((fsobj_type_t *)attrbufptr) + 1;
	}
	if (ATTR_CMN_OBJTAG & commonattr) {
		*((fsobj_tag_t *)attrbufptr) = VT_ZFS;
		attrbufptr = ((fsobj_tag_t *)attrbufptr) + 1;
	}
	/*
	 * Note: ATTR_CMN_OBJID is lossy (only 32 bits).
	 */
	if ((ATTR_CMN_OBJID | ATTR_CMN_OBJPERMANENTID) & commonattr) {
		u_int32_t fileid;
		/*
		 * On Mac OS X we always export the root directory id as 2
		 */
		fileid = (objnum == zfsvfs->z_root) ? 2 : objnum;

		if (ATTR_CMN_OBJID & commonattr) {
			((fsobj_id_t *)attrbufptr)->fid_objno = fileid;
			((fsobj_id_t *)attrbufptr)->fid_generation = 0;
			attrbufptr = ((fsobj_id_t *)attrbufptr) + 1;
		}
		if (ATTR_CMN_OBJPERMANENTID & commonattr) {
			((fsobj_id_t *)attrbufptr)->fid_objno = fileid;
			((fsobj_id_t *)attrbufptr)->fid_generation = 0;
			attrbufptr = ((fsobj_id_t *)attrbufptr) + 1;
		}
	}
	/*
	 * Note: ATTR_CMN_PAROBJID is lossy (only 32 bits).
	 */
	if (ATTR_CMN_PAROBJID & commonattr) {
		uint64_t parentid;

        VERIFY(sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
                         &parentid, sizeof (parentid)) == 0);

		/*
		 * On Mac OS X we always export the root
		 * directory id as 2 and its parent as 1
		 */
		if (zp && zp->z_id == zfsvfs->z_root)
			parentid = 1;
		else if (parentid == zfsvfs->z_root)
			parentid = 2;

		ASSERT(parentid != 0);

		((fsobj_id_t *)attrbufptr)->fid_objno = (uint32_t)parentid;
		((fsobj_id_t *)attrbufptr)->fid_generation = 0;
		attrbufptr = ((fsobj_id_t *)attrbufptr) + 1;
	}
	if (ATTR_CMN_SCRIPT & commonattr) {
		*((text_encoding_t *)attrbufptr) = kTextEncodingMacUnicode;
		attrbufptr = ((text_encoding_t *)attrbufptr) + 1;
	}
	if (ATTR_CMN_CRTIME & commonattr) {
        uint64_t times[2];
        VERIFY(sa_lookup(zp->z_sa_hdl, SA_ZPL_CRTIME(zfsvfs),
                         times, sizeof(times)) == 0);
		if (user64) {
			ZFS_TIME_DECODE((timespec_user64_t *)attrbufptr,
			                times);
			attrbufptr = ((timespec_user64_t *)attrbufptr) + 1;
		} else {
			ZFS_TIME_DECODE((timespec_user32_t *)attrbufptr,
			                times);
			attrbufptr = ((timespec_user32_t *)attrbufptr) + 1;
		}
	}
	if (ATTR_CMN_MODTIME & commonattr) {
        uint64_t times[2];
        VERIFY(sa_lookup(zp->z_sa_hdl, SA_ZPL_MTIME(zfsvfs),
                         times, sizeof(times)) == 0);
		if (user64) {
			ZFS_TIME_DECODE((timespec_user64_t *)attrbufptr,
			                times);
			attrbufptr = ((timespec_user64_t *)attrbufptr) + 1;
		} else {
			ZFS_TIME_DECODE((timespec_user32_t *)attrbufptr,
			                times);
			attrbufptr = ((timespec_user32_t *)attrbufptr) + 1;
		}
	}
	if (ATTR_CMN_CHGTIME & commonattr) {
        uint64_t times[2];
        VERIFY(sa_lookup(zp->z_sa_hdl, SA_ZPL_CTIME(zfsvfs),
                         times, sizeof(times)) == 0);
		if (user64) {
			ZFS_TIME_DECODE((timespec_user64_t *)attrbufptr,
			                times);
			attrbufptr = ((timespec_user64_t *)attrbufptr) + 1;
		} else {
			ZFS_TIME_DECODE((timespec_user32_t *)attrbufptr,
			                times);
			attrbufptr = ((timespec_user32_t *)attrbufptr) + 1;
		}
	}
	if (ATTR_CMN_ACCTIME & commonattr) {
        uint64_t times[2];
        VERIFY(sa_lookup(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs),
                         times, sizeof(times)) == 0);
		if (user64) {
			ZFS_TIME_DECODE((timespec_user64_t *)attrbufptr,
			                times);
			attrbufptr = ((timespec_user64_t *)attrbufptr) + 1;
		} else {
			ZFS_TIME_DECODE((timespec_user32_t *)attrbufptr,
			                times);
			attrbufptr = ((timespec_user32_t *)attrbufptr) + 1;
		}
	}
	if (ATTR_CMN_BKUPTIME & commonattr) {
		/* legacy attribute -- just pass zero */
		if (user64) {
			((timespec_user64_t *)attrbufptr)->tv_sec = 0;
			((timespec_user64_t *)attrbufptr)->tv_nsec = 0;
			attrbufptr = ((timespec_user64_t *)attrbufptr) + 1;
		}  else {
			((timespec_user32_t *)attrbufptr)->tv_sec = 0;
			((timespec_user32_t *)attrbufptr)->tv_nsec = 0;
			attrbufptr = ((timespec_user32_t *)attrbufptr) + 1;
		}
	}
	if (ATTR_CMN_FNDRINFO & commonattr) {
        uint64_t val;
        VERIFY(sa_lookup(zp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs),
                         &val, sizeof(val)) == 0);
		getfinderinfo(zp, cr, &finderinfo);
		/* Shadow ZFS_HIDDEN to Finder Info's invisible bit */
		if (val & ZFS_HIDDEN) {
			finderinfo.fi_flags |=
				OSSwapHostToBigConstInt16(kIsInvisible);
		}
		bcopy(&finderinfo, attrbufptr, sizeof (finderinfo));
		attrbufptr = (char *)attrbufptr + 32;
	}
	if (ATTR_CMN_OWNERID & commonattr) {
        uint64_t val;
        VERIFY(sa_lookup(zp->z_sa_hdl, SA_ZPL_UID(zfsvfs),
                         &val, sizeof(val)) == 0);
		*((uid_t *)attrbufptr) = val;
		attrbufptr = ((uid_t *)attrbufptr) + 1;
	}
	if (ATTR_CMN_GRPID & commonattr) {
        uint64_t val;
        VERIFY(sa_lookup(zp->z_sa_hdl, SA_ZPL_GID(zfsvfs),
                         &val, sizeof(val)) == 0);
		*((gid_t *)attrbufptr) = val;
		attrbufptr = ((gid_t *)attrbufptr) + 1;
	}
	if (ATTR_CMN_ACCESSMASK & commonattr) {
        uint64_t val;
        VERIFY(sa_lookup(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs),
                         &val, sizeof(val)) == 0);
		*((u_int32_t *)attrbufptr) = val;
		attrbufptr = ((u_int32_t *)attrbufptr) + 1;
	}
	if (ATTR_CMN_FLAGS & commonattr) {
		u_int32_t flags = zfs_getbsdflags(zp);

		/* Shadow Finder Info's invisible bit to UF_HIDDEN */
		if ((ATTR_CMN_FNDRINFO & commonattr) &&
		    (OSSwapBigToHostInt16(finderinfo.fi_flags) & kIsInvisible))
			flags |= UF_HIDDEN;

		*((u_int32_t *)attrbufptr) = flags;
		attrbufptr = ((u_int32_t *)attrbufptr) + 1;
	}
	if (ATTR_CMN_USERACCESS & commonattr) {
		u_int32_t user_access = 0;
        uint64_t val;
        VERIFY(sa_lookup(zp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs),
                         &val, sizeof(val)) == 0);

		user_access = getuseraccess(zp, aip->ai_context);

		/* Also consider READ-ONLY file system. */
		if (vfs_flags(mp) & MNT_RDONLY) {
			user_access &= ~W_OK;
		}

		/* Locked objects are not writable either */
		if ((val & ZFS_IMMUTABLE) &&
		    (vfs_context_suser(aip->ai_context) != 0)) {
			user_access &= ~W_OK;
		}

		*((u_int32_t *)attrbufptr) = user_access;
		attrbufptr = ((u_int32_t *)attrbufptr) + 1;
	}
	if (ATTR_CMN_FILEID & commonattr) {
		/*
		 * On Mac OS X we always export the root directory id as 2
		 */
		if (objnum == zfsvfs->z_root)
			objnum = 2;

		*((u_int64_t *)attrbufptr) = objnum;
		attrbufptr = ((u_int64_t *)attrbufptr) + 1;
	}
	if (ATTR_CMN_PARENTID & commonattr) {
		uint64_t parentid;

        VERIFY(sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
                         &parentid, sizeof (parentid)) == 0);

		/*
		 * On Mac OS X we always export the root
		 * directory id as 2 and its parent as 1
		 */
		if (zp && zp->z_id == zfsvfs->z_root)
			parentid = 1;
		else if (parentid == zfsvfs->z_root)
			parentid = 2;

		ASSERT(parentid != 0);

		*((u_int64_t *)attrbufptr) = parentid;
		attrbufptr = ((u_int64_t *)attrbufptr) + 1;
	}

	*aip->ai_attrbufpp = attrbufptr;
	*aip->ai_varbufpp = varbufptr;
}
Example 18
/*
 * Open a special file.
 */
int
spec_open(struct vnop_open_args *ap)
{
	struct proc *p = vfs_context_proc(ap->a_context);
	kauth_cred_t cred = vfs_context_ucred(ap->a_context);
	struct vnode *vp = ap->a_vp;
	dev_t bdev, dev = (dev_t)vp->v_rdev;
	int maj = major(dev);
	int error;

	/*
	 * Don't allow open if fs is mounted -nodev.
	 */
	if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_NODEV))
		return (ENXIO);

	switch (vp->v_type) {

	case VCHR:
		if ((u_int)maj >= (u_int)nchrdev)
			return (ENXIO);
		if (cred != FSCRED && (ap->a_mode & FWRITE)) {
			/*
			 * When running in very secure mode, do not allow
			 * opens for writing of any disk character devices.
			 */
			if (securelevel >= 2 && isdisk(dev, VCHR))
				return (EPERM);
			/*
			 * When running in secure mode, do not allow opens
			 * for writing of /dev/mem, /dev/kmem, or character
			 * devices whose corresponding block devices are
			 * currently mounted.
			 */
			if (securelevel >= 1) {
				if ((bdev = chrtoblk(dev)) != NODEV && check_mountedon(bdev, VBLK, &error))
					return (error);
				if (iskmemdev(dev))
					return (EPERM);
			}
		}
		if (cdevsw[maj].d_type == D_TTY) {
			vnode_lock(vp);
			vp->v_flag |= VISTTY;
			vnode_unlock(vp);
		}
		error = (*cdevsw[maj].d_open)(dev, ap->a_mode, S_IFCHR, p);
		return (error);

	case VBLK:
		if ((u_int)maj >= (u_int)nblkdev)
			return (ENXIO);
		/*
		 * When running in very secure mode, do not allow
		 * opens for writing of any disk block devices.
		 */
		if (securelevel >= 2 && cred != FSCRED &&
		    (ap->a_mode & FWRITE) && bdevsw[maj].d_type == D_DISK)
			return (EPERM);
		/*
		 * Do not allow opens of block devices that are
		 * currently mounted.
		 */
		if ( (error = vfs_mountedon(vp)) )
			return (error);
		error = (*bdevsw[maj].d_open)(dev, ap->a_mode, S_IFBLK, p);
		if (!error) {
		    u_int64_t blkcnt;
		    u_int32_t blksize;
			int setsize = 0;
			u_int32_t size512 = 512;


		    if (!VNOP_IOCTL(vp, DKIOCGETBLOCKSIZE, (caddr_t)&blksize, 0, ap->a_context)) {
				/* Switch to 512 byte sectors (temporarily) */

				if (!VNOP_IOCTL(vp, DKIOCSETBLOCKSIZE, (caddr_t)&size512, FWRITE, ap->a_context)) {
			    	/* Get the number of 512 byte physical blocks. */
			    	if (!VNOP_IOCTL(vp, DKIOCGETBLOCKCOUNT, (caddr_t)&blkcnt, 0, ap->a_context)) {
						setsize = 1;
			    	}
				}
				/* If it doesn't set back, we can't recover */
				if (VNOP_IOCTL(vp, DKIOCSETBLOCKSIZE, (caddr_t)&blksize, FWRITE, ap->a_context))
			    	error = ENXIO;
		    }


			vnode_lock(vp);
		    set_blocksize(vp, dev);

		    /*
		     * Cache the size in bytes of the block device for later
		     * use by spec_write().
		     */
			if (setsize)
				vp->v_specdevsize = blkcnt * (u_int64_t)size512;
			else
		    	vp->v_specdevsize = (u_int64_t)0;	/* Default: Can't get */
			
			vnode_unlock(vp);

		}
		return(error);
	default:
	        panic("spec_open type");
	}
	return (0);
}
Example 19
static errno_t
fuse_vfsop_setattr(mount_t mp, struct vfs_attr *fsap, vfs_context_t context)
{
    int error = 0;

    fuse_trace_printf_vfsop();

    kauth_cred_t cred = vfs_context_ucred(context);

    if (!fuse_vfs_context_issuser(context) &&
            (kauth_cred_getuid(cred) != vfs_statfs(mp)->f_owner)) {
        return EACCES;
    }

    struct fuse_data *data = fuse_get_mpdata(mp);

    if (VFSATTR_IS_ACTIVE(fsap, f_vol_name)) {

        if (!fuse_implemented(data, FSESS_NOIMPLBIT(SETVOLNAME))) {
            error = ENOTSUP;
            goto out;
        }

        if (fsap->f_vol_name[0] == 0) {
            error = EINVAL;
            goto out;
        }

        size_t namelen = strlen(fsap->f_vol_name);
        if (namelen >= MAXPATHLEN) {
            error = ENAMETOOLONG;
            goto out;
        }

        vnode_t root_vp;

        error = fuse_vfsop_root(mp, &root_vp, context);
        if (error) {
            goto out;
        }

        struct fuse_dispatcher fdi;
        fdisp_init(&fdi, namelen + 1);
        fdisp_make_vp(&fdi, FUSE_SETVOLNAME, root_vp, context);
        memcpy((char *)fdi.indata, fsap->f_vol_name, namelen);
        ((char *)fdi.indata)[namelen] = '\0';

        if (!(error = fdisp_wait_answ(&fdi))) {
            fuse_ticket_drop(fdi.tick);
        }

        (void)vnode_put(root_vp);

        if (error) {
            if (error == ENOSYS) {
                error = ENOTSUP;
                fuse_clear_implemented(data, FSESS_NOIMPLBIT(SETVOLNAME));
            }
            goto out;
        }

        copystr(fsap->f_vol_name, data->volname, MAXPATHLEN - 1, &namelen);
        bzero(data->volname + namelen, MAXPATHLEN - namelen);

        VFSATTR_SET_SUPPORTED(fsap, f_vol_name);
    }

out:
    return error;
}
Example 20
/*
 * Accounting system call.  Written based on the specification and
 * previous implementation done by Mark Tinguely.
 */
int
acct(proc_t p, struct acct_args *uap, __unused int *retval)
{
	struct nameidata nd;
	int error;
	struct vfs_context *ctx; 

	ctx = vfs_context_current();

	/* Make sure that the caller is root. */
	if ((error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
		return (error);

	/*
	 * If accounting is to be started to a file, open that file for
	 * writing and make sure it's a 'normal' file.
	 */
	if (uap->path != USER_ADDR_NULL) {
		NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, ctx);
		if ((error = vn_open(&nd, FWRITE, 0)))
			return (error);
#if CONFIG_MACF
		error = mac_system_check_acct(vfs_context_ucred(ctx), nd.ni_vp);
		if (error) {
			vnode_put(nd.ni_vp);
			vn_close(nd.ni_vp, FWRITE, ctx);
			return (error);
		}
#endif
		vnode_put(nd.ni_vp);

		if (nd.ni_vp->v_type != VREG) {
			vn_close(nd.ni_vp, FWRITE, ctx);
			return (EACCES);
		}
	}
#if CONFIG_MACF
	else {
		error = mac_system_check_acct(vfs_context_ucred(ctx), NULL);
		if (error)
			return (error);
	}
#endif

	/*
	 * If accounting was previously enabled, kill the old space-watcher,
	 * close the file, and (if no new file was specified, leave).
	 */
	if (acctp != NULLVP || suspend_acctp != NULLVP) {
		untimeout(acctwatch_funnel, NULL);
		error = vn_close((acctp != NULLVP ? acctp : suspend_acctp),
				FWRITE, vfs_context_current());

		acctp = suspend_acctp = NULLVP;
	}
	if (uap->path == USER_ADDR_NULL)
		return (error);

	/*
	 * Save the new accounting file vnode, and schedule the new
	 * free space watcher.
	 */
	acctp = nd.ni_vp;
	acctwatch(NULL);
	return (error);
}
Example 21
static int
vfs_mount_9p(mount_t mp, vnode_t devvp, user_addr_t data, vfs_context_t ctx)
{
#pragma unused(devvp)
	struct sockaddr *addr, *authaddr;
	struct vfsstatfs *sp;
	char authkey[DESKEYLEN+1];
	kauth_cred_t cred;
	user_args_9p args;
	mount_9p *nmp;
	size_t size;
	fid_9p fid;
	qid_9p qid;
	char *vers;
	int e;

	TRACE();
	nmp = NULL;
	addr = NULL;
	authaddr = NULL;
	fid = NOFID;

	if (vfs_isupdate(mp))
		return ENOTSUP;

	if (vfs_context_is64bit(ctx)) {
		if ((e=copyin(data, &args, sizeof(args))))
			goto error;
	} else {
		args_9p args32;
		if ((e=copyin(data, &args32, sizeof(args32))))
			goto error;
		args.spec			= CAST_USER_ADDR_T(args32.spec);
		args.addr			= CAST_USER_ADDR_T(args32.addr);
		args.addrlen		= args32.addrlen;
		args.authaddr		= CAST_USER_ADDR_T(args32.authaddr);
		args.authaddrlen	= args32.authaddrlen;
		args.volume			= CAST_USER_ADDR_T(args32.volume);
		args.uname			= CAST_USER_ADDR_T(args32.uname);
		args.aname			= CAST_USER_ADDR_T(args32.aname);
		args.authkey		= CAST_USER_ADDR_T(args32.authkey);
		args.flags			= args32.flags;
	}
	e = ENOMEM;
	nmp = malloc_9p(sizeof(*nmp));
	if (nmp == NULL)
		return e;

	nmp->mp = mp;
	TAILQ_INIT(&nmp->req);
	nmp->lck = lck_mtx_alloc_init(lck_grp_9p, LCK_ATTR_NULL);
	nmp->reqlck = lck_mtx_alloc_init(lck_grp_9p, LCK_ATTR_NULL);
	nmp->nodelck = lck_mtx_alloc_init(lck_grp_9p, LCK_ATTR_NULL);
	nmp->node = hashinit(desiredvnodes, M_TEMP, &nmp->nodelen);
	if (nmp->lck==NULL || nmp->reqlck==NULL || nmp->nodelck==NULL || nmp->node==NULL)
		goto error;

	if ((e=nameget_9p(args.volume, &nmp->volume)))
		goto error;
	if ((e=nameget_9p(args.uname, &nmp->uname)))
		goto error;
	if ((e=nameget_9p(args.aname, &nmp->aname)))
		goto error;

	cred = vfs_context_ucred(ctx);
	if (IS_VALID_CRED(cred)) {
		nmp->uid = kauth_cred_getuid(cred);
		nmp->gid = kauth_cred_getgid(cred);
	} else {
		nmp->uid = KAUTH_UID_NONE;
		nmp->gid = KAUTH_GID_NONE;
	}
	
	vfs_getnewfsid(mp);
	vfs_setfsprivate(mp, nmp);
	
	nmp->flags = args.flags;
	if ((e=addrget_9p(args.addr, args.addrlen, &addr)))
		goto error;
	if ((e=connect_9p(nmp, addr)))
		goto error;

	vers = VERSION9P;
	if (ISSET(nmp->flags, FLAG_DOTU))
		vers = VERSION9PDOTU;
	if ((e=version_9p(nmp, vers, &nmp->version)))
		goto error;
	if (ISSET(nmp->flags, FLAG_DOTU) && strcmp(VERSION9PDOTU, nmp->version)==0)
		SET(nmp->flags, F_DOTU);

	nmp->afid = NOFID;
	if (args.authaddr && args.authaddrlen && args.authkey) {
		if ((e=copyin(args.authkey, authkey, DESKEYLEN)))
			goto error;
		if ((e=addrget_9p(args.authaddr, args.authaddrlen, &authaddr)))
			goto error;
		if ((e=auth_9p(nmp, nmp->uname, nmp->aname, nmp->uid, &nmp->afid, &qid)))
			goto error;
		if (nmp->afid!=NOFID &&
			(e=authp9any_9p(nmp, nmp->afid, authaddr, nmp->uname, authkey)))
			goto error;
		bzero(authkey, DESKEYLEN);
	}
	if ((e=attach_9p(nmp, nmp->uname, nmp->aname, nmp->afid, nmp->uid, &fid, &qid)))
		goto error;

	if ((e=nget_9p(nmp, fid, qid, NULL, &nmp->root, NULL, ctx)))
		goto error;

	nunlock_9p(NTO9P(nmp->root));
	e = vnode_ref(nmp->root);
	vnode_put(nmp->root);
	if (e)
		goto error;

	vfs_setauthopaque(mp);
	vfs_clearauthopaqueaccess(mp);
	vfs_setlocklocal(mp);

	// init stats
	sp = vfs_statfs(nmp->mp);
	if (copyinstr(args.spec, sp->f_mntfromname, MNAMELEN-1, &size) != 0)
		size = 0;	/* size is not set on copyinstr failure */
	bzero(sp->f_mntfromname+size, MNAMELEN-size);
	sp->f_bsize = PAGE_SIZE;
	sp->f_iosize = nmp->msize-IOHDRSZ;
	sp->f_blocks = sp->f_bfree = sp->f_bavail = sp->f_bused = 0;
	sp->f_files = 65535;
	sp->f_ffree = sp->f_files-2;
	sp->f_flags = vfs_flags(mp);
	
	free_9p(addr);
	free_9p(authaddr);
	return 0;

error:
	bzero(authkey, DESKEYLEN);
	free_9p(addr);
	free_9p(authaddr);
	/* nmp may still be NULL if the argument copyin failed */
	if (nmp != NULL) {
		if (nmp->so) {
			clunk_9p(nmp, fid);
			disconnect_9p(nmp);
		}
		freemount_9p(nmp);
	}
	vfs_setfsprivate(mp, NULL);
	return e;
}
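
The 32-bit branch above widens every user pointer with CAST_USER_ADDR_T so the rest of the mount path only ever sees 64-bit user addresses. A reduced sketch of that widening, using two hypothetical argument structures rather than the real args_9p layout (kernel KPIs copyin() and CAST_USER_ADDR_T assumed):

struct args32_sketch {		/* what a 32-bit process hands in */
	uint32_t	addr;
	uint32_t	addrlen;
};

struct args64_sketch {		/* what the kernel works with */
	user_addr_t	addr;
	uint32_t	addrlen;
};

static int
widen_args_sketch(user_addr_t data, int is64bit, struct args64_sketch *out)
{
	struct args32_sketch a32;
	int e;

	if (is64bit)
		return copyin(data, out, sizeof(*out));

	if ((e = copyin(data, &a32, sizeof(a32))))
		return e;
	/* widen the user pointer; it is never dereferenced directly */
	out->addr    = CAST_USER_ADDR_T(a32.addr);
	out->addrlen = a32.addrlen;
	return 0;
}
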
Example n. 22
/*
 * Mount null layer
 */
static int
nullfs_mount(struct mount * mp, __unused vnode_t devvp, user_addr_t user_data, vfs_context_t ctx)
{
	int error                 = 0;
	struct vnode *lowerrootvp = NULL, *vp = NULL;
	struct vfsstatfs * sp   = NULL;
	struct null_mount * xmp = NULL;
	char data[MAXPATHLEN];
	size_t count;
	struct vfs_attr vfa;
	/* set defaults (arbitrary since this file system is readonly) */
	uint32_t bsize  = BLKDEV_IOSIZE;
	size_t iosize   = BLKDEV_IOSIZE;
	uint64_t blocks = 4711 * 4711;
	uint64_t bfree  = 0;
	uint64_t bavail = 0;
	uint64_t bused  = 4711;
	uint64_t files  = 4711;
	uint64_t ffree  = 0;

	kauth_cred_t cred = vfs_context_ucred(ctx);

	NULLFSDEBUG("nullfs_mount(mp = %p) %llx\n", (void *)mp, vfs_flags(mp));

	if (vfs_flags(mp) & MNT_ROOTFS)
		return (EOPNOTSUPP);

	/*
	 * Update is a no-op
	 */
	if (vfs_isupdate(mp)) {
		return ENOTSUP;
	}

	/* check entitlement */
	if (!IOTaskHasEntitlement(current_task(), NULLFS_ENTITLEMENT)) {
		return EPERM;
	}

	/*
	 * Get argument
	 */
	error = copyinstr(user_data, data, MAXPATHLEN - 1, &count);
	if (error) {
		NULLFSDEBUG("nullfs: error copying data form user %d\n", error);
		goto error;
	}

	/* This could happen if the system is configured for 32 bit inodes instead of
	 * 64 bit */
	if (count > MAX_MNT_FROM_LENGTH) {
		error = EINVAL;
		NULLFSDEBUG("nullfs: path to translocate too large for this system %d vs %d\n", count, MAX_MNT_FROM_LENGTH);
		goto error;
	}

	error = vnode_lookup(data, 0, &lowerrootvp, ctx);
	if (error) {
		NULLFSDEBUG("lookup %s -> %d\n", data, error);
		goto error;
	}

	/* lowerrootvp has an iocount after vnode_lookup; trade it for a usecount.
	   Keep that to signal that we want the vnode we are mirroring kept around.
	   Drop it in unmount. */
	error = vnode_ref(lowerrootvp);
	vnode_put(lowerrootvp);
	if (error)
	{
		// If vnode_ref failed, then null it out so it can't be used anymore in cleanup.
		lowerrootvp = NULL;
		goto error;
	}

	NULLFSDEBUG("mount %s\n", data);

	MALLOC(xmp, struct null_mount *, sizeof(*xmp), M_TEMP, M_WAITOK | M_ZERO);
	if (xmp == NULL) {
		error = ENOMEM;
		goto error;
	}

	/*
	 * Save reference to underlying FS
	 */
	xmp->nullm_lowerrootvp  = lowerrootvp;
	xmp->nullm_lowerrootvid = vnode_vid(lowerrootvp);

	error = null_getnewvnode(mp, NULL, NULL, &vp, NULL, 1);
	if (error) {
		goto error;
	}

	/* vp has an iocount on it from vnode_create. drop that for a usecount. This
	 * is our root vnode so we drop the ref in unmount
	 *
	 * Assuming for now that because we created this vnode and we aren't finished mounting we can get a ref*/
	vnode_ref(vp);
	vnode_put(vp);

	error = nullfs_init_lck(&xmp->nullm_lock);
	if (error) {
		goto error;
	}

	xmp->nullm_rootvp = vp;

	/* read the flags the user set, but then ignore some of them, we will only
	   allow them if they are set on the lower file system */
	uint64_t flags      = vfs_flags(mp) & (~(MNT_IGNORE_OWNERSHIP | MNT_LOCAL));
	uint64_t lowerflags = vfs_flags(vnode_mount(lowerrootvp)) & (MNT_LOCAL | MNT_QUARANTINE | MNT_IGNORE_OWNERSHIP | MNT_NOEXEC);

	if (lowerflags) {
		flags |= lowerflags;
	}

	/* force these flags */
	flags |= (MNT_DONTBROWSE | MNT_MULTILABEL | MNT_NOSUID | MNT_RDONLY);
	vfs_setflags(mp, flags);

	vfs_setfsprivate(mp, xmp);
	vfs_getnewfsid(mp);
	vfs_setlocklocal(mp);

	/* fill in the stat block */
	sp = vfs_statfs(mp);
	strlcpy(sp->f_mntfromname, data, MAX_MNT_FROM_LENGTH);

	sp->f_flags = flags;

	xmp->nullm_flags = NULLM_CASEINSENSITIVE; /* default to case insensitive */

	error = nullfs_vfs_getlowerattr(vnode_mount(lowerrootvp), &vfa, ctx);
	if (error == 0) {
		if (VFSATTR_IS_SUPPORTED(&vfa, f_bsize)) {
			bsize = vfa.f_bsize;
		}
		if (VFSATTR_IS_SUPPORTED(&vfa, f_iosize)) {
			iosize = vfa.f_iosize;
		}
		if (VFSATTR_IS_SUPPORTED(&vfa, f_blocks)) {
			blocks = vfa.f_blocks;
		}
		if (VFSATTR_IS_SUPPORTED(&vfa, f_bfree)) {
			bfree = vfa.f_bfree;
		}
		if (VFSATTR_IS_SUPPORTED(&vfa, f_bavail)) {
			bavail = vfa.f_bavail;
		}
		if (VFSATTR_IS_SUPPORTED(&vfa, f_bused)) {
			bused = vfa.f_bused;
		}
		if (VFSATTR_IS_SUPPORTED(&vfa, f_files)) {
			files = vfa.f_files;
		}
		if (VFSATTR_IS_SUPPORTED(&vfa, f_ffree)) {
			ffree = vfa.f_ffree;
		}
		if (VFSATTR_IS_SUPPORTED(&vfa, f_capabilities)) {
			if ((vfa.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & (VOL_CAP_FMT_CASE_SENSITIVE)) &&
			    (vfa.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & (VOL_CAP_FMT_CASE_SENSITIVE))) {
				xmp->nullm_flags &= ~NULLM_CASEINSENSITIVE;
			}
		}
	} else {
		goto error;
	}

	sp->f_bsize  = bsize;
	sp->f_iosize = iosize;
	sp->f_blocks = blocks;
	sp->f_bfree  = bfree;
	sp->f_bavail = bavail;
	sp->f_bused  = bused;
	sp->f_files  = files;
	sp->f_ffree  = ffree;

	/* Associate the mac label information from the mirrored filesystem with the
	 * mirror */
	MAC_PERFORM(mount_label_associate, cred, vnode_mount(lowerrootvp), vfs_mntlabel(mp));

	NULLFSDEBUG("nullfs_mount: lower %s, alias at %s\n", sp->f_mntfromname, sp->f_mntonname);
	return (0);

error:
	if (xmp) {
		FREE(xmp, M_TEMP);
	}
	if (lowerrootvp) {
		vnode_getwithref(lowerrootvp);
		vnode_rele(lowerrootvp);
		vnode_put(lowerrootvp);
	}
	if (vp) {
		/* we made the root vnode but the mount is failed, so clean it up */
		vnode_getwithref(vp);
		vnode_rele(vp);
		/* give vp back */
		vnode_recycle(vp);
		vnode_put(vp);
	}
	return error;
}
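
vnode_lookup() and the vnode_create() inside null_getnewvnode() both return with a short-term iocount, which the mount immediately trades for a long-term usecount so the vnode is not recycled underneath the mirror. That swap, isolated into a hypothetical helper (kernel vnode KPIs assumed, not a drop-in from this source):

static int
keep_usecount_sketch(vnode_t vp)
{
	int error;

	error = vnode_ref(vp);	/* long-term usecount keeps the vnode from being recycled */
	vnode_put(vp);		/* drop the short-term iocount either way */
	return error;
}
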
Example n. 23
/*
 *	Routine:	macx_swapon
 *	Function:
 *		Syscall interface to add a file to backing store
 */
int
macx_swapon(
	struct macx_swapon_args *args)
{
	int			size = args->size;
	vnode_t			vp = (vnode_t)NULL; 
	struct nameidata 	nd, *ndp;
	register int		error;
	kern_return_t		kr;
	mach_port_t		backing_store;
	memory_object_default_t	default_pager;
	int			i;
	boolean_t		funnel_state;
	off_t			file_size;
	vfs_context_t		ctx = vfs_context_current();
	struct proc		*p =  current_proc();
	int			dp_cluster_size;


	AUDIT_MACH_SYSCALL_ENTER(AUE_SWAPON);
	AUDIT_ARG(value32, args->priority);

	funnel_state = thread_funnel_set(kernel_flock, TRUE);
	ndp = &nd;

	if ((error = suser(kauth_cred_get(), 0)))
		goto swapon_bailout;

	/*
	 * Get a vnode for the paging area.
	 */
	NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
	       ((IS_64BIT_PROCESS(p)) ? UIO_USERSPACE64 : UIO_USERSPACE32),
	       (user_addr_t) args->filename, ctx);

	if ((error = namei(ndp)))
		goto swapon_bailout;
	nameidone(ndp);
	vp = ndp->ni_vp;

	if (vp->v_type != VREG) {
		error = EINVAL;
		goto swapon_bailout;
	}

	/* get file size */
	if ((error = vnode_size(vp, &file_size, ctx)) != 0)
		goto swapon_bailout;
#if CONFIG_MACF
	vnode_lock(vp);
	error = mac_system_check_swapon(vfs_context_ucred(ctx), vp);
	vnode_unlock(vp);
	if (error)
		goto swapon_bailout;
#endif

	/* resize to desired size if it's too small */
	if ((file_size < (off_t)size) && ((error = vnode_setsize(vp, (off_t)size, 0, ctx)) != 0))
		goto swapon_bailout;

	if (default_pager_init_flag == 0) {
		start_def_pager(NULL);
		default_pager_init_flag = 1;
	}

	/* add new backing store to list */
	for (i = 0; i < MAX_BACKING_STORE; i++) {
		/* check the bound before indexing, so the scan never reads past the table */
		if (bs_port_table[i].vp == 0)
			break;
	}
	if(i == MAX_BACKING_STORE) {
	   	error = ENOMEM;
		goto swapon_bailout;
	}

	/* remember the vnode. This vnode has namei() reference */
	bs_port_table[i].vp = vp;
	
	/*
	 * Look to see if we are already paging to this file.
	 */
	/* make certain the copy send of kernel call will work */
	default_pager = MEMORY_OBJECT_DEFAULT_NULL;
	kr = host_default_memory_manager(host_priv_self(), &default_pager, 0);
	if(kr != KERN_SUCCESS) {
	   error = EAGAIN;
	   bs_port_table[i].vp = 0;
	   goto swapon_bailout;
	}

	if (vp->v_mount->mnt_kern_flag & MNTK_SSD) {
		/*
		 * keep the cluster size small since the
		 * seek cost is effectively 0 which means
		 * we don't care much about fragmentation
		 */
		dp_isssd = TRUE;
		dp_cluster_size = 2 * PAGE_SIZE;
	} else {
		/*
		 * use the default cluster size
		 */
		dp_isssd = FALSE;
		dp_cluster_size = 0;
	}
	kr = default_pager_backing_store_create(default_pager, 
					-1, /* default priority */
					dp_cluster_size,
					&backing_store);
	memory_object_default_deallocate(default_pager);

	if(kr != KERN_SUCCESS) {
	   error = ENOMEM;
	   bs_port_table[i].vp = 0;
	   goto swapon_bailout;
	}

	/* Mark this vnode as being used for swapfile */
	vnode_lock_spin(vp);
	SET(vp->v_flag, VSWAP);
	vnode_unlock(vp);

	/*
	 * NOTE: we are able to supply PAGE_SIZE here instead of
	 *	an actual record size or block number because:
	 *	a: we do not support offsets from the beginning of the
	 *		file (allowing for non page size/record modulo offsets), and
	 *	b: paging will be done modulo page size.
	 */

	kr = default_pager_add_file(backing_store, (vnode_ptr_t) vp,
				PAGE_SIZE, (int)(file_size/PAGE_SIZE));
	if(kr != KERN_SUCCESS) {
	   bs_port_table[i].vp = 0;
	   if(kr == KERN_INVALID_ARGUMENT)
		error = EINVAL;
	   else 
		error = ENOMEM;

	   /* This vnode is not to be used for swapfile */
	   vnode_lock_spin(vp);
	   CLR(vp->v_flag, VSWAP);
	   vnode_unlock(vp);

	   goto swapon_bailout;
	}
	bs_port_table[i].bs = (void *)backing_store;
	error = 0;

	ubc_setthreadcred(vp, p, current_thread());

	/*
	 * take a long term reference on the vnode to keep
	 * vnreclaim() away from this vnode.
	 */
	vnode_ref(vp);

swapon_bailout:
	if (vp) {
		vnode_put(vp);
	}
	(void) thread_funnel_set(kernel_flock, FALSE);
	AUDIT_MACH_SYSCALL_EXIT(error);
	return(error);
}
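
The free-slot scan above must stop before indexing past bs_port_table. Reduced to a standalone sketch with illustrative names (the real table holds vnode/backing-store pairs, not bare pointers):

#include <stddef.h>

/* Return the first free index in table[0..nslots), or -1 if the table is full. */
static int
find_free_slot(void **table, int nslots)
{
	int i;

	for (i = 0; i < nslots; i++) {
		if (table[i] == NULL)
			return i;
	}
	return -1;
}
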
Example n. 24
IOReturn FileNVRAM::write_buffer(char* aBuffer, vfs_context_t aCtx)
{
	IOReturn error = 0;

	int length = (int)strlen(aBuffer);
	
	struct vnode * vp;
	
	if (aCtx)
	{
		if ((error = vnode_open(FILE_NVRAM_PATH, (O_TRUNC | O_CREAT | FWRITE | O_NOFOLLOW), S_IRUSR | S_IWUSR, VNODE_LOOKUP_NOFOLLOW, &vp, aCtx)))
		{
			printf("FileNVRAM.kext: Error, vnode_open(%s) failed with error %d!\n", FILE_NVRAM_PATH, error);
			
			return error;
		}
		else
		{
			if (vnode_isreg(vp))
			{
				if ((error = vn_rdwr(UIO_WRITE, vp, aBuffer, length, 0, UIO_SYSSPACE, IO_NOCACHE|IO_NODELOCKED|IO_UNIT, vfs_context_ucred(aCtx), (int *) 0, vfs_context_proc(aCtx))))
				{
					printf("FileNVRAM.kext: Error, vn_rdwr(%s) failed with error %d!\n", FILE_NVRAM_PATH, error);
				}
				
				if ((error = vnode_close(vp, FWASWRITTEN, aCtx)))
				{
					printf("FileNVRAM.kext: Error, vnode_close(%s) failed with error %d!\n", FILE_NVRAM_PATH, error);
				}
			}
			else
			{
				printf("FileNVRAM.kext: Error, vnode_isreg(%s) failed: not a regular file!\n", FILE_NVRAM_PATH);
				error = EINVAL;
				/* still close the vnode opened above so it isn't leaked */
				(void)vnode_close(vp, 0, aCtx);
			}
		}
	}
	else
	{
		printf("FileNVRAM.kext: aCtx == NULL!\n");
		error = EINVAL;
	}
	
	return error;
}
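
A hedged read-side counterpart to write_buffer(): the same vnode_open()/vn_rdwr()/vnode_close() KPIs driven with UIO_READ. This is a sketch of the pattern, not code from FileNVRAM; path, buf, and len are caller-supplied:

static int
read_file_sketch(const char *path, char *buf, int len, vfs_context_t ctx)
{
	vnode_t vp;
	int resid = 0;
	int error;

	if ((error = vnode_open(path, (FREAD | O_NOFOLLOW), 0,
	    VNODE_LOOKUP_NOFOLLOW, &vp, ctx)))
		return error;

	if (vnode_isreg(vp)) {
		error = vn_rdwr(UIO_READ, vp, (caddr_t)buf, len, 0,
		    UIO_SYSSPACE, IO_NOCACHE | IO_NODELOCKED | IO_UNIT,
		    vfs_context_ucred(ctx), &resid, vfs_context_proc(ctx));
	} else {
		error = EINVAL;
	}

	(void)vnode_close(vp, 0, ctx);	/* close whether or not the read worked */
	return error;
}
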
Example n. 25
/*
 *	Routine:	macx_swapoff
 *	Function:
 *		Syscall interface to remove a file from backing store
 */
int
macx_swapoff(
	struct macx_swapoff_args *args)
{
	__unused int	flags = args->flags;
	kern_return_t	kr;
	mach_port_t	backing_store;

	struct vnode		*vp = 0; 
	struct nameidata 	nd, *ndp;
	struct proc		*p =  current_proc();
	int			i;
	int			error;
	boolean_t		funnel_state;
	vfs_context_t ctx = vfs_context_current();

	AUDIT_MACH_SYSCALL_ENTER(AUE_SWAPOFF);

	funnel_state = thread_funnel_set(kernel_flock, TRUE);
	backing_store = NULL;
	ndp = &nd;

	if ((error = suser(kauth_cred_get(), 0)))
		goto swapoff_bailout;

	/*
	 * Get the vnode for the paging area.
	 */
	NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
	       ((IS_64BIT_PROCESS(p)) ? UIO_USERSPACE64 : UIO_USERSPACE32),
	       (user_addr_t) args->filename, ctx);

	if ((error = namei(ndp)))
		goto swapoff_bailout;
	nameidone(ndp);
	vp = ndp->ni_vp;

	if (vp->v_type != VREG) {
		error = EINVAL;
		goto swapoff_bailout;
	}
#if CONFIG_MACF
	vnode_lock(vp);
	error = mac_system_check_swapoff(vfs_context_ucred(ctx), vp);
	vnode_unlock(vp);
	if (error)
		goto swapoff_bailout;
#endif

	for(i = 0; i < MAX_BACKING_STORE; i++) {
		if(bs_port_table[i].vp == vp) {
			break;
		}
	}
	if (i == MAX_BACKING_STORE) {
		error = EINVAL;
		goto swapoff_bailout;
	}
	backing_store = (mach_port_t)bs_port_table[i].bs;

	kr = default_pager_backing_store_delete(backing_store);
	switch (kr) {
		case KERN_SUCCESS:
			error = 0;
			bs_port_table[i].vp = 0;
			/* This vnode is no longer used for swapfile */
			vnode_lock_spin(vp);
			CLR(vp->v_flag, VSWAP);
			vnode_unlock(vp);

			/* get rid of macx_swapon() "long term" reference */
			vnode_rele(vp);

			break;
		case KERN_FAILURE:
			error = EAGAIN;
			break;
		default:
			error = EAGAIN;
			break;
	}

swapoff_bailout:
	/* get rid of macx_swapoff() namei() reference */
	if (vp)
		vnode_put(vp);

	(void) thread_funnel_set(kernel_flock, FALSE);
	AUDIT_MACH_SYSCALL_EXIT(error);
	return(error);
}
Example n. 26
void fileattrpack(attrinfo_t *aip, zfsvfs_t *zfsvfs, znode_t *zp)
{
	attrgroup_t fileattr = aip->ai_attrlist->fileattr;
	void *attrbufptr = *aip->ai_attrbufpp;
	void *varbufptr = *aip->ai_varbufpp;
	uint64_t allocsize = 0;
	cred_t  *cr = (cred_t *)vfs_context_ucred(aip->ai_context);

	if ((ATTR_FILE_ALLOCSIZE | ATTR_FILE_DATAALLOCSIZE) & fileattr && zp) {
		uint32_t  blksize;
		u_longlong_t  nblks;

		sa_object_size(zp->z_sa_hdl, &blksize, &nblks);
		allocsize = (uint64_t)512LL * (uint64_t)nblks;
	}
	if (ATTR_FILE_LINKCOUNT & fileattr) {
        uint64_t val;
        VERIFY(sa_lookup(zp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs),
                         &val, sizeof(val)) == 0);
		*((u_int32_t *)attrbufptr) = val;
		attrbufptr = ((u_int32_t *)attrbufptr) + 1;
	}
	if (ATTR_FILE_TOTALSIZE & fileattr) {
        uint64_t val;
        VERIFY(sa_lookup(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs),
                         &val, sizeof(val)) == 0);
		*((off_t *)attrbufptr) = val;
		attrbufptr = ((off_t *)attrbufptr) + 1;
	}
	if (ATTR_FILE_ALLOCSIZE & fileattr) {
		*((off_t *)attrbufptr) = allocsize;
		attrbufptr = ((off_t *)attrbufptr) + 1;
	}
	if (ATTR_FILE_IOBLOCKSIZE & fileattr && zp) {
		*((u_int32_t *)attrbufptr) =
            zp->z_blksz ? zp->z_blksz : zfsvfs->z_max_blksz;
		attrbufptr = ((u_int32_t *)attrbufptr) + 1;
	}
	if (ATTR_FILE_DEVTYPE & fileattr) {
        uint64_t mode, val=0;
        VERIFY(sa_lookup(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs),
                         &mode, sizeof(mode)) == 0);
        sa_lookup(zp->z_sa_hdl, SA_ZPL_RDEV(zfsvfs),
                  &val, sizeof(val));
		if (S_ISBLK(mode) || S_ISCHR(mode))
			*((u_int32_t *)attrbufptr) = (u_int32_t)val;
		else
			*((u_int32_t *)attrbufptr) = 0;
		attrbufptr = ((u_int32_t *)attrbufptr) + 1;
	}
	if (ATTR_FILE_DATALENGTH & fileattr) {
        uint64_t val;
        VERIFY(sa_lookup(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs),
                         &val, sizeof(val)) == 0);
		*((off_t *)attrbufptr) = val;
		attrbufptr = ((off_t *)attrbufptr) + 1;
	}
	if (ATTR_FILE_DATAALLOCSIZE & fileattr) {
		*((off_t *)attrbufptr) = allocsize;
		attrbufptr = ((off_t *)attrbufptr) + 1;
	}
	if ((ATTR_FILE_RSRCLENGTH | ATTR_FILE_RSRCALLOCSIZE) & fileattr) {
		uint64_t rsrcsize = 0;
        uint64_t xattr;

        if (!sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
                       &xattr, sizeof(xattr)) &&
            xattr) {
			vnode_t *xdvp = NULLVP;
			vnode_t *xvp = NULLVP;
			struct componentname  cn;

			bzero(&cn, sizeof (cn));
			cn.cn_nameiop = LOOKUP;
			cn.cn_flags = ISLASTCN;
			cn.cn_nameptr = XATTR_RESOURCEFORK_NAME;
			cn.cn_namelen = strlen(cn.cn_nameptr);

			/* Grab the hidden attribute directory vnode. */
			if (zfs_get_xattrdir(zp, &xdvp, cr, 0) == 0 &&
			    zfs_dirlook(VTOZ(xdvp), cn.cn_nameptr, &xvp, 0, NULL,
                            &cn) == 0) {
				rsrcsize = VTOZ(xvp)->z_size;
			}
			if (xvp)
				vnode_put(xvp);
			if (xdvp)
				vnode_put(xdvp);
		}
		if (ATTR_FILE_RSRCLENGTH & fileattr) {
			*((off_t *)attrbufptr) = rsrcsize;
			attrbufptr = ((off_t *)attrbufptr) + 1;
		}
		if (ATTR_FILE_RSRCALLOCSIZE & fileattr) {
			*((off_t *)attrbufptr) = roundup(rsrcsize, 512);
			attrbufptr = ((off_t *)attrbufptr) + 1;
		}
	}
	*aip->ai_attrbufpp = attrbufptr;
	*aip->ai_varbufpp = varbufptr;
}
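
Every branch in fileattrpack() follows the same pack-and-advance shape: store a fixed-width value at the buffer cursor, then bump the cursor by exactly that width. The shape as a macro; ATTR_PACK is a hypothetical name, not one defined in this source:

#define ATTR_PACK(bufpp, type, value) do {			\
	*((type *)(*(bufpp))) = (value);			\
	*(bufpp) = (void *)(((type *)(*(bufpp))) + 1);		\
} while (0)

/* e.g. the ATTR_FILE_LINKCOUNT branch above collapses to: */
/*	ATTR_PACK(&attrbufptr, u_int32_t, val);            */
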
Example n. 27
int
vm_swapfile_io(vnode_t vp, uint64_t offset, uint64_t start, int npages, int flags)
{
	int error = 0;
	uint64_t io_size = npages * PAGE_SIZE_64;
#if 1
	kern_return_t	kr = KERN_SUCCESS;
	upl_t		upl = NULL;
	unsigned int	count = 0;
	upl_control_flags_t upl_create_flags = 0;
	int		upl_control_flags = 0;
	upl_size_t	upl_size = 0;

	upl_create_flags = UPL_SET_INTERNAL | UPL_SET_LITE
			| UPL_MEMORY_TAG_MAKE(VM_KERN_MEMORY_OSFMK);

#if ENCRYPTED_SWAP
	upl_control_flags = UPL_IOSYNC | UPL_PAGING_ENCRYPTED;
#else
	upl_control_flags = UPL_IOSYNC;
#endif
	if ((flags & SWAP_READ) == FALSE) {
		upl_create_flags |= UPL_COPYOUT_FROM;
	}
 
	upl_size = io_size;
	kr = vm_map_create_upl( kernel_map,
				start,
				&upl_size,
				&upl,
				NULL,
				&count,
				&upl_create_flags);

	if (kr != KERN_SUCCESS || (upl_size != io_size)) {
		panic("vm_map_create_upl failed with %d\n", kr);
	}

	if (flags & SWAP_READ) {
		vnode_pagein(vp,
			      upl,
			      0,
			      offset,
			      io_size,
			      upl_control_flags | UPL_IGNORE_VALID_PAGE_CHECK,
			      &error);
		if (error) {
#if DEBUG
			printf("vm_swapfile_io: vnode_pagein failed with %d (vp: %p, offset: 0x%llx, size:%llu)\n", error, vp, offset, io_size);
#else /* DEBUG */
			printf("vm_swapfile_io: vnode_pagein failed with %d.\n", error);
#endif /* DEBUG */
		}
	
	} else {
		vnode_pageout(vp,
			      upl,
			      0,
			      offset,
			      io_size,
			      upl_control_flags,
			      &error);
		if (error) {
#if DEBUG
			printf("vm_swapfile_io: vnode_pageout failed with %d (vp: %p, offset: 0x%llx, size:%llu)\n", error, vp, offset, io_size);
#else /* DEBUG */
			printf("vm_swapfile_io: vnode_pageout failed with %d.\n", error);
#endif /* DEBUG */
		}
	}
	return error;

#else /* 1 */
	vfs_context_t ctx;
	ctx = vfs_context_kernel();
		
	error = vn_rdwr((flags & SWAP_READ) ? UIO_READ : UIO_WRITE, vp, (caddr_t)start, io_size, offset,
		UIO_SYSSPACE, IO_SYNC | IO_NODELOCKED | IO_UNIT | IO_NOCACHE | IO_SWAP_DISPATCH, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));

	if (error) {
		printf("vn_rdwr: Swap I/O failed with %d\n", error);
	}
	return error;
#endif /* 1 */
}
Example n. 28
IOReturn VNodeDiskDeviceClass::doAsyncReadWrite(
  IOMemoryDescriptor *buffer, UInt64 block, UInt64 nblks, 
  IOStorageAttributes *attributes, IOStorageCompletion *completion)
{    
  IOLog("doAsyncReadWrite with parameters %llu block num, %llu num of blocks\n", block, nblks);

  if (m_vnode == NULL)
    return kIOReturnIOError;

  IOReturn returnMessage = kIOReturnSuccess;
  if ((block + nblks - 1) >= m_blockNum || nblks == 0) {
    IOLog("Attempting to write outside vnode disk\n");
    return kIOReturnIOError;
  }

  IODirection direction = buffer->getDirection();
  if ((direction != kIODirectionIn) && (direction != kIODirectionOut)) {
    IOLog("No valid direction of transfer: required either in or out\n");
    return kIOReturnIOError;
  }

  IOByteCount actualByteCount = nblks * m_blockSize;
  off_t byteOffset = block * m_blockSize;
  int aresid = -1;

  char * rawBuffer = (char *) IOMalloc(sizeof(char) * actualByteCount);
  if (rawBuffer == NULL) {
    IOLog("Unable to allocate buffer\n");
    return kIOReturnIOError;
  }

  vfs_context_t vfsContext = vfs_context_create((vfs_context_t) 0);
  proc_t proc = vfs_context_proc(vfsContext);
  kauth_cred_t cr = vfs_context_ucred(vfsContext);

  if (direction == kIODirectionIn) {
    IOLog("Reading from disk\n");

    // TODO: Remove warning (unsigned long long) -> int
    int readError = vn_rdwr(UIO_READ, m_vnode, (caddr_t) rawBuffer,
      (int) actualByteCount, byteOffset, UIO_SYSSPACE, 0, cr, &aresid, proc);

    if (readError || aresid > 0) {
      returnMessage = kIOReturnIOError;
      goto cleanup;
    }

    buffer->writeBytes(0, rawBuffer, actualByteCount); // Why the offset?
  } else { // (direction == kIODirectionOut)
    IOLog("Writing to disk\n");

    buffer->readBytes(0, rawBuffer, actualByteCount); // first arg is offset

    // TODO: Remove warning (unsigned long long) -> int
    int writeError = vn_rdwr(UIO_WRITE, m_vnode, (caddr_t) rawBuffer,
      (int) actualByteCount, byteOffset, UIO_SYSSPACE, 0, cr, &aresid, proc);

    if (writeError || aresid > 0) {
      returnMessage = kIOReturnIOError;
      goto cleanup;
    }
  }

cleanup:
  vfs_context_rele(vfsContext);
  if (rawBuffer)
    IOFree(rawBuffer, sizeof(char) * actualByteCount);

  /* aresid < 0 means nothing was transferred; aresid > 0 means a short transfer */
  if (aresid < 0)
    actualByteCount = 0;
  else if ((IOByteCount) aresid < actualByteCount)
    actualByteCount -= aresid;

  completion->action(completion->target, completion->parameter, kIOReturnSuccess, 
    actualByteCount);
  return returnMessage;
}
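
The `(block + nblks - 1) >= m_blockNum` test above can wrap when block + nblks overflows 64 bits. An overflow-safe variant of the same bounds check, as a standalone sketch:

#include <stdbool.h>
#include <stdint.h>

static bool
range_ok(uint64_t block, uint64_t nblks, uint64_t total_blocks)
{
	if (nblks == 0 || block >= total_blocks)
		return false;
	/* written so block + nblks is never computed and cannot wrap */
	return nblks <= total_blocks - block;
}
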
Example n. 29
static errno_t
fuse_vfsop_mount(mount_t mp, __unused vnode_t devvp, user_addr_t udata,
                 vfs_context_t context)
{
    int err      = 0;
    int mntopts  = 0;
    bool mounted = false;

    uint32_t max_read = ~0;

    size_t len;

    fuse_device_t      fdev = NULL;
    struct fuse_data  *data = NULL;
    fuse_mount_args    fusefs_args;
    struct vfsstatfs  *vfsstatfsp = vfs_statfs(mp);

#if M_FUSE4X_ENABLE_BIGLOCK
    lck_mtx_t         *biglock;
#endif

    fuse_trace_printf_vfsop();

    if (vfs_isupdate(mp)) {
        return ENOTSUP;
    }

    err = copyin(udata, &fusefs_args, sizeof(fusefs_args));
    if (err) {
        return EINVAL;
    }

    /*
     * Interesting flags that we can receive from mount or may want to
     * otherwise forcibly set include:
     *
     *     MNT_ASYNC
     *     MNT_AUTOMOUNTED
     *     MNT_DEFWRITE
     *     MNT_DONTBROWSE
     *     MNT_IGNORE_OWNERSHIP
     *     MNT_JOURNALED
     *     MNT_NODEV
     *     MNT_NOEXEC
     *     MNT_NOSUID
     *     MNT_NOUSERXATTR
     *     MNT_RDONLY
     *     MNT_SYNCHRONOUS
     *     MNT_UNION
     */

    err = ENOTSUP;

#if M_FUSE4X_ENABLE_UNSUPPORTED
    vfs_setlocklocal(mp);
#endif /* M_FUSE4X_ENABLE_UNSUPPORTED */

    /** Option Processing. **/

    if (*fusefs_args.fstypename) {
        size_t typenamelen = strlen(fusefs_args.fstypename);
        if (typenamelen > FUSE_FSTYPENAME_MAXLEN) {
            return EINVAL;
        }
        snprintf(vfsstatfsp->f_fstypename, MFSTYPENAMELEN, "%s%s",
                 FUSE_FSTYPENAME_PREFIX, fusefs_args.fstypename);
    }

    if (!*fusefs_args.fsname)
        return EINVAL;

    if ((fusefs_args.daemon_timeout > FUSE_MAX_DAEMON_TIMEOUT) ||
            (fusefs_args.daemon_timeout < FUSE_MIN_DAEMON_TIMEOUT)) {
        return EINVAL;
    }

    if ((fusefs_args.init_timeout > FUSE_MAX_INIT_TIMEOUT) ||
            (fusefs_args.init_timeout < FUSE_MIN_INIT_TIMEOUT)) {
        return EINVAL;
    }

    if (fusefs_args.altflags & FUSE_MOPT_SPARSE) {
        mntopts |= FSESS_SPARSE;
    }

    if (fusefs_args.altflags & FUSE_MOPT_AUTO_CACHE) {
        mntopts |= FSESS_AUTO_CACHE;
    }

    if (fusefs_args.altflags & FUSE_MOPT_AUTO_XATTR) {
        if (fusefs_args.altflags & FUSE_MOPT_NATIVE_XATTR) {
            return EINVAL;
        }
        mntopts |= FSESS_AUTO_XATTR;
    } else if (fusefs_args.altflags & FUSE_MOPT_NATIVE_XATTR) {
        mntopts |= FSESS_NATIVE_XATTR;
    }

    if (fusefs_args.altflags & FUSE_MOPT_JAIL_SYMLINKS) {
        mntopts |= FSESS_JAIL_SYMLINKS;
    }

    /*
     * Note that unlike Linux, which keeps allow_root in user-space and
     * passes allow_other in that case to the kernel, we let allow_root
     * reach the kernel. The 'if' ordering is important here.
     */
    if (fusefs_args.altflags & FUSE_MOPT_ALLOW_ROOT) {
        int is_member = 0;
        if ((kauth_cred_ismember_gid(kauth_cred_get(), fuse_admin_group, &is_member) != 0) || !is_member) {
            log("fuse4x: caller is not a member of fuse4x admin group. "
                "Either add user (id=%d) to group (id=%d), "
                "or set correct '" SYSCTL_FUSE4X_TUNABLES_ADMIN "' sysctl value.\n",
                kauth_cred_getuid(kauth_cred_get()), fuse_admin_group);
            return EPERM;
        }
        mntopts |= FSESS_ALLOW_ROOT;
    } else if (fusefs_args.altflags & FUSE_MOPT_ALLOW_OTHER) {
        if (!fuse_allow_other && !fuse_vfs_context_issuser(context)) {
            int is_member = 0;
            if ((kauth_cred_ismember_gid(kauth_cred_get(), fuse_admin_group, &is_member) != 0) || !is_member) {
                log("fuse4x: caller is not a member of fuse4x admin group. "
                    "Either add user (id=%d) to group (id=%d), "
                    "or set correct '" SYSCTL_FUSE4X_TUNABLES_ADMIN "' sysctl value.\n",
                    kauth_cred_getuid(kauth_cred_get()), fuse_admin_group);
                return EPERM;
            }
        }
        mntopts |= FSESS_ALLOW_OTHER;
    }

    if (fusefs_args.altflags & FUSE_MOPT_NO_APPLEDOUBLE) {
        mntopts |= FSESS_NO_APPLEDOUBLE;
    }

    if (fusefs_args.altflags & FUSE_MOPT_NO_APPLEXATTR) {
        mntopts |= FSESS_NO_APPLEXATTR;
    }

    if ((fusefs_args.altflags & FUSE_MOPT_FSID) && (fusefs_args.fsid != 0)) {
        fsid_t   fsid;
        mount_t  other_mp;
        uint32_t target_dev;

        target_dev = FUSE_MAKEDEV(FUSE_CUSTOM_FSID_DEVICE_MAJOR,
                                  fusefs_args.fsid);

        fsid.val[0] = target_dev;
        fsid.val[1] = FUSE_CUSTOM_FSID_VAL1;

        other_mp = vfs_getvfs(&fsid);
        if (other_mp != NULL) {
            return EPERM;
        }

        vfsstatfsp->f_fsid.val[0] = target_dev;
        vfsstatfsp->f_fsid.val[1] = FUSE_CUSTOM_FSID_VAL1;

    } else {
        vfs_getnewfsid(mp);
    }

    if (fusefs_args.altflags & FUSE_MOPT_NO_ATTRCACHE) {
        mntopts |= FSESS_NO_ATTRCACHE;
    }

    if (fusefs_args.altflags & FUSE_MOPT_NO_READAHEAD) {
        mntopts |= FSESS_NO_READAHEAD;
    }

    if (fusefs_args.altflags & (FUSE_MOPT_NO_UBC | FUSE_MOPT_DIRECT_IO)) {
        mntopts |= FSESS_NO_UBC;
    }

    if (fusefs_args.altflags & FUSE_MOPT_NO_VNCACHE) {
        mntopts |= FSESS_NO_VNCACHE;
    }

    if (fusefs_args.altflags & FUSE_MOPT_NEGATIVE_VNCACHE) {
        if (mntopts & FSESS_NO_VNCACHE) {
            return EINVAL;
        }
        mntopts |= FSESS_NEGATIVE_VNCACHE;
    }

    if (fusefs_args.altflags & FUSE_MOPT_NO_SYNCWRITES) {

        /* Cannot mix 'nosyncwrites' with 'noubc' or 'noreadahead'. */
        if (mntopts & (FSESS_NO_READAHEAD | FSESS_NO_UBC)) {
            log("fuse4x: cannot mix 'nosyncwrites' with 'noubc' or 'noreadahead'\n");
            return EINVAL;
        }

        mntopts |= FSESS_NO_SYNCWRITES;
        vfs_clearflags(mp, MNT_SYNCHRONOUS);
        vfs_setflags(mp, MNT_ASYNC);

        /* We check for this only if we have nosyncwrites in the first place. */
        if (fusefs_args.altflags & FUSE_MOPT_NO_SYNCONCLOSE) {
            mntopts |= FSESS_NO_SYNCONCLOSE;
        }

    } else {
        vfs_clearflags(mp, MNT_ASYNC);
        vfs_setflags(mp, MNT_SYNCHRONOUS);
    }

    if (mntopts & FSESS_NO_UBC) {
        /* If no buffer cache, disallow exec from file system. */
        vfs_setflags(mp, MNT_NOEXEC);
    }

    vfs_setauthopaque(mp);
    vfs_setauthopaqueaccess(mp);

    if ((fusefs_args.altflags & FUSE_MOPT_DEFAULT_PERMISSIONS) &&
            (fusefs_args.altflags & FUSE_MOPT_DEFER_PERMISSIONS)) {
        return EINVAL;
    }

    if (fusefs_args.altflags & FUSE_MOPT_DEFAULT_PERMISSIONS) {
        mntopts |= FSESS_DEFAULT_PERMISSIONS;
        vfs_clearauthopaque(mp);
    }

    if (fusefs_args.altflags & FUSE_MOPT_DEFER_PERMISSIONS) {
        mntopts |= FSESS_DEFER_PERMISSIONS;
    }

    if (fusefs_args.altflags & FUSE_MOPT_EXTENDED_SECURITY) {
        mntopts |= FSESS_EXTENDED_SECURITY;
        vfs_setextendedsecurity(mp);
    }

    if (fusefs_args.altflags & FUSE_MOPT_LOCALVOL) {
        vfs_setflags(mp, MNT_LOCAL);
    }
    /* done checking incoming option bits */

    err = 0;

    vfs_setfsprivate(mp, NULL);

    fdev = fuse_device_get(fusefs_args.rdev);
    if (!fdev) {
        log("fuse4x: invalid device file (number=%d)\n", fusefs_args.rdev);
        return EINVAL;
    }

    fuse_lck_mtx_lock(fdev->mtx);

    data = fdev->data;

    if (!data) {
        fuse_lck_mtx_unlock(fdev->mtx);
        return ENXIO;
    }

#if M_FUSE4X_ENABLE_BIGLOCK
    biglock = data->biglock;
    fuse_biglock_lock(biglock);
#endif

    if (data->dataflags & FSESS_MOUNTED) {
#if M_FUSE4X_ENABLE_BIGLOCK
        fuse_biglock_unlock(biglock);
#endif
        fuse_lck_mtx_unlock(fdev->mtx);
        return EALREADY;
    }

    if (!(data->dataflags & FSESS_OPENED)) {
        fuse_lck_mtx_unlock(fdev->mtx);
        err = ENXIO;
        goto out;
    }

    data->dataflags |= FSESS_MOUNTED;
    OSAddAtomic(1, (SInt32 *)&fuse_mount_count);
    mounted = true;

    if (fdata_dead_get(data)) {
        fuse_lck_mtx_unlock(fdev->mtx);
        err = ENOTCONN;
        goto out;
    }

    if (!data->daemoncred) {
        panic("fuse4x: daemon found but identity unknown");
    }

    if (fuse_vfs_context_issuser(context) &&
            kauth_cred_getuid(vfs_context_ucred(context)) != kauth_cred_getuid(data->daemoncred)) {
        fuse_lck_mtx_unlock(fdev->mtx);
        err = EPERM;
        log("fuse4x: fuse daemon running by user_id=%d does not have privileges to mount on directory %s owned by user_id=%d\n",
            kauth_cred_getuid(data->daemoncred), vfsstatfsp->f_mntonname, kauth_cred_getuid(vfs_context_ucred(context)));
        goto out;
    }

    data->mp = mp;
    data->fdev = fdev;
    data->dataflags |= mntopts;

    data->daemon_timeout.tv_sec =  fusefs_args.daemon_timeout;
    data->daemon_timeout.tv_nsec = 0;
    if (data->daemon_timeout.tv_sec) {
        data->daemon_timeout_p = &(data->daemon_timeout);
    } else {
        data->daemon_timeout_p = NULL;
    }

    data->init_timeout.tv_sec = fusefs_args.init_timeout;
    data->init_timeout.tv_nsec = 0;

    data->max_read = max_read;
    data->fssubtype = fusefs_args.fssubtype;
    data->mountaltflags = fusefs_args.altflags;
    data->noimplflags = (uint64_t)0;

    data->blocksize = fuse_round_size(fusefs_args.blocksize,
                                      FUSE_MIN_BLOCKSIZE, FUSE_MAX_BLOCKSIZE);

    data->iosize = fuse_round_size(fusefs_args.iosize,
                                   FUSE_MIN_IOSIZE, FUSE_MAX_IOSIZE);

    if (data->iosize < data->blocksize) {
        data->iosize = data->blocksize;
    }

    data->userkernel_bufsize = FUSE_DEFAULT_USERKERNEL_BUFSIZE;

    copystr(fusefs_args.fsname, vfsstatfsp->f_mntfromname,
            MNAMELEN - 1, &len);
    bzero(vfsstatfsp->f_mntfromname + len, MNAMELEN - len);

    copystr(fusefs_args.volname, data->volname, MAXPATHLEN - 1, &len);
    bzero(data->volname + len, MAXPATHLEN - len);

    /* previous location of vfs_setioattr() */

    vfs_setfsprivate(mp, data);

    fuse_lck_mtx_unlock(fdev->mtx);

    /* Send a handshake message to the daemon. */
    fuse_send_init(data, context);

    struct vfs_attr vfs_attr;
    VFSATTR_INIT(&vfs_attr);
    // Our vfs_getattr() doesn't look at most *_IS_ACTIVE()'s
    err = fuse_vfsop_getattr(mp, &vfs_attr, context);
    if (!err) {
        vfsstatfsp->f_bsize  = vfs_attr.f_bsize;
        vfsstatfsp->f_iosize = vfs_attr.f_iosize;
        vfsstatfsp->f_blocks = vfs_attr.f_blocks;
        vfsstatfsp->f_bfree  = vfs_attr.f_bfree;
        vfsstatfsp->f_bavail = vfs_attr.f_bavail;
        vfsstatfsp->f_bused  = vfs_attr.f_bused;
        vfsstatfsp->f_files  = vfs_attr.f_files;
        vfsstatfsp->f_ffree  = vfs_attr.f_ffree;
        // vfsstatfsp->f_fsid already handled above
        vfsstatfsp->f_owner  = kauth_cred_getuid(data->daemoncred);
        vfsstatfsp->f_flags  = vfs_flags(mp);
        // vfsstatfsp->f_fstypename already handled above
        // vfsstatfsp->f_mntonname handled elsewhere
        // vfsstatfsp->f_mnfromname already handled above
        vfsstatfsp->f_fssubtype = data->fssubtype;
    }
    if (fusefs_args.altflags & FUSE_MOPT_BLOCKSIZE) {
        vfsstatfsp->f_bsize = data->blocksize;
    } else {
        //data->blocksize = vfsstatfsp->f_bsize;
    }
    if (fusefs_args.altflags & FUSE_MOPT_IOSIZE) {
        vfsstatfsp->f_iosize = data->iosize;
    } else {
        //data->iosize = (uint32_t)vfsstatfsp->f_iosize;
        vfsstatfsp->f_iosize = data->iosize;
    }

out:
    if (err) {
        vfs_setfsprivate(mp, NULL);

        fuse_lck_mtx_lock(fdev->mtx);
        data = fdev->data; /* again */
        if (mounted) {
            OSAddAtomic(-1, (SInt32 *)&fuse_mount_count);
        }
        if (data) {
            data->dataflags &= ~FSESS_MOUNTED;
            if (!(data->dataflags & FSESS_OPENED)) {
#if M_FUSE4X_ENABLE_BIGLOCK
                assert(biglock == data->biglock);
                fuse_biglock_unlock(biglock);
#endif
                fuse_device_close_final(fdev);
                /* data is gone now */
            }
        }
        fuse_lck_mtx_unlock(fdev->mtx);
    } else {
        vnode_t fuse_rootvp = NULLVP;
        err = fuse_vfsop_root(mp, &fuse_rootvp, context);
        if (err) {
            goto out; /* go back and follow error path */
        }
        err = vnode_ref(fuse_rootvp);
        (void)vnode_put(fuse_rootvp);
        if (err) {
            goto out; /* go back and follow error path */
        } else {
            struct vfsioattr ioattr;

            vfs_ioattr(mp, &ioattr);
            ioattr.io_devblocksize = data->blocksize;
            vfs_setioattr(mp, &ioattr);
        }
    }

#if M_FUSE4X_ENABLE_BIGLOCK
    fuse_lck_mtx_lock(fdev->mtx);
    data = fdev->data; /* ...and again */
    if(data) {
        assert(data->biglock == biglock);
        fuse_biglock_unlock(biglock);
    }
    fuse_lck_mtx_unlock(fdev->mtx);
#endif

    return err;
}
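
fuse_round_size() is used above to clamp the daemon-supplied block and I/O sizes into a sane range. Its body is not shown here; the following is a plausible sketch, assuming page-multiple rounding plus clamp-to-range, with an illustrative page size rather than the real one:

#include <stdint.h>

#define SKETCH_PAGE_SIZE 4096u	/* illustrative; a kext would use the real page size */

static uint32_t
round_size_sketch(uint32_t size, uint32_t minsize, uint32_t maxsize)
{
	/* round up to a page multiple, then clamp into [minsize, maxsize] */
	size = (size + SKETCH_PAGE_SIZE - 1) & ~(SKETCH_PAGE_SIZE - 1u);
	if (size < minsize)
		size = minsize;
	if (size > maxsize)
		size = maxsize;
	return size;
}
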
Example n. 30
/*
 * This routine exists to support the load_dylinker().
 *
 * This routine has its own, separate, understanding of the FAT file format,
 * which is terrifically unfortunate.
 */
static
load_return_t
get_macho_vnode(
    char			*path,
    integer_t		archbits,
    struct mach_header	*mach_header,
    off_t			*file_offset,
    off_t			*macho_size,
    struct vnode		**vpp
)
{
    struct vnode		*vp;
    vfs_context_t		ctx = vfs_context_current();
    proc_t			p = vfs_context_proc(ctx);
    kauth_cred_t		kerncred;
    struct nameidata nid, *ndp;
    boolean_t		is_fat;
    struct fat_arch		fat_arch;
    int			error = LOAD_SUCCESS;
    int resid;
    union {
        struct mach_header	mach_header;
        struct fat_header	fat_header;
        char	pad[512];
    } header;
    off_t fsize = (off_t)0;
    int err2;

    /*
     * Capture the kernel credential for use in the actual read of the
     * file, since the user doing the execution may have execute rights
     * but not read rights, but to exec something, we have to either map
     * or read it into the new process address space, which requires
     * read rights.  This is to deal with lack of common credential
     * serialization code which would treat NOCRED as "serialize 'root'".
     */
    kerncred = vfs_context_ucred(vfs_context_kernel());

    ndp = &nid;

    /* init the namei data to point at the user's program file name */
    NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE32, CAST_USER_ADDR_T(path), ctx);

    if ((error = namei(ndp)) != 0) {
        if (error == ENOENT) {
            error = LOAD_ENOENT;
        } else {
            error = LOAD_FAILURE;
        }
        return(error);
    }
    nameidone(ndp);
    vp = ndp->ni_vp;

    /* check for regular file */
    if (vp->v_type != VREG) {
        error = LOAD_PROTECT;
        goto bad1;
    }

    /* get size */
    if ((error = vnode_size(vp, &fsize, ctx)) != 0) {
        error = LOAD_FAILURE;
        goto bad1;
    }

    /* Check mount point */
    if (vp->v_mount->mnt_flag & MNT_NOEXEC) {
        error = LOAD_PROTECT;
        goto bad1;
    }

    /* check access */
    if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_EXECUTE, ctx)) != 0) {
        error = LOAD_PROTECT;
        goto bad1;
    }

    /* try to open it */
    if ((error = VNOP_OPEN(vp, FREAD, ctx)) != 0) {
        error = LOAD_PROTECT;
        goto bad1;
    }

    if ((error = vn_rdwr(UIO_READ, vp, (caddr_t)&header, sizeof(header), 0,
                         UIO_SYSSPACE32, IO_NODELOCKED, kerncred, &resid, p)) != 0) {
        error = LOAD_IOERROR;
        goto bad2;
    }

    if (header.mach_header.magic == MH_MAGIC ||
            header.mach_header.magic == MH_MAGIC_64)
        is_fat = FALSE;
    else if (header.fat_header.magic == FAT_MAGIC ||
             header.fat_header.magic == FAT_CIGAM)
        is_fat = TRUE;
    else {
        error = LOAD_BADMACHO;
        goto bad2;
    }

    if (is_fat) {
        /* Look up our architecture in the fat file. */
        error = fatfile_getarch_with_bits(vp, archbits, (vm_offset_t)(&header.fat_header), &fat_arch);
        if (error != LOAD_SUCCESS)
            goto bad2;

        /* Read the Mach-O header out of it */
        error = vn_rdwr(UIO_READ, vp, (caddr_t)&header.mach_header,
                        sizeof(header.mach_header), fat_arch.offset,
                        UIO_SYSSPACE32, IO_NODELOCKED, kerncred, &resid, p);
        if (error) {
            error = LOAD_IOERROR;
            goto bad2;
        }

        /* Is this really a Mach-O? */
        if (header.mach_header.magic != MH_MAGIC &&
                header.mach_header.magic != MH_MAGIC_64) {
            error = LOAD_BADMACHO;
            goto bad2;
        }

        *file_offset = fat_arch.offset;
        *macho_size = fat_arch.size;
    } else {
        /*
         * Force get_macho_vnode() to fail if the architecture bits
         * do not match the expected architecture bits.  This in
         * turn causes load_dylinker() to fail for the same reason,
         * so it ensures the dynamic linker and the binary are in
         * lock-step.  This is potentially bad, if we ever add to
         * the CPU_ARCH_* bits any bits that are desirable but not
         * required, since the dynamic linker might work, but we will
         * refuse to load it because of this check.
         */
        if ((cpu_type_t)(header.mach_header.cputype & CPU_ARCH_MASK) != archbits) {
            /* close and release the vnode instead of leaking it */
            error = LOAD_BADARCH;
            goto bad2;
        }

        *file_offset = 0;
        *macho_size = fsize;
    }

    *mach_header = header.mach_header;
    *vpp = vp;

    ubc_setsize(vp, fsize);

    return (error);

bad2:
    err2 = VNOP_CLOSE(vp, FREAD, ctx);
    vnode_put(vp);
    return (error);

bad1:
    vnode_put(vp);
    return(error);
}
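
The magic-number triage above (thin Mach-O versus fat wrapper) is easy to exercise in userland with the public headers. A minimal sketch mirroring the kernel's checks, which accept a byte-swapped fat header but only a native-endian Mach-O header:

#include <mach-o/fat.h>
#include <mach-o/loader.h>
#include <stdint.h>

enum macho_kind { KIND_BAD, KIND_THIN, KIND_FAT };

static enum macho_kind
classify_magic(uint32_t magic)
{
	switch (magic) {
	case MH_MAGIC:		/* 32-bit Mach-O, native byte order */
	case MH_MAGIC_64:	/* 64-bit Mach-O, native byte order */
		return KIND_THIN;
	case FAT_MAGIC:		/* fat wrapper, native byte order */
	case FAT_CIGAM:		/* fat wrapper, byte-swapped */
		return KIND_FAT;
	default:
		return KIND_BAD;
	}
}
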