Example #1
0
int
mac_vnop_setxattr (struct vnode *vp, const char *name, char *buf, size_t len)
{
	vfs_context_t ctx;
	int options = XATTR_NOSECURITY;
	char uio_buf[ UIO_SIZEOF(1) ];
	uio_t auio;
	int error;

	if (vfs_isrdonly(vp->v_mount))
		return (EROFS);

	ctx = vfs_context_current();
	auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_WRITE,
				    &uio_buf[0], sizeof(uio_buf));
	uio_addiov(auio, CAST_USER_ADDR_T(buf), len);

	error = vn_setxattr(vp, name, auio, options, ctx);
#if CONFIG_FSE
	if (error == 0) {
		add_fsevent(FSE_XATTR_MODIFIED, ctx,
		    FSE_ARG_VNODE, vp,
		    FSE_ARG_DONE);
	}
#endif

	return (error);
}
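/*
 * Usage sketch (not from the original source): how a MAC policy might
 * persist a serialized label through the helper above.  The attribute
 * name and label contents are hypothetical; only mac_vnop_setxattr()
 * comes from the code above.
 */
static int
example_persist_label(struct vnode *vp)
{
	char label[] = "restricted";	/* hypothetical serialized label */

	/* store it under a made-up attribute name */
	return (mac_vnop_setxattr(vp, "com.example.mac-label",
	    label, sizeof(label)));
}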
Example #2
0
RTDECL(int) RTFileOpen(PRTFILE phFile, const char *pszFilename, uint64_t fOpen)
{
    RTFILEINT *pThis = (RTFILEINT *)RTMemAllocZ(sizeof(*pThis));
    if (!pThis)
        return VERR_NO_MEMORY;

    errno_t rc;
    pThis->u32Magic = RTFILE_MAGIC;
    pThis->fOpen    = fOpen;
    pThis->hVfsCtx  = vfs_context_current();
    if (pThis->hVfsCtx != NULL)
    {
        int             fCMode    = (fOpen & RTFILE_O_CREATE_MODE_MASK)
                                  ? (fOpen & RTFILE_O_CREATE_MODE_MASK) >> RTFILE_O_CREATE_MODE_SHIFT
                                  : RT_FILE_PERMISSION;
        int             fVnFlags  = 0; /* VNODE_LOOKUP_XXX */
        int             fOpenMode = 0;
        if (fOpen & RTFILE_O_NON_BLOCK)
            fOpenMode |= O_NONBLOCK;
        if (fOpen & RTFILE_O_WRITE_THROUGH)
            fOpenMode |= O_SYNC;

        /* create/truncate file */
        switch (fOpen & RTFILE_O_ACTION_MASK)
        {
            case RTFILE_O_OPEN:             break;
            case RTFILE_O_OPEN_CREATE:      fOpenMode |= O_CREAT; break;
            case RTFILE_O_CREATE:           fOpenMode |= O_CREAT | O_EXCL; break;
            case RTFILE_O_CREATE_REPLACE:   fOpenMode |= O_CREAT | O_TRUNC; break; /** @todo replacing needs fixing, this is *not* a 1:1 mapping! */
        }
        if (fOpen & RTFILE_O_TRUNCATE)
            fOpenMode |= O_TRUNC;

        switch (fOpen & RTFILE_O_ACCESS_MASK)
        {
            case RTFILE_O_READ:
                fOpenMode |= FREAD;
                break;
            case RTFILE_O_WRITE:
                fOpenMode |= fOpen & RTFILE_O_APPEND ? O_APPEND | FWRITE : FWRITE;
                break;
            case RTFILE_O_READWRITE:
                fOpenMode |= fOpen & RTFILE_O_APPEND ? O_APPEND | FWRITE | FREAD : FWRITE | FREAD;
                break;
            default:
                AssertMsgFailed(("RTFileOpen received an invalid RW value, fOpen=%#x\n", fOpen));
                RTMemFree(pThis); /* don't leak the half-initialized handle */
                return VERR_INVALID_PARAMETER;
        }

        pThis->fOpenMode = fOpenMode;
        rc = vnode_open(pszFilename, fOpenMode, fCMode, fVnFlags, &pThis->hVnode, pThis->hVfsCtx);
        if (rc == 0)
        {
            *phFile = pThis;
            return VINF_SUCCESS;
        }

        rc = RTErrConvertFromErrno(rc);
    }
    else
        rc = VERR_INTERNAL_ERROR_5; /* no current VFS context */
    RTMemFree(pThis);
    return rc;
}
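/*
 * Usage sketch (assumption, not in the original file): open an existing
 * file read-only with IPRT flags, then close it.  The path is made up;
 * RTFileClose() is the standard IPRT counterpart.
 */
static int exampleOpenReadOnly(void)
{
    RTFILE hFile;
    int rc = RTFileOpen(&hFile, "/tmp/example.dat",
                        RTFILE_O_READ | RTFILE_O_OPEN | RTFILE_O_DENY_NONE);
    if (RT_SUCCESS(rc))
        rc = RTFileClose(hFile);
    return rc;
}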
Example #3
0
static vdev_t *
vdev_lookup_by_path(vdev_t *vd, const char *name)
{
	vdev_t *mvd;
	int c;
	char pathbuf[MAXPATHLEN];
	char *lookup_name;
	int err = 0;

	if (!vd) return NULL;

	// Check both strings are valid
	if (name && *name &&
		vd->vdev_path && vd->vdev_path[0]) {
		int off;
		struct vnode *vp;

		lookup_name = vd->vdev_path;

		// We need to resolve symlinks here to get the final source name
		dprintf("ZFS: Looking up '%s'\n", vd->vdev_path);

		if ((err = vnode_lookup(vd->vdev_path, 0,
								&vp, vfs_context_current())) == 0) {
			int len = MAXPATHLEN;

			if ((err = vn_getpath(vp, pathbuf, &len)) == 0) {
				dprintf("ZFS: '%s' resolved name is '%s'\n",
						vd->vdev_path, pathbuf);
				lookup_name = pathbuf;
			}

			vnode_put(vp);
		}

		if (err) dprintf("ZFS: Lookup failed %d\n", err);

		// Skip /dev/ or not?
		off = (strncmp("/dev/", lookup_name, 5) == 0) ? 5 : 0;

		dprintf("ZFS: vdev '%s' == '%s' ?\n", name,
				&lookup_name[off]);

		if (!strcmp(name, &lookup_name[off])) return vd;
	}

	for (c = 0; c < vd->vdev_children; c++)
		if ((mvd = vdev_lookup_by_path(vd->vdev_child[c], name)) !=
			NULL)
			return (mvd);

	return (NULL);
}
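/*
 * Caller sketch (assumption): the recursive lookup above is normally
 * started from the root of a pool's vdev tree; spa_root_vdev is the
 * standard ZFS field for that root.
 */
static vdev_t *
example_find_vdev(spa_t *spa, const char *devname)
{
	return (vdev_lookup_by_path(spa->spa_root_vdev, devname));
}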
Example #4
0
void
vm_swapfile_open(const char *path, vnode_t *vp)
{
	int error = 0;
	vfs_context_t	ctx = vfs_context_current();

	if ((error = vnode_open(path, (O_CREAT | O_TRUNC | FREAD | FWRITE), S_IRUSR | S_IWUSR, 0, vp, ctx))) {
		printf("Failed to open swap file %d\n", error);
		*vp = NULL;
		return;
	}	

	vnode_put(*vp);
}
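/*
 * Caller sketch (hypothetical path): vm_swapfile_open() reports failure
 * only through *vp, so callers test the pointer rather than a return code.
 */
static int
example_open_swapfile(vnode_t *vpp)
{
	vm_swapfile_open("/private/var/vm/swapfile1", vpp);
	return ((*vpp == NULL) ? EIO : 0);
}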
Example #5
0
static int
vniocattach_shadow(struct vn_softc *vn, struct vn_ioctl_64 *vniop, 
				   __unused dev_t dev, int in_kernel, proc_t p)
{
	vfs_context_t ctx = vfs_context_current();
	struct nameidata nd;
	int error, flags;
	shadow_map_t *	map;
	off_t file_size;

	flags = FREAD|FWRITE;
	if (in_kernel) {
		NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW, UIO_SYSSPACE, vniop->vn_file, ctx);
	}
	else {
		NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW, 
			   (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32), 
			   vniop->vn_file, ctx);
	}
	/* vn_open gives both long- and short-term references */
	error = vn_open(&nd, flags, 0);
	if (error) {
		/* shadow MUST be writable! */
		return (error);
	}
	if (nd.ni_vp->v_type != VREG 
	    || (error = vnode_size(nd.ni_vp, &file_size, ctx))) {
		(void)vn_close(nd.ni_vp, flags, ctx);
		vnode_put(nd.ni_vp);
		return (error ? error : EINVAL);
	}
	map = shadow_map_create(vn->sc_fsize, file_size,
				0, vn->sc_secsize);
	if (map == NULL) {
		(void)vn_close(nd.ni_vp, flags, ctx);
		vnode_put(nd.ni_vp);
		vn->sc_shadow_vp = NULL;
		return (ENOMEM);
	}
	vn->sc_shadow_vp = nd.ni_vp;
	vn->sc_shadow_vid = vnode_vid(nd.ni_vp);
	vn->sc_shadow_vp->v_flag |= VNOCACHE_DATA;
	vn->sc_shadow_map = map;
	vn->sc_flags &= ~VNF_READONLY; /* we're now read/write */

	/* lose the short-term reference */
	vnode_put(nd.ni_vp);
	return(0);
}
Example #6
0
void
vm_swapfile_close(uint64_t path_addr, vnode_t vp)
{
	struct nameidata nd;
	vfs_context_t context = vfs_context_current();
	int error = 0;

	vnode_getwithref(vp);
	vnode_close(vp, 0, context);
	
	NDINIT(&nd, DELETE, OP_UNLINK, AUDITVNPATH1, UIO_SYSSPACE,
	       path_addr, context);

	error = unlink1(context, &nd, 0);
}
Example #7
0
void
vm_swapfile_close(uint64_t path_addr, vnode_t vp)
{
	vfs_context_t context = vfs_context_current();
	int error;

	vnode_getwithref(vp);
	vnode_close(vp, 0, context);
	
	error = unlink1(context, NULLVP, CAST_USER_ADDR_T(path_addr),
	    UIO_SYSSPACE, 0);

#if DEVELOPMENT || DEBUG
	if (error)
		printf("%s : unlink of %s failed with error %d", __FUNCTION__,
		    (char *)path_addr, error);
#endif
}
Example #8
0
int
mac_vnop_getxattr (struct vnode *vp, const char *name, char *buf, size_t len,
		   size_t *attrlen)
{
	vfs_context_t ctx = vfs_context_current();
	int options = XATTR_NOSECURITY;
	char uio_buf[ UIO_SIZEOF(1) ];
	uio_t auio;
	int error;

	auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_READ,
				    &uio_buf[0], sizeof(uio_buf));
	uio_addiov(auio, CAST_USER_ADDR_T(buf), len);

	error = vn_getxattr(vp, name, auio, attrlen, options, ctx);
	*attrlen = len - uio_resid(auio);

	return (error);
}
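/*
 * Usage sketch (hypothetical names): read an xattr back into a stack
 * buffer; on success *attrlen tells how many bytes were actually copied.
 */
static int
example_read_label(struct vnode *vp, char *buf, size_t buflen)
{
	size_t attrlen = 0;
	int error;

	error = mac_vnop_getxattr(vp, "com.example.mac-label",
	    buf, buflen, &attrlen);
	if (error == 0 && attrlen < buflen)
		buf[attrlen] = '\0';	/* NUL-terminate for convenience */
	return (error);
}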
Example #9
0
int
vm_swapfile_preallocate(vnode_t vp, uint64_t *size, boolean_t *pin)
{
	int		error = 0;
	uint64_t	file_size = 0;
	vfs_context_t	ctx = NULL;


	ctx = vfs_context_current();

	error = vnode_setsize(vp, *size, IO_NOZEROFILL, ctx);

	if (error) {
		printf("vnode_setsize for swap files failed: %d\n", error);
		goto done;
	}

	error = vnode_size(vp, (off_t*) &file_size, ctx);

	if (error) {
		printf("vnode_size (new file) for swap file failed: %d\n", error);
		goto done;
	}	
	assert(file_size == *size);
	
	if (pin != NULL && *pin != FALSE) {
		error = VNOP_IOCTL(vp, FIOPINSWAP, NULL, 0, ctx);

		if (error) {
			printf("pin for swap files failed: %d,  file_size = %lld\n", error, file_size);
			/* this is not fatal, carry on with files wherever they landed */
			*pin = FALSE;
			error = 0;
		}
	}

	vnode_lock_spin(vp);
	SET(vp->v_flag, VSWAP);
	vnode_unlock(vp);
done:
	return error;
}
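/*
 * Caller sketch (assumption): *size is the requested length here, and the
 * callee clears *pin when pinning is unsupported, so the caller can read
 * back whether the request stuck.
 */
static int
example_preallocate(vnode_t vp)
{
	uint64_t size = 64ULL * 1024 * 1024;	/* hypothetical 64 MB swap file */
	boolean_t pin = TRUE;

	return (vm_swapfile_preallocate(vp, &size, &pin));
}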
Example #10
0
int
mac_vnop_removexattr (struct vnode *vp, const char *name)
{
	vfs_context_t ctx = vfs_context_current();
	int options = XATTR_NOSECURITY;
	int error;

	if (vfs_isrdonly(vp->v_mount))
		return (EROFS);

	error = vn_removexattr(vp, name, options, ctx);
#if CONFIG_FSE
	if (error == 0) {
		add_fsevent(FSE_XATTR_REMOVED, ctx,
		    FSE_ARG_VNODE, vp,
		    FSE_ARG_DONE);
	}
#endif

	return (error);
}
Example #11
0
static int
sd_callback3(proc_t p, void * args)
{
	struct sd_iterargs * sd = (struct sd_iterargs *)args;
	vfs_context_t ctx = vfs_context_current();

	int setsdstate = sd->setsdstate;

	proc_lock(p);
	p->p_shutdownstate = setsdstate;
	if (p->p_stat != SZOMB) {
	       /*
		* NOTE: following code ignores sig_lock and plays
		* with exit_thread correctly.  This is OK unless we
		* are a multiprocessor, in which case I do not
		* understand the sig_lock.  This needs to be fixed.
		* XXX
		*/
		if (p->exit_thread) {	/* someone already doing it */
			proc_unlock(p);
			/* give him a chance */
			thread_block(THREAD_CONTINUE_NULL);
		} else {
			p->exit_thread = current_thread();
			printf(".");

			sd_log(ctx, "%s[%d] had to be forced closed with exit1().\n", p->p_comm, p->p_pid);

			proc_unlock(p);
			KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_FRCEXIT) | DBG_FUNC_NONE,
					      p->p_pid, 0, 1, 0, 0);
			sd->activecount++;
			exit1(p, 1, (int *)NULL);
		}
	} else {
		proc_unlock(p);
	}

	return PROC_RETURNED;
}
Example #12
0
static int
nsmb_dev_open_nolock(dev_t dev, int oflags, int devtype, struct proc *p)
{
#pragma unused(oflags, devtype, p)
	struct smb_dev *sdp;
	kauth_cred_t cred = vfs_context_ucred(vfs_context_current());

	sdp = SMB_GETDEV(dev);
	if (sdp && (sdp->sd_flags & NSMBFL_OPEN))
		return (EBUSY);
	if (!sdp || minor(dev) == 0) {
		int	avail_minor;

		for (avail_minor = 1; avail_minor < SMBMINORS; avail_minor++)
			if (!SMB_GETDEV(avail_minor))
				break;
		if (avail_minor >= SMBMINORS) {
			SMBERROR("Too many minor devices, %d >= %d !", avail_minor, SMBMINORS);
			return (ENOMEM);
		}
		SMB_MALLOC(sdp, struct smb_dev *, sizeof(*sdp), M_NSMBDEV, M_WAITOK);
		bzero(sdp, sizeof(*sdp));
		dev = makedev(smb_major, avail_minor);
		sdp->sd_devfs = devfs_make_node(dev, DEVFS_CHAR,
						kauth_cred_getuid(cred),
						kauth_cred_getgid(cred),
						0700, "nsmb%x", avail_minor);
		if (!sdp->sd_devfs) {
			SMBERROR("devfs_make_node failed %d\n", avail_minor);
			SMB_FREE(sdp, M_NSMBDEV);
			return (ENOMEM);
		}
		if (avail_minor > smb_minor_hiwat)
			smb_minor_hiwat = avail_minor;
		SMB_GETDEV(dev) = sdp;
		return (EBUSY);
	}
Example #13
0
static int
vniocattach_file(struct vn_softc *vn,
		 struct vn_ioctl_64 *vniop,
		 dev_t dev,
		 int in_kernel,
		 proc_t p)
{
	dev_t	cdev;
	vfs_context_t ctx = vfs_context_current();
	kauth_cred_t cred;
	struct nameidata nd;
	off_t file_size;
	int error, flags;

	flags = FREAD|FWRITE;
	if (in_kernel) {
		NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW, UIO_SYSSPACE, vniop->vn_file, ctx);
	}
	else {
		NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW, 
			   (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32), 
			   vniop->vn_file, ctx);
	}
	/* vn_open gives both long- and short-term references */
	error = vn_open(&nd, flags, 0);
	if (error) {
		if (error != EACCES && error != EPERM && error != EROFS) {
			return (error);
		}
		flags &= ~FWRITE;
		if (in_kernel) {
			NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW, UIO_SYSSPACE, 
			       vniop->vn_file, ctx);
		}
		else {
			NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW, 
				   (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32), 
			       vniop->vn_file, ctx);
		}
		error = vn_open(&nd, flags, 0);
		if (error) {
			return (error);
		}
	}
	if (nd.ni_vp->v_type != VREG) {
		error = EINVAL;
	}
	else {
		error = vnode_size(nd.ni_vp, &file_size, ctx);
	}
	if (error != 0) {
		(void) vn_close(nd.ni_vp, flags, ctx);
		vnode_put(nd.ni_vp);
		return (error);
	}
	cred = kauth_cred_proc_ref(p);
	nd.ni_vp->v_flag |= VNOCACHE_DATA;
	error = setcred(nd.ni_vp, cred);
	if (error) {
		(void)vn_close(nd.ni_vp, flags, ctx);
		vnode_put(nd.ni_vp);
		kauth_cred_unref(&cred);
		return(error);
	}
	vn->sc_secsize = DEV_BSIZE;
	vn->sc_fsize = file_size;
	vn->sc_size = file_size / vn->sc_secsize;
	vn->sc_vp = nd.ni_vp;
	vn->sc_vid = vnode_vid(nd.ni_vp);
	vn->sc_open_flags = flags;
	vn->sc_cred = cred;
	cdev = makedev(vndevice_cdev_major, minor(dev));
	vn->sc_cdev = devfs_make_node(cdev, DEVFS_CHAR,
				      UID_ROOT, GID_OPERATOR, 
				      0600, "rvn%d", 
				      minor(dev));
	vn->sc_flags |= VNF_INITED;
	if (flags == FREAD)
		vn->sc_flags |= VNF_READONLY;
	/* lose the short-term reference */
	vnode_put(nd.ni_vp);
	return(0);
}
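/*
 * The EACCES/EPERM/EROFS read-only fallback above is a reusable pattern;
 * a condensed sketch (assumption: kernel path looked up in UIO_SYSSPACE):
 */
static int
example_open_rw_or_ro(const char *path, struct nameidata *ndp,
		      int *flagsp, vfs_context_t ctx)
{
	int error;

	*flagsp = FREAD | FWRITE;
	NDINIT(ndp, LOOKUP, OP_OPEN, FOLLOW, UIO_SYSSPACE,
	       CAST_USER_ADDR_T(path), ctx);
	error = vn_open(ndp, *flagsp, 0);
	if (error == EACCES || error == EPERM || error == EROFS) {
		*flagsp = FREAD;	/* retry read-only */
		NDINIT(ndp, LOOKUP, OP_OPEN, FOLLOW, UIO_SYSSPACE,
		       CAST_USER_ADDR_T(path), ctx);
		error = vn_open(ndp, *flagsp, 0);
	}
	return (error);
}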
Example #14
0
/*
 *	Routine:	macx_swapoff
 *	Function:
 *		Syscall interface to remove a file from backing store
 */
int
macx_swapoff(
	struct macx_swapoff_args *args)
{
	__unused int	flags = args->flags;
	kern_return_t	kr;
	mach_port_t	backing_store;

	struct vnode		*vp = 0; 
	struct nameidata 	nd, *ndp;
	struct proc		*p =  current_proc();
	int			i;
	int			error;
	boolean_t		funnel_state;
	vfs_context_t ctx = vfs_context_current();

	AUDIT_MACH_SYSCALL_ENTER(AUE_SWAPOFF);

	funnel_state = thread_funnel_set(kernel_flock, TRUE);
	backing_store = NULL;
	ndp = &nd;

	if ((error = suser(kauth_cred_get(), 0)))
		goto swapoff_bailout;

	/*
	 * Get the vnode for the paging area.
	 */
	NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
	       ((IS_64BIT_PROCESS(p)) ? UIO_USERSPACE64 : UIO_USERSPACE32),
	       (user_addr_t) args->filename, ctx);

	if ((error = namei(ndp)))
		goto swapoff_bailout;
	nameidone(ndp);
	vp = ndp->ni_vp;

	if (vp->v_type != VREG) {
		error = EINVAL;
		goto swapoff_bailout;
	}
#if CONFIG_MACF
	vnode_lock(vp);
	error = mac_system_check_swapoff(vfs_context_ucred(ctx), vp);
	vnode_unlock(vp);
	if (error)
		goto swapoff_bailout;
#endif

	for(i = 0; i < MAX_BACKING_STORE; i++) {
		if(bs_port_table[i].vp == vp) {
			break;
		}
	}
	if (i == MAX_BACKING_STORE) {
		error = EINVAL;
		goto swapoff_bailout;
	}
	backing_store = (mach_port_t)bs_port_table[i].bs;

	kr = default_pager_backing_store_delete(backing_store);
	switch (kr) {
		case KERN_SUCCESS:
			error = 0;
			bs_port_table[i].vp = 0;
			/* This vnode is no longer used for swapfile */
			vnode_lock_spin(vp);
			CLR(vp->v_flag, VSWAP);
			vnode_unlock(vp);

			/* get rid of macx_swapon() "long term" reference */
			vnode_rele(vp);

			break;
		case KERN_FAILURE:
			error = EAGAIN;
			break;
		default:
			error = EAGAIN;
			break;
	}

swapoff_bailout:
	/* get rid of macx_swapoff() namei() reference */
	if (vp)
		vnode_put(vp);

	(void) thread_funnel_set(kernel_flock, FALSE);
	AUDIT_MACH_SYSCALL_EXIT(error);
	return(error);
}
Example #15
0
/*
 *	Routine:	macx_swapon
 *	Function:
 *		Syscall interface to add a file to backing store
 */
int
macx_swapon(
	struct macx_swapon_args *args)
{
	int			size = args->size;
	vnode_t			vp = (vnode_t)NULL; 
	struct nameidata 	nd, *ndp;
	register int		error;
	kern_return_t		kr;
	mach_port_t		backing_store;
	memory_object_default_t	default_pager;
	int			i;
	boolean_t		funnel_state;
	off_t			file_size;
	vfs_context_t		ctx = vfs_context_current();
	struct proc		*p =  current_proc();
	int			dp_cluster_size;


	AUDIT_MACH_SYSCALL_ENTER(AUE_SWAPON);
	AUDIT_ARG(value32, args->priority);

	funnel_state = thread_funnel_set(kernel_flock, TRUE);
	ndp = &nd;

	if ((error = suser(kauth_cred_get(), 0)))
		goto swapon_bailout;

	/*
	 * Get a vnode for the paging area.
	 */
	NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
	       ((IS_64BIT_PROCESS(p)) ? UIO_USERSPACE64 : UIO_USERSPACE32),
	       (user_addr_t) args->filename, ctx);

	if ((error = namei(ndp)))
		goto swapon_bailout;
	nameidone(ndp);
	vp = ndp->ni_vp;

	if (vp->v_type != VREG) {
		error = EINVAL;
		goto swapon_bailout;
	}

	/* get file size */
	if ((error = vnode_size(vp, &file_size, ctx)) != 0)
		goto swapon_bailout;
#if CONFIG_MACF
	vnode_lock(vp);
	error = mac_system_check_swapon(vfs_context_ucred(ctx), vp);
	vnode_unlock(vp);
	if (error)
		goto swapon_bailout;
#endif

	/* resize to desired size if it's too small */
	if ((file_size < (off_t)size) && ((error = vnode_setsize(vp, (off_t)size, 0, ctx)) != 0))
		goto swapon_bailout;

	if (default_pager_init_flag == 0) {
		start_def_pager(NULL);
		default_pager_init_flag = 1;
	}

	/* add new backing store to list */
	i = 0;
	while(bs_port_table[i].vp != 0) {
		if(i == MAX_BACKING_STORE)
			break;
		i++;
	}
	if(i == MAX_BACKING_STORE) {
	   	error = ENOMEM;
		goto swapon_bailout;
	}

	/* remember the vnode. This vnode has namei() reference */
	bs_port_table[i].vp = vp;
	
	/*
	 * Look to see if we are already paging to this file.
	 */
	/* make certain the copy send of kernel call will work */
	default_pager = MEMORY_OBJECT_DEFAULT_NULL;
	kr = host_default_memory_manager(host_priv_self(), &default_pager, 0);
	if(kr != KERN_SUCCESS) {
	   error = EAGAIN;
	   bs_port_table[i].vp = 0;
	   goto swapon_bailout;
	}

	if (vp->v_mount->mnt_kern_flag & MNTK_SSD) {
		/*
		 * keep the cluster size small since the
		 * seek cost is effectively 0 which means
		 * we don't care much about fragmentation
		 */
		dp_isssd = TRUE;
		dp_cluster_size = 2 * PAGE_SIZE;
	} else {
		/*
		 * use the default cluster size
		 */
		dp_isssd = FALSE;
		dp_cluster_size = 0;
	}
	kr = default_pager_backing_store_create(default_pager, 
					-1, /* default priority */
					dp_cluster_size,
					&backing_store);
	memory_object_default_deallocate(default_pager);

	if(kr != KERN_SUCCESS) {
	   error = ENOMEM;
	   bs_port_table[i].vp = 0;
	   goto swapon_bailout;
	}

	/* Mark this vnode as being used for swapfile */
	vnode_lock_spin(vp);
	SET(vp->v_flag, VSWAP);
	vnode_unlock(vp);

	/*
	 * NOTE: we are able to supply PAGE_SIZE here instead of
	 *	an actual record size or block number because:
	 *	a: we do not support offsets from the beginning of the
	 *		file (allowing for non page-size/record modulo offsets).
	 *	b: paging will be done modulo page size
	 */

	kr = default_pager_add_file(backing_store, (vnode_ptr_t) vp,
				PAGE_SIZE, (int)(file_size/PAGE_SIZE));
	if(kr != KERN_SUCCESS) {
	   bs_port_table[i].vp = 0;
	   if(kr == KERN_INVALID_ARGUMENT)
		error = EINVAL;
	   else 
		error = ENOMEM;

	   /* This vnode is not to be used for swapfile */
	   vnode_lock_spin(vp);
	   CLR(vp->v_flag, VSWAP);
	   vnode_unlock(vp);

	   goto swapon_bailout;
	}
	bs_port_table[i].bs = (void *)backing_store;
	error = 0;

	ubc_setthreadcred(vp, p, current_thread());

	/*
	 * take a long term reference on the vnode to keep
	 * vnreclaim() away from this vnode.
	 */
	vnode_ref(vp);

swapon_bailout:
	if (vp) {
		vnode_put(vp);
	}
	(void) thread_funnel_set(kernel_flock, FALSE);
	AUDIT_MACH_SYSCALL_EXIT(error);
	return(error);
}
Example #16
0
static void
proc_shutdown(void)
{
	vfs_context_t ctx = vfs_context_current();
	struct proc *p, *self;
	int delayterm = 0;
	struct sd_filterargs sfargs;
	struct sd_iterargs sdargs;
	int error = 0;
	struct timespec ts;

	/*
	 *	Kill as many procs as we can.  (Except ourself...)
	 */
	self = (struct proc *)current_proc();
	
	/*
	 * Signal init (pid 1) with SIGTERM so that it does not launch
	 * new processes
	 */
	p = proc_find(1);
	if (p) {
		if (p != self)
			psignal(p, SIGTERM);
		proc_rele(p);	/* only release a reference we actually got */
	}

	printf("Killing all processes ");

sigterm_loop:
	/*
	 * send SIGTERM to those procs interested in catching one
	 */
	sfargs.delayterm = delayterm;
	sfargs.shutdownstate = 0;
	sdargs.signo = SIGTERM;
	sdargs.setsdstate = 1;
	sdargs.countproc = 1;
	sdargs.activecount = 0;

	error = 0;
	/* post a SIGTERM to all that catch SIGTERM and not marked for delay */
	proc_rebootscan(sd_callback1, (void *)&sdargs, sd_filt1, (void *)&sfargs);

	if (sdargs.activecount != 0 && proc_shutdown_exitcount!= 0) {
		proc_list_lock();
		if (proc_shutdown_exitcount != 0) {
			/*
	 		* now wait for up to 30 seconds to allow those procs catching SIGTERM
	 		* to digest it
	 		* as soon as these procs have exited, we'll continue on to the next step
	 		*/
			ts.tv_sec = 30;
			ts.tv_nsec = 0;
			error = msleep(&proc_shutdown_exitcount, proc_list_mlock, PWAIT, "shutdownwait", &ts);
			if (error != 0) {
				for (p = allproc.lh_first; p; p = p->p_list.le_next) {
					if ((p->p_listflag & P_LIST_EXITCOUNT) == P_LIST_EXITCOUNT)
						p->p_listflag &= ~P_LIST_EXITCOUNT;
				}
				for (p = zombproc.lh_first; p; p = p->p_list.le_next) {
					if ((p->p_listflag & P_LIST_EXITCOUNT) == P_LIST_EXITCOUNT)
						p->p_listflag &= ~P_LIST_EXITCOUNT;
				}
			}
			
		}
		proc_list_unlock();
	}
	if (error == ETIMEDOUT) {
		/*
		 * log the names of the unresponsive tasks
		 */


		proc_list_lock();

		for (p = allproc.lh_first; p; p = p->p_list.le_next) {
			if (p->p_shutdownstate == 1) {
				printf("%s[%d]: didn't act on SIGTERM\n", p->p_comm, p->p_pid);
				sd_log(ctx, "%s[%d]: didn't act on SIGTERM\n", p->p_comm, p->p_pid);
			}
		}

		proc_list_unlock();

		delay_for_interval(1000 * 5, 1000 * 1000);
	}

	/*
	 * send a SIGKILL to all the procs still hanging around
	 */
	sfargs.delayterm = delayterm;
	sfargs.shutdownstate = 2;
	sdargs.signo = SIGKILL;
	sdargs.setsdstate = 2;
	sdargs.countproc = 1;
	sdargs.activecount = 0;

	/* post a SIGKILL to all that catch SIGTERM and not marked for delay */
	proc_rebootscan(sd_callback2, (void *)&sdargs, sd_filt2, (void *)&sfargs);

	if (sdargs.activecount != 0 && proc_shutdown_exitcount!= 0) {
		proc_list_lock();
		if (proc_shutdown_exitcount != 0) {
			/*
	 		* wait for up to 60 seconds to allow these procs to exit normally
	 		*
	 		* History:	The delay interval was changed from 100 to 200
	 		*		for NFS requests in particular.
	 		*/
			ts.tv_sec = 60;
			ts.tv_nsec = 0;
			error = msleep(&proc_shutdown_exitcount, proc_list_mlock, PWAIT, "shutdownwait", &ts);
			if (error != 0) {
				for (p = allproc.lh_first; p; p = p->p_list.le_next) {
					if ((p->p_listflag & P_LIST_EXITCOUNT) == P_LIST_EXITCOUNT)
						p->p_listflag &= ~P_LIST_EXITCOUNT;
				}
				for (p = zombproc.lh_first; p; p = p->p_list.le_next) {
					if ((p->p_listflag & P_LIST_EXITCOUNT) == P_LIST_EXITCOUNT)
						p->p_listflag &= ~P_LIST_EXITCOUNT;
				}
			}
		}
		proc_list_unlock();
	}

	/*
	 * if we still have procs that haven't exited, then brute force 'em
	 */
	sfargs.delayterm = delayterm;
	sfargs.shutdownstate = 3;
	sdargs.signo = 0;
	sdargs.setsdstate = 3;
	sdargs.countproc = 0;
	sdargs.activecount = 0;

	/* force-exit (via exit1()) anything that is still hanging around */
	proc_rebootscan(sd_callback3, (void *)&sdargs, sd_filt2, (void *)&sfargs);
	printf("\n");

	/* Now start the termination of processes that are marked for delayed termination */
	if (delayterm == 0) {
		delayterm = 1;
		goto  sigterm_loop;
	}

	sd_closelog(ctx);

	/* drop the ref on initproc */
	proc_rele(initproc);
	printf("continuing\n");
}
Example #17
0
/* ARGSUSED */
int
pipe(proc_t p, __unused struct pipe_args *uap, int32_t *retval)
{
	struct fileproc *rf, *wf;
	struct pipe *rpipe, *wpipe;
	lck_mtx_t   *pmtx;
	int fd, error;

	if ((pmtx = lck_mtx_alloc_init(pipe_mtx_grp, pipe_mtx_attr)) == NULL)
	        return (ENOMEM);
	
	rpipe = wpipe = NULL;
	if (pipe_create(&rpipe) || pipe_create(&wpipe)) {
	        error = ENFILE;
		goto freepipes;
	}
	/*
	 * allocate the space for the normal I/O direction up
	 * front... we'll delay the allocation for the other
	 * direction until a write actually occurs (most likely it won't)...
	 */
	error = pipespace(rpipe, choose_pipespace(rpipe->pipe_buffer.size, 0));
        if (error)
	        goto freepipes;

	TAILQ_INIT(&rpipe->pipe_evlist);
	TAILQ_INIT(&wpipe->pipe_evlist);

	error = falloc(p, &rf, &fd, vfs_context_current());
	if (error) {
	        goto freepipes;
	}
	retval[0] = fd;

	/*
	 * for now we'll create half-duplex pipes (refer to the returns
	 * section above); this is what we've always supported.
	 */
	rf->f_flag = FREAD;
	rf->f_data = (caddr_t)rpipe;
	rf->f_ops = &pipeops;

	error = falloc(p, &wf, &fd, vfs_context_current());
	if (error) {
		fp_free(p, retval[0], rf);
	        goto freepipes;
	}
	wf->f_flag = FWRITE;
	wf->f_data = (caddr_t)wpipe;
	wf->f_ops = &pipeops;

	rpipe->pipe_peer = wpipe;
	wpipe->pipe_peer = rpipe;
	/* both structures share the same mutex */
	rpipe->pipe_mtxp = wpipe->pipe_mtxp = pmtx; 

	retval[1] = fd;
#if CONFIG_MACF
	/*
	 * XXXXXXXX SHOULD NOT HOLD FILE_LOCK() XXXXXXXXXXXX
	 *
	 * struct pipe represents a pipe endpoint.  The MAC label is shared
	 * between the connected endpoints.  As a result mac_pipe_label_init() and
	 * mac_pipe_label_associate() should only be called on one of the endpoints
	 * after they have been connected.
	 */
	mac_pipe_label_init(rpipe);
	mac_pipe_label_associate(kauth_cred_get(), rpipe);
	wpipe->pipe_label = rpipe->pipe_label;
#endif
	proc_fdlock_spin(p);
	procfdtbl_releasefd(p, retval[0], NULL);
	procfdtbl_releasefd(p, retval[1], NULL);
	fp_drop(p, retval[0], rf, 1);
	fp_drop(p, retval[1], wf, 1);
	proc_fdunlock(p);


	return (0);

freepipes:
	pipeclose(rpipe); 
	pipeclose(wpipe); 
	lck_mtx_free(pmtx, pipe_mtx_grp);

	return (error);
}
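/*
 * Userland view (sketch, not part of the kernel source): the two
 * descriptors returned through retval[] above are what pipe(2) hands
 * back; retval[0] is the FREAD end and retval[1] the FWRITE end.
 */
#include <unistd.h>

static int
example_pipe_roundtrip(void)
{
	int fds[2];
	char c = 'x';

	if (pipe(fds) != 0)
		return (-1);
	(void)write(fds[1], &c, 1);	/* write end */
	(void)read(fds[0], &c, 1);	/* read end */
	close(fds[0]);
	close(fds[1]);
	return (0);
}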
Example #18
0
int
vm_swapfile_preallocate(vnode_t vp, uint64_t *size, boolean_t *pin)
{
	int		error = 0;
	uint64_t	file_size = 0;
	vfs_context_t	ctx = NULL;


	ctx = vfs_context_current();

#if CONFIG_PROTECT
	{
#if 0	// <rdar://11771612>

		if ((error = cp_vnode_setclass(vp, PROTECTION_CLASS_F))) {
			if(config_protect_bug) {
				printf("swap protection class set failed with %d\n", error);
			} else {
				panic("swap protection class set failed with %d\n", error);
			}
		}
#endif
		/* initialize content protection keys manually */
		if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) {
			printf("Content Protection key failure on swap: %d\n", error);
			vnode_put(vp);
			vp = NULL;
			goto done;
 		}
	}
#endif
	error = vnode_setsize(vp, *size, IO_NOZEROFILL, ctx);

	if (error) {
		printf("vnode_setsize for swap files failed: %d\n", error);
		goto done;
	}

	error = vnode_size(vp, (off_t*) &file_size, ctx);

	if (error) {
		printf("vnode_size (new file) for swap file failed: %d\n", error);
		goto done;
	}	
	assert(file_size == *size);
	
	if (pin != NULL && *pin != FALSE) {

		assert(vnode_tag(vp) == VT_HFS);

		error = hfs_pin_vnode(VTOHFS(vp), vp, HFS_PIN_IT | HFS_DATALESS_PIN, NULL, ctx);

		if (error) {
			printf("hfs_pin_vnode for swap files failed: %d\n", error);
			/* this is not fatal, carry on with files wherever they landed */
			*pin = FALSE;
			error = 0;
		}
	}

	vnode_lock_spin(vp);
	SET(vp->v_flag, VSWAP);
	vnode_unlock(vp);
done:
	return error;
}
Example #19
0
bool FileNVRAM::start(IOService *provider)
{
	bool earlyInit = false;

	LOG(NOTICE, "start() called (%d)\n", mInitComplete);

	//start is called upon wake for some reason.
	if (mInitComplete)
	{
		return true;
	}

	if (!super::start(provider))
	{
		return false;
	}
	
	LOG(NOTICE, "start() called (%d)\n", mInitComplete);

	// mFilePath		= NULL;			// no known file
	mLoggingLevel   = NOTICE;		// start with logging disabled, can be updated for debugging
	mInitComplete   = false;		// Don't resync anything that's already in the file system.
	mSafeToSync     = false;		// Don't sync until later

	// We should be root right now... cache this for later.
	mCtx            = vfs_context_current();

	// Register Power modes
	PMinit();
	registerPowerDriver(this, sPowerStates, sizeof(sPowerStates) / sizeof(IOPMPowerState));
	provider->joinPMtree(this);

	IORegistryEntry* bootnvram = IORegistryEntry::fromPath(NVRAM_FILE_DT_LOCATION, gIODTPlane);
	IORegistryEntry* root = IORegistryEntry::fromPath("/", gIODTPlane);

	// Create the command gate.
	mCommandGate = IOCommandGate::commandGate( this, dispatchCommand);
	getWorkLoop()->addEventSource( mCommandGate );

	// Replace the IOService dictionary with an empty one, clean out variables we don't want.
	OSDictionary* dict = OSDictionary::withCapacity(1);

	if (!dict)
	{
		return false;
	}

	setPropertyTable(dict);

	if (bootnvram)
	{
		copyEntryProperties(NULL, bootnvram);
		bootnvram->detachFromParent(root, gIODTPlane);
	}
	else
	{
		IOTimerEventSource* mTimer = IOTimerEventSource::timerEventSource(this, timeoutOccurred);

		if (mTimer)
		{
			getWorkLoop()->addEventSource( mTimer);
			mTimer->setTimeoutMS(50); // callback isn't being set up right, causes a panic
			mSafeToSync = false;
		}
		else
		{
			earlyInit = true;
		}
	}

	// We don't have initial NVRAM data from the bootloader, or we couldn't schedule a
	// timer to read in /Extra/NVRAM/nvram.plist, so start up immediately.
	if (earlyInit == true)
	{
		mSafeToSync = true;
		registerNVRAM();
	}

	mInitComplete = true;

	return true;
}
Example #20
0
/*
 * Accounting system call.  Written based on the specification and
 * previous implementation done by Mark Tinguely.
 */
int
acct(proc_t p, struct acct_args *uap, __unused int *retval)
{
	struct nameidata nd;
	int error;
	struct vfs_context *ctx; 

	ctx = vfs_context_current();

	/* Make sure that the caller is root. */
	if ((error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
		return (error);

	/*
	 * If accounting is to be started to a file, open that file for
	 * writing and make sure it's a 'normal' file.
	 */
	if (uap->path != USER_ADDR_NULL) {
		NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, ctx);
		if ((error = vn_open(&nd, FWRITE, 0)))
			return (error);
#if CONFIG_MACF
		error = mac_system_check_acct(vfs_context_ucred(ctx), nd.ni_vp);
		if (error) {
			vnode_put(nd.ni_vp);
			vn_close(nd.ni_vp, FWRITE, ctx);
			return (error);
		}
#endif
		vnode_put(nd.ni_vp);

		if (nd.ni_vp->v_type != VREG) {
			vn_close(nd.ni_vp, FWRITE, ctx);
			return (EACCES);
		}
	}
#if CONFIG_MACF
	else {
		error = mac_system_check_acct(vfs_context_ucred(ctx), NULL);
		if (error)
			return (error);
	}
#endif

	/*
	 * If accounting was previously enabled, kill the old space-watcher,
	 * close the file, and, if no new file was specified, leave.
	 */
	if (acctp != NULLVP || suspend_acctp != NULLVP) {
		untimeout(acctwatch_funnel, NULL);
		error = vn_close((acctp != NULLVP ? acctp : suspend_acctp),
				FWRITE, vfs_context_current());

		acctp = suspend_acctp = NULLVP;
	}
	if (uap->path == USER_ADDR_NULL)
		return (error);

	/*
	 * Save the new accounting file vnode, and schedule the new
	 * free space watcher.
	 */
	acctp = nd.ni_vp;
	acctwatch(NULL);
	return (error);
}
Example #21
0
struct kern_direct_file_io_ref_t *
kern_open_file_for_direct_io(const char * name, 
			     kern_get_file_extents_callback_t callback, 
			     void * callback_ref,
			     dev_t * partition_device_result,
			     dev_t * image_device_result,
                             uint64_t * partitionbase_result,
                             uint64_t * maxiocount_result,
                             uint32_t * oflags,
                             off_t offset,
                             caddr_t addr,
                             vm_size_t len)
{
    struct kern_direct_file_io_ref_t * ref;

    proc_t			p;
    struct vnode_attr		va;
    int				error;
    off_t			f_offset;
    uint64_t                    fileblk;
    size_t                      filechunk;
    uint64_t                    physoffset;
    dev_t			device;
    dev_t			target = 0;
    int			        isssd = 0;
    uint32_t                    flags = 0;
    uint32_t			blksize;
    off_t 			maxiocount, count;
    boolean_t                   locked = FALSE;

    int (*do_ioctl)(void * p1, void * p2, u_long theIoctl, caddr_t result);
    void * p1 = NULL;
    void * p2 = NULL;

    error = EFAULT;

    ref = (struct kern_direct_file_io_ref_t *) kalloc(sizeof(struct kern_direct_file_io_ref_t));
    if (!ref)
    {
	error = EFAULT;
    	goto out;
    }

    bzero(ref, sizeof(*ref));
    p = kernproc;
    ref->ctx = vfs_context_create(vfs_context_current());

    if ((error = vnode_open(name, (O_CREAT | FWRITE), (0), 0, &ref->vp, ref->ctx)))
        goto out;

    if (addr && len)
    {
	if ((error = kern_write_file(ref, offset, addr, len)))
	    goto out;
    }

    VATTR_INIT(&va);
    VATTR_WANTED(&va, va_rdev);
    VATTR_WANTED(&va, va_fsid);
    VATTR_WANTED(&va, va_data_size);
    VATTR_WANTED(&va, va_nlink);
    error = EFAULT;
    if (vnode_getattr(ref->vp, &va, ref->ctx))
    	goto out;

    kprintf("vp va_rdev major %d minor %d\n", major(va.va_rdev), minor(va.va_rdev));
    kprintf("vp va_fsid major %d minor %d\n", major(va.va_fsid), minor(va.va_fsid));
    kprintf("vp size %qd\n", va.va_data_size);

    if (ref->vp->v_type == VREG)
    {
	/* Don't dump files with links. */
	if (va.va_nlink != 1)
	    goto out;

        device = va.va_fsid;
        p1 = &device;
        p2 = p;
        do_ioctl = &file_ioctl;
    }
    else if ((ref->vp->v_type == VBLK) || (ref->vp->v_type == VCHR))
    {
	/* Partition. */
        device = va.va_rdev;

        p1 = ref->vp;
        p2 = ref->ctx;
        do_ioctl = &device_ioctl;
    }
    else
    {
	/* Don't dump to non-regular files. */
	error = EFAULT;
        goto out;
    }
    ref->device = device;

    // get block size

    error = do_ioctl(p1, p2, DKIOCGETBLOCKSIZE, (caddr_t) &ref->blksize);
    if (error)
        goto out;

    if (ref->vp->v_type == VREG)
        ref->filelength = va.va_data_size;
    else
    {
        error = do_ioctl(p1, p2, DKIOCGETBLOCKCOUNT, (caddr_t) &fileblk);
        if (error)
            goto out;
	ref->filelength = fileblk * ref->blksize;    
    }

    // pin logical extents

    error = kern_ioctl_file_extents(ref, _DKIOCCSPINEXTENT, 0, ref->filelength);
    if (error && (ENOTTY != error)) goto out;
    ref->pinned = (error == 0);

    // generate the block list

    error = do_ioctl(p1, p2, DKIOCLOCKPHYSICALEXTENTS, NULL);
    if (error)
        goto out;
    locked = TRUE;

    f_offset = 0;
    while (f_offset < ref->filelength) 
    {
        if (ref->vp->v_type == VREG)
        {
            filechunk = 1*1024*1024*1024;
            daddr64_t blkno;

            error = VNOP_BLOCKMAP(ref->vp, f_offset, filechunk, &blkno, &filechunk, NULL, 0, NULL);
            if (error)
                goto out;

            fileblk = blkno * ref->blksize;
        }
        else if ((ref->vp->v_type == VBLK) || (ref->vp->v_type == VCHR))
        {
            fileblk = f_offset;
            filechunk = f_offset ? 0 : ref->filelength;
        }

        physoffset = 0;
        while (physoffset < filechunk)
        {
            dk_physical_extent_t getphysreq;
            bzero(&getphysreq, sizeof(getphysreq));

            getphysreq.offset = fileblk + physoffset;
            getphysreq.length = (filechunk - physoffset);
            error = do_ioctl(p1, p2, DKIOCGETPHYSICALEXTENT, (caddr_t) &getphysreq);
            if (error)
                goto out;
            if (!target)
            {
                target = getphysreq.dev;
            }
            else if (target != getphysreq.dev)
            {
                error = ENOTSUP;
                goto out;
            }
            callback(callback_ref, getphysreq.offset, getphysreq.length);
            physoffset += getphysreq.length;
        }
        f_offset += filechunk;
    }
    callback(callback_ref, 0ULL, 0ULL);

    if (ref->vp->v_type == VREG)
        p1 = &target;

    // get partition base

    error = do_ioctl(p1, p2, DKIOCGETBASE, (caddr_t) partitionbase_result);
    if (error)
        goto out;

    // get block size & constraints

    error = do_ioctl(p1, p2, DKIOCGETBLOCKSIZE, (caddr_t) &blksize);
    if (error)
        goto out;

    maxiocount = 1*1024*1024*1024;

    error = do_ioctl(p1, p2, DKIOCGETMAXBLOCKCOUNTREAD, (caddr_t) &count);
    if (error)
        count = 0;
    count *= blksize;
    if (count && (count < maxiocount))
        maxiocount = count;

    error = do_ioctl(p1, p2, DKIOCGETMAXBLOCKCOUNTWRITE, (caddr_t) &count);
    if (error)
        count = 0;
    count *= blksize;
    if (count && (count < maxiocount))
        maxiocount = count;

    error = do_ioctl(p1, p2, DKIOCGETMAXBYTECOUNTREAD, (caddr_t) &count);
    if (error)
        count = 0;
    if (count && (count < maxiocount))
        maxiocount = count;

    error = do_ioctl(p1, p2, DKIOCGETMAXBYTECOUNTWRITE, (caddr_t) &count);
    if (error)
        count = 0;
    if (count && (count < maxiocount))
        maxiocount = count;

    error = do_ioctl(p1, p2, DKIOCGETMAXSEGMENTBYTECOUNTREAD, (caddr_t) &count);
    if (error)
        count = 0;
    if (count && (count < maxiocount))
        maxiocount = count;

    error = do_ioctl(p1, p2, DKIOCGETMAXSEGMENTBYTECOUNTWRITE, (caddr_t) &count);
    if (error)
        count = 0;
    if (count && (count < maxiocount))
        maxiocount = count;

    kprintf("max io 0x%qx bytes\n", maxiocount);
    if (maxiocount_result)
        *maxiocount_result = maxiocount;

    error = do_ioctl(p1, p2, DKIOCISSOLIDSTATE, (caddr_t)&isssd);
    if (!error && isssd)
        flags |= kIOHibernateOptionSSD;

    if (partition_device_result)
        *partition_device_result = device;
    if (image_device_result)
        *image_device_result = target;
    if (flags)
        *oflags = flags;

out:
    kprintf("kern_open_file_for_direct_io(%d)\n", error);

    if (error && locked)
    {
        p1 = &device;
        (void) do_ioctl(p1, p2, DKIOCUNLOCKPHYSICALEXTENTS, NULL);
    }

    if (error && ref)
    {
	if (ref->vp)
	{
	    vnode_close(ref->vp, FWRITE, ref->ctx);
	    ref->vp = NULLVP;
	}
	vfs_context_rele(ref->ctx);
	kfree(ref, sizeof(struct kern_direct_file_io_ref_t));
	ref = NULL;
    }
    return(ref);
}
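/*
 * The six DKIOCGETMAX* probes above all clamp maxiocount the same way;
 * a helper (sketch, not in the source) makes the pattern explicit.
 * Pass scale == blksize for block counts and scale == 1 for byte counts.
 */
static void
example_clamp_maxio(off_t *maxiop, off_t count, uint32_t scale)
{
    count *= scale;
    if (count && (count < *maxiop))
        *maxiop = count;
}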
Example #22
0
int
vm_swapfile_preallocate(vnode_t vp, uint64_t *size)
{
	int		error = 0;
	uint64_t	file_size = 0;
	vfs_context_t	ctx = NULL;


	ctx = vfs_context_current();

#if CONFIG_PROTECT
	{
#if 0	// <rdar://11771612>

		if ((error = cp_vnode_setclass(vp, PROTECTION_CLASS_F))) {
			if(config_protect_bug) {
				printf("swap protection class set failed with %d\n", error);
			} else {
				panic("swap protection class set failed with %d\n", error);
			}
		}
#endif
		/* initialize content protection keys manually */
		if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) {
			printf("Content Protection key failure on swap: %d\n", error);
			vnode_put(vp);
			vp = NULL;
			goto done;
 		}
	}
#endif

	/*
  	 * This check exists because dynamic_pager creates the 1st swapfile,
	 * swapfile0, for us from user-space in a supported manner (with IO_NOZEROFILL etc).
	 * 
	 * If dynamic_pager, in the future, discontinues creating that file,
	 * then we need to change this check to a panic / assert or return an error.
	 * That's because we can't be sure if the file has been created correctly.
	 */

	if ((error = vnode_size(vp, (off_t*) &file_size, ctx)) != 0) {

		printf("vnode_size (existing files) for swap files failed: %d\n", error);
		goto done;
	} else {
	
		if (file_size == 0) {

			error = vnode_setsize(vp, *size, IO_NOZEROFILL, ctx);
		
			if (error) {
				printf("vnode_setsize for swap files failed: %d\n", error);
				goto done;
			}
		} else {

			*size = file_size;
		}
	}

	vnode_lock_spin(vp);
	SET(vp->v_flag, VSWAP);
	vnode_unlock(vp);
done:
	return error;
}
Example #23
0
int spl_vfs_root(mount_t mount, struct vnode **vp)
{
    return VFS_ROOT(mount, vp, vfs_context_current() );
}
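/*
 * Caller sketch (assumption): VFS_ROOT returns the root vnode with an
 * iocount held, so the caller must drop it with vnode_put() when done.
 */
static int
example_with_root(mount_t mp)
{
    struct vnode *rvp = NULL;
    int error = spl_vfs_root(mp, &rvp);

    if (error == 0) {
        /* ... use rvp here ... */
        vnode_put(rvp);
    }
    return error;
}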
Example #24
0
kern_return_t
map_fd_funneled(
	int			fd,
	vm_object_offset_t	offset,
	vm_offset_t		*va,
	boolean_t		findspace,
	vm_size_t		size)
{
	kern_return_t	result;
	struct fileproc	*fp;
	struct vnode	*vp;
	void *	pager;
	vm_offset_t	map_addr=0;
	vm_size_t	map_size;
	int		err=0;
	vm_map_t	my_map;
	proc_t		p = current_proc();
	struct vnode_attr vattr;

	/*
	 *	Find the inode; verify that it's a regular file.
	 */

	err = fp_lookup(p, fd, &fp, 0);
	if (err)
		return(err);
	
	if (fp->f_fglob->fg_type != DTYPE_VNODE){
		err = KERN_INVALID_ARGUMENT;
		goto bad;
	}

	if (!(fp->f_fglob->fg_flag & FREAD)) {
		err = KERN_PROTECTION_FAILURE;
		goto bad;
	}

	vp = (struct vnode *)fp->f_fglob->fg_data;
	err = vnode_getwithref(vp);
	if(err != 0) 
		goto bad;

	if (vp->v_type != VREG) {
		(void)vnode_put(vp);
		err = KERN_INVALID_ARGUMENT;
		goto bad;
	}

	AUDIT_ARG(vnpath, vp, ARG_VNODE1);

	/*
	 * POSIX: mmap needs to update access time for mapped files
	 */
	if ((vnode_vfsvisflags(vp) & MNT_NOATIME) == 0) {
		VATTR_INIT(&vattr);
		nanotime(&vattr.va_access_time);
		VATTR_SET_ACTIVE(&vattr, va_access_time);
		vnode_setattr(vp, &vattr, vfs_context_current());
	}
	
	if (offset & PAGE_MASK_64) {
		printf("map_fd: file offset not page aligned(%d : %s)\n",p->p_pid, p->p_comm);
		(void)vnode_put(vp);
		err = KERN_INVALID_ARGUMENT;
		goto bad;
	}
	map_size = round_page(size);

	/*
	 * Allow user to map in a zero length file.
	 */
	if (size == 0) {
		(void)vnode_put(vp);
		err = KERN_SUCCESS;
		goto bad;
	}
	/*
	 *	Map in the file.
	 */
	pager = (void *)ubc_getpager(vp);
	if (pager == NULL) {
		(void)vnode_put(vp);
		err = KERN_FAILURE;
		goto bad;
	}


	my_map = current_map();

	result = vm_map_64(
			my_map,
			&map_addr, map_size, (vm_offset_t)0, 
			VM_FLAGS_ANYWHERE, pager, offset, TRUE,
			VM_PROT_DEFAULT, VM_PROT_ALL,
			VM_INHERIT_DEFAULT);
	if (result != KERN_SUCCESS) {
		(void)vnode_put(vp);
		err = result;
		goto bad;
	}


	if (!findspace) {
		vm_offset_t	dst_addr;
		vm_map_copy_t	tmp;

		if (copyin(CAST_USER_ADDR_T(va), &dst_addr, sizeof (dst_addr))	||
					trunc_page_32(dst_addr) != dst_addr) {
			(void) vm_map_remove(
					my_map,
					map_addr, map_addr + map_size,
					VM_MAP_NO_FLAGS);
			(void)vnode_put(vp);
			err = KERN_INVALID_ADDRESS;
			goto bad;
		}

		result = vm_map_copyin(my_map, (vm_map_address_t)map_addr,
				       (vm_map_size_t)map_size, TRUE, &tmp);
		if (result != KERN_SUCCESS) {
			
			(void) vm_map_remove(my_map, vm_map_trunc_page(map_addr),
					vm_map_round_page(map_addr + map_size),
					VM_MAP_NO_FLAGS);
			(void)vnode_put(vp);
			err = result;
			goto bad;
		}

		result = vm_map_copy_overwrite(my_map,
					(vm_map_address_t)dst_addr, tmp, FALSE);
		if (result != KERN_SUCCESS) {
			vm_map_copy_discard(tmp);
			(void)vnode_put(vp);
			err = result;
			goto bad;
		}
	} else {
		if (copyout(&map_addr, CAST_USER_ADDR_T(va), sizeof (map_addr))) {
			(void) vm_map_remove(my_map, vm_map_trunc_page(map_addr),
					vm_map_round_page(map_addr + map_size),
					VM_MAP_NO_FLAGS);
			(void)vnode_put(vp);
			err = KERN_INVALID_ADDRESS;
			goto bad;
		}
	}

	ubc_setthreadcred(vp, current_proc(), current_thread());
	(void)ubc_map(vp, (PROT_READ | PROT_EXEC));
	(void)vnode_put(vp);
	err = 0;
bad:
	fp_drop(p, fd, fp, 0);
	return (err);
}
Example #25
0
/*
 * This routine exists to support the load_dylinker().
 *
 * This routine has its own, separate, understanding of the FAT file format,
 * which is terrifically unfortunate.
 */
static
load_return_t
get_macho_vnode(
    char			*path,
    integer_t		archbits,
    struct mach_header	*mach_header,
    off_t			*file_offset,
    off_t			*macho_size,
    struct vnode		**vpp
)
{
    struct vnode		*vp;
    vfs_context_t		ctx = vfs_context_current();
    proc_t			p = vfs_context_proc(ctx);
    kauth_cred_t		kerncred;
    struct nameidata nid, *ndp;
    boolean_t		is_fat;
    struct fat_arch		fat_arch;
    int			error = LOAD_SUCCESS;
    int resid;
    union {
        struct mach_header	mach_header;
        struct fat_header	fat_header;
        char	pad[512];
    } header;
    off_t fsize = (off_t)0;
    int err2;

    /*
     * Capture the kernel credential for use in the actual read of the
     * file, since the user doing the execution may have execute rights
     * but not read rights, but to exec something, we have to either map
     * or read it into the new process address space, which requires
     * read rights.  This is to deal with lack of common credential
     * serialization code which would treat NOCRED as "serialize 'root'".
     */
    kerncred = vfs_context_ucred(vfs_context_kernel());

    ndp = &nid;

    /* init the namei data to point at the user program's file name */
    NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE32, CAST_USER_ADDR_T(path), ctx);

    if ((error = namei(ndp)) != 0) {
        if (error == ENOENT) {
            error = LOAD_ENOENT;
        } else {
            error = LOAD_FAILURE;
        }
        return(error);
    }
    nameidone(ndp);
    vp = ndp->ni_vp;

    /* check for regular file */
    if (vp->v_type != VREG) {
        error = LOAD_PROTECT;
        goto bad1;
    }

    /* get size */
    if ((error = vnode_size(vp, &fsize, ctx)) != 0) {
        error = LOAD_FAILURE;
        goto bad1;
    }

    /* Check mount point */
    if (vp->v_mount->mnt_flag & MNT_NOEXEC) {
        error = LOAD_PROTECT;
        goto bad1;
    }

    /* check access */
    if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_EXECUTE, ctx)) != 0) {
        error = LOAD_PROTECT;
        goto bad1;
    }

    /* try to open it */
    if ((error = VNOP_OPEN(vp, FREAD, ctx)) != 0) {
        error = LOAD_PROTECT;
        goto bad1;
    }

    if ((error = vn_rdwr(UIO_READ, vp, (caddr_t)&header, sizeof(header), 0,
                         UIO_SYSSPACE32, IO_NODELOCKED, kerncred, &resid, p)) != 0) {
        error = LOAD_IOERROR;
        goto bad2;
    }

    if (header.mach_header.magic == MH_MAGIC ||
            header.mach_header.magic == MH_MAGIC_64)
        is_fat = FALSE;
    else if (header.fat_header.magic == FAT_MAGIC ||
             header.fat_header.magic == FAT_CIGAM)
        is_fat = TRUE;
    else {
        error = LOAD_BADMACHO;
        goto bad2;
    }

    if (is_fat) {
        /* Look up our architecture in the fat file. */
        error = fatfile_getarch_with_bits(vp, archbits, (vm_offset_t)(&header.fat_header), &fat_arch);
        if (error != LOAD_SUCCESS)
            goto bad2;

        /* Read the Mach-O header out of it */
        error = vn_rdwr(UIO_READ, vp, (caddr_t)&header.mach_header,
                        sizeof(header.mach_header), fat_arch.offset,
                        UIO_SYSSPACE32, IO_NODELOCKED, kerncred, &resid, p);
        if (error) {
            error = LOAD_IOERROR;
            goto bad2;
        }

        /* Is this really a Mach-O? */
        if (header.mach_header.magic != MH_MAGIC &&
                header.mach_header.magic != MH_MAGIC_64) {
            error = LOAD_BADMACHO;
            goto bad2;
        }

        *file_offset = fat_arch.offset;
        *macho_size = fat_arch.size;
    } else {
        /*
         * Force get_macho_vnode() to fail if the architecture bits
         * do not match the expected architecture bits.  This in
         * turn causes load_dylinker() to fail for the same reason,
         * so it ensures the dynamic linker and the binary are in
         * lock-step.  This is potentially bad, if we ever add to
         * the CPU_ARCH_* bits any bits that are desirable but not
         * required, since the dynamic linker might work, but we will
         * refuse to load it because of this check.
         */
        if ((cpu_type_t)(header.mach_header.cputype & CPU_ARCH_MASK) != archbits) {
            /* don't leak the open vnode on the early-return path */
            error = LOAD_BADARCH;
            goto bad2;
        }

        *file_offset = 0;
        *macho_size = fsize;
    }

    *mach_header = header.mach_header;
    *vpp = vp;

    ubc_setsize(vp, fsize);

    return (error);

bad2:
    err2 = VNOP_CLOSE(vp, FREAD, ctx);
    vnode_put(vp);
    return (error);

bad1:
    vnode_put(vp);
    return(error);
}
Example #26
0
/*
 * XXX Internally, we use VM_PROT_* somewhat interchangeably, but the correct
 * XXX usage is PROT_* from an interface perspective.  Thus the values of
 * XXX VM_PROT_* and PROT_* need to correspond.
 */
int
mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval)
{
	/*
	 *	Map in special device (must be SHARED) or file
	 */
	struct fileproc *fp;
	register struct		vnode *vp;
	int			flags;
	int			prot, file_prot;
	int			err=0;
	vm_map_t		user_map;
	kern_return_t		result;
	mach_vm_offset_t	user_addr;
	mach_vm_size_t		user_size;
	vm_object_offset_t	pageoff;
	vm_object_offset_t	file_pos;
	int			alloc_flags=0;
	boolean_t		docow;
	vm_prot_t		maxprot;
	void 			*handle;
	vm_pager_t		pager;
	int 			mapanon=0;
	int 			fpref=0;
	int error =0;
	int fd = uap->fd;

	user_addr = (mach_vm_offset_t)uap->addr;
	user_size = (mach_vm_size_t) uap->len;

	AUDIT_ARG(addr, user_addr);
	AUDIT_ARG(len, user_size);
	AUDIT_ARG(fd, uap->fd);

	prot = (uap->prot & VM_PROT_ALL);
#if 3777787
	/*
	 * Since the hardware currently does not support writing without
	 * read-before-write, or execution-without-read, if the request is
	 * for write or execute access, we must imply read access as well;
	 * otherwise programs expecting this to work will fail to operate.
	 */
	if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
		prot |= VM_PROT_READ;
#endif	/* radar 3777787 */

	flags = uap->flags;
	vp = NULLVP;

	/*
	 * The vm code does not have prototypes & the compiler doesn't do
	 * the right thing when you cast a 64-bit value and pass it in a
	 * function call. So here it is.
	 */
	file_pos = (vm_object_offset_t)uap->pos;


	/* make sure mapping fits into numeric range etc */
	if (file_pos + user_size > (vm_object_offset_t)-PAGE_SIZE_64)
		return (EINVAL);

	/*
	 * Align the file position to a page boundary,
	 * and save its page offset component.
	 */
	pageoff = (file_pos & PAGE_MASK);
	file_pos -= (vm_object_offset_t)pageoff;


	/* Adjust size for rounding (on both ends). */
	user_size += pageoff;			/* low end... */
	user_size = mach_vm_round_page(user_size);	/* hi end */


	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (flags & MAP_FIXED) {
		/*
		 * The specified address must have the same remainder
		 * as the file offset taken modulo PAGE_SIZE, so it
		 * should be aligned after adjustment by pageoff.
		 */
		user_addr -= pageoff;
		if (user_addr & PAGE_MASK)
			return (EINVAL);
	}
#ifdef notyet
	/* Do not have APIs to get this info; need to wait till then */
	/*
	 * XXX for non-fixed mappings where no hint is provided or
	 * the hint would fall in the potential heap space,
	 * place it after the end of the largest possible heap.
	 *
	 * There should really be a pmap call to determine a reasonable
	 * location.
	 */
	else if (addr < mach_vm_round_page(p->p_vmspace->vm_daddr + MAXDSIZ))
		addr = mach_vm_round_page(p->p_vmspace->vm_daddr + MAXDSIZ);

#endif

	alloc_flags = 0;

	if (flags & MAP_ANON) {
		/*
		 * Mapping blank space is trivial.  Use positive fds as the alias
		 * value for memory tracking. 
		 */
		if (fd != -1) {
			/*
			 * Use "fd" to pass (some) Mach VM allocation flags,
			 * (see the VM_FLAGS_* definitions).
			 */
			alloc_flags = fd & (VM_FLAGS_ALIAS_MASK |
					    VM_FLAGS_PURGABLE);
			if (alloc_flags != fd) {
				/* reject if there are any extra flags */
				return EINVAL;
			}
		}
			
		handle = NULL;
		maxprot = VM_PROT_ALL;
		file_pos = 0;
		mapanon = 1;
	} else {
		struct vnode_attr va;
		vfs_context_t ctx = vfs_context_current();

		/*
		 * Mapping file, get fp for validation. Obtain vnode and make
		 * sure it is of appropriate type.
		 */
		err = fp_lookup(p, fd, &fp, 0);
		if (err)
			return(err);
		fpref = 1;
		if(fp->f_fglob->fg_type == DTYPE_PSXSHM) {
			uap->addr = (user_addr_t)user_addr;
			uap->len = (user_size_t)user_size;
			uap->prot = prot;
			uap->flags = flags;
			uap->pos = file_pos;
			error = pshm_mmap(p, uap, retval, fp, (off_t)pageoff);
			goto bad;
		}

		if (fp->f_fglob->fg_type != DTYPE_VNODE) {
			error = EINVAL;
			goto bad;
		}
		vp = (struct vnode *)fp->f_fglob->fg_data;
		error = vnode_getwithref(vp);
		if(error != 0)
			goto bad;

		if (vp->v_type != VREG && vp->v_type != VCHR) {
			(void)vnode_put(vp);
			error = EINVAL;
			goto bad;
		}

		AUDIT_ARG(vnpath, vp, ARG_VNODE1);
		
		/*
		 * POSIX: mmap needs to update access time for mapped files
		 */
		if ((vnode_vfsvisflags(vp) & MNT_NOATIME) == 0) {
			VATTR_INIT(&va);
			nanotime(&va.va_access_time);
			VATTR_SET_ACTIVE(&va, va_access_time);
			vnode_setattr(vp, &va, ctx);
		}
		
		/*
		 * XXX hack to handle use of /dev/zero to map anon memory (ala
		 * SunOS).
		 */
		if (vp->v_type == VCHR || vp->v_type == VSTR) {
			(void)vnode_put(vp);
			error = ENODEV;
			goto bad;
		} else {
			/*
			 * Ensure that file and memory protections are
			 * compatible.  Note that we only worry about
			 * writability if mapping is shared; in this case,
			 * current and max prot are dictated by the open file.
			 * XXX use the vnode instead?  Problem is: what
			 * credentials do we use for determination? What if
			 * proc does a setuid?
			 */
			maxprot = VM_PROT_EXECUTE;	/* ??? */
			if (fp->f_fglob->fg_flag & FREAD)
				maxprot |= VM_PROT_READ;
			else if (prot & PROT_READ) {
				(void)vnode_put(vp);
				error = EACCES;
				goto bad;
			}
			/*
			 * If we are sharing potential changes (either via
			 * MAP_SHARED or via the implicit sharing of character
			 * device mappings), and we are trying to get write
			 * permission although we opened it without asking
			 * for it, bail out. 
			 */

			if ((flags & MAP_SHARED) != 0) {
				if ((fp->f_fglob->fg_flag & FWRITE) != 0) {
 					/*
 					 * check for write access
 					 *
 					 * Note that we already made this check when granting FWRITE
 					 * against the file, so it seems redundant here.
 					 */
 					error = vnode_authorize(vp, NULL, KAUTH_VNODE_CHECKIMMUTABLE, ctx);
 
 					/* if not granted for any reason, but we wanted it, bad */
 					if ((prot & PROT_WRITE) && (error != 0)) {
 						vnode_put(vp);
  						goto bad;
  					}
 
 					/* if writable, remember */
 					if (error == 0)
  						maxprot |= VM_PROT_WRITE;

				} else if ((prot & PROT_WRITE) != 0) {
					(void)vnode_put(vp);
					error = EACCES;
					goto bad;
				}
			} else
				maxprot |= VM_PROT_WRITE;

			handle = (void *)vp;
#if CONFIG_MACF
			error = mac_file_check_mmap(vfs_context_ucred(ctx),
			    fp->f_fglob, prot, flags, &maxprot);
			if (error) {
				(void)vnode_put(vp);
				goto bad;
			}
#endif /* MAC */
		}
	}

	if (user_size == 0)  {
		if (!mapanon)
			(void)vnode_put(vp);
		error = 0;
		goto bad;
	}

	/*
	 *	We bend a little - round the start and end addresses
	 *	to the nearest page boundary.
	 */
	user_size = mach_vm_round_page(user_size);

	if (file_pos & PAGE_MASK_64) {
		if (!mapanon)
			(void)vnode_put(vp);
		error = EINVAL;
		goto bad;
	}

	user_map = current_map();

	if ((flags & MAP_FIXED) == 0) {
		alloc_flags |= VM_FLAGS_ANYWHERE;
		user_addr = mach_vm_round_page(user_addr);
	} else {
		if (user_addr != mach_vm_trunc_page(user_addr)) {
		        if (!mapanon)
			        (void)vnode_put(vp);
			error = EINVAL;
			goto bad;
		}
		/*
		 * mmap(MAP_FIXED) will replace any existing mappings in the
		 * specified range, if the new mapping is successful.
		 * If we just deallocate the specified address range here,
		 * another thread might jump in and allocate memory in that
		 * range before we get a chance to establish the new mapping,
		 * and we won't have a chance to restore the old mappings.
		 * So we use VM_FLAGS_OVERWRITE to let Mach VM know that it
		 * has to deallocate the existing mappings and establish the
		 * new ones atomically.
		 */
		alloc_flags |= VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
	}

	if (flags & MAP_NOCACHE)
		alloc_flags |= VM_FLAGS_NO_CACHE;

	/*
	 * Lookup/allocate object.
	 */
	if (handle == NULL) {
		pager = NULL;
#ifdef notyet
/* Hmm .. */
#if defined(VM_PROT_READ_IS_EXEC)
		if (prot & VM_PROT_READ)
			prot |= VM_PROT_EXECUTE;
		if (maxprot & VM_PROT_READ)
			maxprot |= VM_PROT_EXECUTE;
#endif
#endif

#if 3777787
		if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
			prot |= VM_PROT_READ;
		if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
			maxprot |= VM_PROT_READ;
#endif	/* radar 3777787 */

		result = vm_map_enter_mem_object(user_map,
						 &user_addr, user_size,
						 0, alloc_flags,
						 IPC_PORT_NULL, 0, FALSE,
						 prot, maxprot,
						 (flags & MAP_SHARED) ?
						 VM_INHERIT_SHARE : 
						 VM_INHERIT_DEFAULT);
		if (result != KERN_SUCCESS) 
				goto out;
	} else {
		pager = (vm_pager_t)ubc_getpager(vp);
		
		if (pager == NULL) {
			(void)vnode_put(vp);
			error = ENOMEM;
			goto bad;
		}

		/*
		 *  Set credentials:
		 *	FIXME: if we're writing the file we need a way to
		 *      ensure that someone doesn't replace our R/W creds
		 * 	with ones that only work for read.
		 */

		ubc_setthreadcred(vp, p, current_thread());
		docow = FALSE;
		if ((flags & (MAP_ANON|MAP_SHARED)) == 0) {
			docow = TRUE;
		}

#ifdef notyet
/* Hmm .. */
#if defined(VM_PROT_READ_IS_EXEC)
		if (prot & VM_PROT_READ)
			prot |= VM_PROT_EXECUTE;
		if (maxprot & VM_PROT_READ)
			maxprot |= VM_PROT_EXECUTE;
#endif
#endif /* notyet */

#if 3777787
		if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
			prot |= VM_PROT_READ;
		if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
			maxprot |= VM_PROT_READ;
#endif	/* radar 3777787 */

		result = vm_map_enter_mem_object(user_map,
						 &user_addr, user_size,
						 0, alloc_flags,
						 (ipc_port_t)pager, file_pos,
						 docow, prot, maxprot,
						 (flags & MAP_SHARED) ?
						 VM_INHERIT_SHARE :
						 VM_INHERIT_DEFAULT);

		if (result != KERN_SUCCESS) {
			(void)vnode_put(vp);
			goto out;
		}

		file_prot = prot & (PROT_READ | PROT_WRITE | PROT_EXEC);
		if (docow) {
			/* private mapping: won't write to the file */
			file_prot &= ~PROT_WRITE;
		}
		(void) ubc_map(vp, file_prot);
	}

	if (!mapanon)
		(void)vnode_put(vp);

out:
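	/* Translate the Mach VM result into the BSD errno this syscall returns. */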
	switch (result) {
	case KERN_SUCCESS:
		*retval = user_addr + pageoff;
		error = 0;
		break;
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		error = ENOMEM;
		break;
	case KERN_PROTECTION_FAILURE:
		error = EACCES;
		break;
	default:
		error = EINVAL;
		break;
	}
bad:
	if (fpref)
		fp_drop(p, fd, fp, 0);

	KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_mmap) | DBG_FUNC_NONE), fd, (uint32_t)(*retval), (uint32_t)user_size, error, 0);
	KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO2, SYS_mmap) | DBG_FUNC_NONE), (uint32_t)(*retval >> 32), (uint32_t)(user_size >> 32),
			      (uint32_t)(file_pos >> 32), (uint32_t)file_pos, 0);

	return(error);
}
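
A note on the VM_FLAGS_OVERWRITE path above: from userspace, it is simply the contract that mmap(MAP_FIXED) replaces any existing mapping in the range with no window where the range is unmapped. A minimal standalone sketch of that behavior (illustrative only, not taken from the kernel source):

#include <sys/mman.h>
#include <stdio.h>

int
remap_fixed_demo(void)
{
	size_t len = 4096;

	/* First mapping: anonymous, readable and writable. */
	void *base = mmap(NULL, len, PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_PRIVATE, -1, 0);
	if (base == MAP_FAILED)
		return (-1);

	/*
	 * MAP_FIXED at the same address atomically replaces the old
	 * mapping -- there is no moment where the range is unmapped,
	 * which is what VM_FLAGS_OVERWRITE provides on the kernel side.
	 */
	void *fixed = mmap(base, len, PROT_READ,
	    MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0);
	if (fixed == MAP_FAILED)
		return (-1);

	printf("remapped %p read-only in place\n", fixed);
	return (munmap(fixed, len));
}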
Example #27
/*
 * shared_region_map_np()
 *
 * This system call is intended for dyld.
 *
 * dyld uses this to map a shared cache file into a shared region.
 * This is usually done only the first time a shared cache is needed.
 * Subsequent processes will just use the populated shared region without
 * requiring any further setup.
 */
int
shared_region_map_np(
	struct proc				*p,
	struct shared_region_map_np_args	*uap,
	__unused int				*retvalp)
{
	int				error;
	kern_return_t			kr;
	int				fd;
	struct fileproc			*fp;
	struct vnode			*vp, *root_vp;
	struct vnode_attr		va;
	off_t				fs;
	memory_object_size_t		file_size;
	user_addr_t			user_mappings;
	struct shared_file_mapping_np	*mappings;
#define SFM_MAX_STACK	8
	struct shared_file_mapping_np	stack_mappings[SFM_MAX_STACK];
	unsigned int			mappings_count;
	vm_size_t			mappings_size;
	memory_object_control_t		file_control;
	struct vm_shared_region		*shared_region;

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] -> map\n",
		 current_thread(), p->p_pid, p->p_comm));

	shared_region = NULL;
	mappings_count = 0;
	mappings_size = 0;
	mappings = NULL;
	fp = NULL;
	vp = NULL;

	/* get file descriptor for shared region cache file */
	fd = uap->fd;

	/* get file structure from file descriptor */
	error = fp_lookup(p, fd, &fp, 0);
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map: "
			 "fd=%d lookup failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm, fd, error));
		goto done;
	}

	/* make sure we're attempting to map a vnode */
	if (fp->f_fglob->fg_type != DTYPE_VNODE) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map: "
			 "fd=%d not a vnode (type=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 fd, fp->f_fglob->fg_type));
		error = EINVAL;
		goto done;
	}

	/* we need at least read permission on the file */
	if (! (fp->f_fglob->fg_flag & FREAD)) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map: "
			 "fd=%d not readable\n",
			 current_thread(), p->p_pid, p->p_comm, fd));
		error = EPERM;
		goto done;
	}

	/* get vnode from file structure */
	error = vnode_getwithref((vnode_t) fp->f_fglob->fg_data);
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map: "
			 "fd=%d getwithref failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm, fd, error));
		goto done;
	}
	vp = (struct vnode *) fp->f_fglob->fg_data;

	/* make sure the vnode is a regular file */
	if (vp->v_type != VREG) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "not a file (type=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, vp->v_type));
		error = EINVAL;
		goto done;
	}

	/* make sure vnode is on the process's root volume */
	root_vp = p->p_fd->fd_rdir;
	if (root_vp == NULL) {
		root_vp = rootvnode;
	}
	if (vp->v_mount != root_vp->v_mount) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "not on process's root volume\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name));
		error = EPERM;
		goto done;
	}

	/* make sure vnode is owned by "root" */
	VATTR_INIT(&va);
	VATTR_WANTED(&va, va_uid);
	error = vnode_getattr(vp, &va, vfs_context_current());
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "vnode_getattr(%p) failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, vp, error));
		goto done;
	}
	if (va.va_uid != 0) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "owned by uid=%d instead of 0\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, va.va_uid));
		error = EPERM;
		goto done;
	}

	/* get vnode size */
	error = vnode_size(vp, &fs, vfs_context_current());
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "vnode_size(%p) failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, vp, error));
		goto done;
	}
	file_size = fs;

	/* get the file's memory object handle */
	file_control = ubc_getobject(vp, UBC_HOLDOBJECT);
	if (file_control == MEMORY_OBJECT_CONTROL_NULL) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "no memory object\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name));
		error = EINVAL;
		goto done;
	}
			 
	/* get the list of mappings the caller wants us to establish */
	mappings_count = uap->count;	/* number of mappings */
	mappings_size = (vm_size_t) (mappings_count * sizeof (mappings[0]));
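	/*
	 * Small requests are serviced from the on-stack array below;
	 * anything larger than SFM_MAX_STACK entries is rejected, which
	 * also bounds the size of the later copyin().
	 */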
	if (mappings_count == 0) {
		SHARED_REGION_TRACE_INFO(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "no mappings\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name));
		error = 0;	/* no mappings: we're done! */
		goto done;
	} else if (mappings_count <= SFM_MAX_STACK) {
		mappings = &stack_mappings[0];
	} else {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "too many mappings (%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, mappings_count));
		error = EINVAL;
		goto done;
	}

	user_mappings = uap->mappings;	/* the mappings, in user space */
	error = copyin(user_mappings,
		       mappings,
		       mappings_size);
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "copyin(0x%llx, %d) failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, (uint64_t)user_mappings, mappings_count, error));
		goto done;
	}

	/* get the process's shared region (setup in vm_map_exec()) */
	shared_region = vm_shared_region_get(current_task());
	if (shared_region == NULL) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "no shared region\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name));
		goto done;
	}

	/* map the file into that shared region's submap */
	kr = vm_shared_region_map_file(shared_region,
				       mappings_count,
				       mappings,
				       file_control,
				       file_size,
				       (void *) p->p_fd->fd_rdir);
	if (kr != KERN_SUCCESS) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "vm_shared_region_map_file() failed kr=0x%x\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, kr));
		switch (kr) {
		case KERN_INVALID_ADDRESS:
			error = EFAULT;
			break;
		case KERN_PROTECTION_FAILURE:
			error = EPERM;
			break;
		case KERN_NO_SPACE:
			error = ENOMEM;
			break;
		case KERN_FAILURE:
		case KERN_INVALID_ARGUMENT:
		default:
			error = EINVAL;
			break;
		}
		goto done;
	}

	/*
	 * The mapping was successful.  Let the buffer cache know
	 * that we've mapped that file with these protections.  This
	 * prevents the vnode from getting recycled while it's mapped.
	 */
	(void) ubc_map(vp, VM_PROT_READ);
	error = 0;

	/* update the vnode's access time */
	if (! (vnode_vfsvisflags(vp) & MNT_NOATIME)) {
		VATTR_INIT(&va);
		nanotime(&va.va_access_time);
		VATTR_SET_ACTIVE(&va, va_access_time);
		vnode_setattr(vp, &va, vfs_context_current());
	}

	if (p->p_flag & P_NOSHLIB) {
		/* signal that this process is now using split libraries */
		OSBitAndAtomic(~((uint32_t)P_NOSHLIB), (UInt32 *)&p->p_flag);
	}

done:
	if (vp != NULL) {
		/*
		 * release the vnode...
		 * ubc_map() still holds it for us in the non-error case
		 */
		(void) vnode_put(vp);
		vp = NULL;
	}
	if (fp != NULL) {
		/* release the file descriptor */
		fp_drop(p, fd, fp, 0);
		fp = NULL;
	}

	if (shared_region != NULL) {
		vm_shared_region_deallocate(shared_region);
	}

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] <- map\n",
		 current_thread(), p->p_pid, p->p_comm));

	return error;
}
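
For context on the caller's side: a dyld-style client fills in an array of shared_file_mapping_np entries describing where each cache segment should land in the shared region, then passes them along with the cache file descriptor. A rough sketch, assuming a private wrapper named __shared_region_map_np (the real dyld SPI may differ):

#include <fcntl.h>
#include <unistd.h>
#include <mach/vm_prot.h>
#include <mach/shared_region.h>	/* struct shared_file_mapping_np */

/* Private syscall wrapper -- assumed name, not a public API. */
extern int __shared_region_map_np(int fd, unsigned int count,
    const struct shared_file_mapping_np mappings[]);

static int
map_shared_cache(const char *path)
{
	int fd = open(path, O_RDONLY);
	if (fd < 0)
		return (-1);

	/* One hypothetical read-only mapping of the first 0x1000 bytes. */
	struct shared_file_mapping_np m = {
		.sfm_address	 = SHARED_REGION_BASE,	/* arch-specific base */
		.sfm_size	 = 0x1000,
		.sfm_file_offset = 0,
		.sfm_max_prot	 = VM_PROT_READ,
		.sfm_init_prot	 = VM_PROT_READ,
	};

	int error = __shared_region_map_np(fd, 1, &m);
	close(fd);
	return (error);
}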
Example #28
/* ARGSUSED */
int
auditctl(proc_t p, struct auditctl_args *uap, __unused int32_t *retval)
{
	struct nameidata nd;
	kauth_cred_t cred;
	struct vnode *vp;
	int error = 0;

	error = suser(kauth_cred_get(), &p->p_acflag);
	if (error)
		return (error);

	vp = NULL;
	cred = NULL;

	/*
	 * If a path is specified, open the replacement vnode, perform
	 * validity checks, and grab another reference to the current
	 * credential.
	 *
	 * XXX Changes API slightly.  NULL path no longer disables audit but
	 * returns EINVAL.
	 */
	if (uap->path == USER_ADDR_NULL)
		return (EINVAL);

	NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | LOCKLEAF | AUDITVNPATH1,
	    (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 :
	    UIO_USERSPACE32), uap->path, vfs_context_current());
	error = vn_open(&nd, AUDIT_OPEN_FLAGS, 0);
	if (error)
		return (error);
	vp = nd.ni_vp;
#if CONFIG_MACF
	/*
	 * Accessibility of the vnode was determined in vn_open; the
	 * mac_system_check_auditctl hook should only determine whether that
	 * vnode is appropriate for storing audit data and whether the caller
	 * is permitted to control the auditing system at all.  For example, a
	 * confidentiality policy may want to ensure that audit files are
	 * always high sensitivity.
	 */
	error = mac_system_check_auditctl(kauth_cred_get(), vp);
	if (error) {
		vn_close(vp, AUDIT_CLOSE_FLAGS, vfs_context_current());
		vnode_put(vp);
		return (error);
	}
#endif
	if (vp->v_type != VREG) {
		vn_close(vp, AUDIT_CLOSE_FLAGS, vfs_context_current());
		vnode_put(vp);
		return (EINVAL);
	}
	mtx_lock(&audit_mtx);
	/*
	 * XXXAUDIT: Should audit_suspended actually be cleared by
	 * audit_worker?
	 */
	audit_suspended = 0;
	mtx_unlock(&audit_mtx);

	/*
	 * The following gets unreferenced in audit_rotate_vnode()
	 * after the rotation and it is no longer needed.
	 */
	cred = kauth_cred_get_with_ref();
	audit_rotate_vnode(cred, vp);
	vnode_put(vp);

	return (error);
}
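
The open-then-validate pattern above (vn_open, reject anything that is not VREG, then hand the vnode off while it stays referenced) recurs in any kernel facility that accepts a log-file path. A stripped-down sketch of the same idea using the public vnode KPI rather than the private nameidata interface; the helper name is hypothetical:

#include <sys/vnode.h>
#include <sys/fcntl.h>
#include <sys/errno.h>

/*
 * Hypothetical helper: open 'path' for appending and verify it is a
 * regular file before handing it to a subsystem that will hold it.
 */
static int
open_log_vnode(const char *path, vnode_t *vpp)
{
	vfs_context_t ctx = vfs_context_current();
	vnode_t vp = NULL;
	int error;

	error = vnode_open(path, FWRITE | O_APPEND | O_CREAT, 0600, 0,
	    &vp, ctx);
	if (error)
		return (error);

	if (!vnode_isreg(vp)) {
		/* Only regular files make sense as log targets. */
		(void)vnode_close(vp, FWRITE, ctx);
		return (EINVAL);
	}

	*vpp = vp;	/* caller now owns the reference */
	return (0);
}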