예제 #1
0
/*
 * fp_mmap() - map the vnode behind an already-open file pointer into the
 * address space of the current process.
 *
 * non-anonymous, non-stack descriptor mappings only!
 *
 * Arguments:
 *	addr_arg  placement hint, or the exact address when MAP_FIXED is set
 *	size	  length in bytes; grown by the page offset of 'pos' and
 *		  rounded up to a whole number of pages
 *	prot	  requested protection, masked with VM_PROT_ALL
 *	flags	  MAP_* flags; MAP_ANON is rejected with EINVAL
 *	fp	  file to map; must be DTYPE_VNODE on a VREG or VCHR vnode
 *	pos	  file offset; truncated to a page boundary before mapping
 *	resp	  if not NULL, receives the address selected by vm_mmap()
 *
 * Returns 0 on success, otherwise an errno value: EINVAL for bad
 * arguments or an unmappable file, EBADF for a character device with no
 * v_rdev, EACCES/EPERM when the requested protection conflicts with how
 * the file was opened or with its flags, or whatever VOP_GETATTR() or
 * vm_mmap() report.
 *
 * NOTE: the address stored in *resp is the one vm_mmap() produced; the
 * page offset of 'pos' is not added back to it.
 *
 * This routine mostly snarfed from vm/vm_mmap.c
 */
int
fp_mmap(void *addr_arg, size_t size, int prot, int flags, struct file *fp,
    off_t pos, void **resp)
{
    struct thread *td = curthread;
    struct proc *p = td->td_proc;
    vm_size_t pageoff;
    vm_prot_t maxprot;
    vm_offset_t addr;
    void *handle;
    int error;
    vm_object_t obj;
    struct vmspace *vms = p->p_vmspace;
    struct vnode *vp;

    prot &= VM_PROT_ALL;

    /* Sizes with the top bit set and anonymous mappings are refused. */
    if ((ssize_t)size < 0 || (flags & MAP_ANON))
	return (EINVAL);

    /* Split the file offset into a page-aligned base and a remainder. */
    pageoff = (pos & PAGE_MASK);
    pos -= pageoff;

    /* Adjust size for rounding (on both ends). */
    size += pageoff;				/* low end... */
    size = (vm_size_t)round_page(size);		/* hi end */
    addr = (vm_offset_t)addr_arg;

    /*
     * Check for illegal addresses.  Watch out for address wrap... Note
     * that VM_*_ADDRESS are not constants due to casts (argh).
     */
    if (flags & MAP_FIXED) {
	/*
	 * The specified address must have the same remainder
	 * as the file offset taken modulo PAGE_SIZE, so it
	 * should be aligned after adjustment by pageoff.
	 */
	addr -= pageoff;
	if (addr & PAGE_MASK)
	    return (EINVAL);
	/* Address range must be all in user VM space. */
	if (VM_MAX_USER_ADDRESS > 0 && addr + size > VM_MAX_USER_ADDRESS)
	    return (EINVAL);
	if (VM_MIN_USER_ADDRESS > 0 && addr < VM_MIN_USER_ADDRESS)
	    return (EINVAL);
	/* A wrapped end address slips past the upper-bound test above. */
	if (addr + size < addr)
	    return (EINVAL);
    } else if (addr == 0 ||
	(addr >= round_page((vm_offset_t)vms->vm_taddr) &&
	 addr < round_page((vm_offset_t)vms->vm_daddr + maxdsiz))
    ) {
	/*
	 * XXX for non-fixed mappings where no hint is provided or
	 * the hint would fall in the potential heap space,
	 * place it after the end of the largest possible heap.
	 *
	 * There should really be a pmap call to determine a reasonable
	 * location.
	 */
	addr = round_page((vm_offset_t)vms->vm_daddr + maxdsiz);
    }

    /*
     * Only vnode-backed files can be mapped.
     */
    if (fp->f_type != DTYPE_VNODE)
	return (EINVAL);

    /*
     * POSIX shared-memory objects are defined to have
     * kernel persistence, and are not defined to support
     * read(2)/write(2) -- or even open(2).  Thus, we can
     * use MAP_NOSYNC to trade on-disk coherence for speed.
     * The shm_open(3) library routine turns on the FPOSIXSHM
     * flag to request this behavior.
     */
    if (fp->f_flag & FPOSIXSHM)
	flags |= MAP_NOSYNC;
    vp = (struct vnode *) fp->f_data;
    if (vp->v_type != VREG && vp->v_type != VCHR)
	return (EINVAL);

    if (vp->v_type == VREG) {
	/*
	 * Get the proper underlying object
	 */
	if ((obj = vp->v_object) == NULL)
	    return (EINVAL);
	KKASSERT(vp == (struct vnode *)obj->handle);
    } else if (vp->v_rdev == NULL) {
	/*
	 * Character device without a device behind it (e.g. revoked):
	 * refuse it here rather than hand it to the code below.
	 */
	return (EBADF);
    }

    /*
     * XXX hack to handle use of /dev/zero to map anon memory (ala
     * SunOS).
     */
    if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
	handle = NULL;
	maxprot = VM_PROT_ALL;
	flags |= MAP_ANON;
	pos = 0;
    } else {
	/*
	 * cdevs does not provide private mappings of any kind.
	 */
	if (vp->v_type == VCHR &&
	    (flags & (MAP_PRIVATE|MAP_COPY))) {
		error = EINVAL;
		goto done;
	}
	/*
	 * Ensure that file and memory protections are
	 * compatible.  Note that we only worry about
	 * writability if mapping is shared; in this case,
	 * current and max prot are dictated by the open file.
	 * XXX use the vnode instead?  Problem is: what
	 * credentials do we use for determination? What if
	 * proc does a setuid?
	 */
	maxprot = VM_PROT_EXECUTE;	/* ??? */
	if (fp->f_flag & FREAD) {
	    maxprot |= VM_PROT_READ;
	} else if (prot & PROT_READ) {
	    error = EACCES;
	    goto done;
	}
	/*
	 * If we are sharing potential changes (either via
	 * MAP_SHARED or via the implicit sharing of character
	 * device mappings), and we are trying to get write
	 * permission although we opened it without asking
	 * for it, bail out.
	 */
	if ((flags & MAP_SHARED) != 0 ||
	    (vp->v_type == VCHR)
	) {
	    if ((fp->f_flag & FWRITE) != 0) {
		struct vattr va;

		if ((error = VOP_GETATTR(vp, &va))) {
		    goto done;
		}
		/* Immutable/append-only files never get write access. */
		if ((va.va_flags & (IMMUTABLE|APPEND)) == 0) {
		    maxprot |= VM_PROT_WRITE;
		} else if (prot & PROT_WRITE) {
		    error = EPERM;
		    goto done;
		}
	    } else if ((prot & PROT_WRITE) != 0) {
		error = EACCES;
		goto done;
	    }
	} else {
	    /* Private mappings of a file may always be written. */
	    maxprot |= VM_PROT_WRITE;
	}
	handle = (void *)vp;
    }
    error = vm_mmap(&vms->vm_map, &addr, size, prot,
		    maxprot, flags, handle, pos);
    /*
     * Report the chosen address whenever the caller supplied somewhere
     * to put it.  This is keyed on resp, not on the hint: a caller that
     * passes no hint still needs to learn where the mapping landed.
     */
    if (error == 0 && resp != NULL)
	*resp = (void *)addr;
done:
    return (error);
}
예제 #2
0
/*
 * kern_mmap() - kernel back end of the mmap system call.
 *
 * mmap_args(void *addr, size_t len, int prot, int flags, int fd,
 *		long pad, off_t pos)
 *
 * Neither the file offset nor the address has to be page aligned, but
 * when MAP_FIXED (or MAP_TRYFIXED) is set both must share the same
 * remainder modulo PAGE_SIZE (POSIX 1003.1b).  The mapping itself is
 * created on a page boundary and the address handed back through 'res'
 * is bumped up by the page offset of the file position.
 *
 * Generally speaking, only character devices which are themselves
 * memory-based, such as a video framebuffer, can be mmap'd.  Otherwise
 * there would be no cache coherency between a descriptor and a VM mapping
 * both to the same character device.
 *
 * Block devices can be mmap'd no matter what they represent.  Cache
 * coherency is maintained as long as you do not write directly to the
 * underlying character device.
 *
 * Returns 0 and stores the user-visible address in *res on success,
 * otherwise returns an errno value and leaves *res alone.
 *
 * No requirements
 */
int
kern_mmap(struct vmspace *vms, caddr_t uaddr, size_t ulen,
	  int uprot, int uflags, int fd, off_t upos, void **res)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *filep = NULL;
	struct vnode *vn;
	vm_object_t vmobj;
	vm_offset_t mapaddr = (vm_offset_t)uaddr;
	vm_offset_t endaddr;
	vm_size_t len = ulen;
	vm_size_t pgoff;
	vm_prot_t prot = uprot & VM_PROT_ALL;
	vm_prot_t maxprot;
	off_t pos = upos;
	void *backing;
	int flags = uflags;
	int want_rw;
	int error;

	KKASSERT(p);

	/* Non-zero when the caller asked for both read and write access. */
	want_rw = ((prot & (PROT_READ | PROT_WRITE)) ==
		   (PROT_READ | PROT_WRITE));

	/*
	 * Basic argument sanity.  Anonymous memory has neither a
	 * descriptor nor an offset, and an empty mapping is meaningless.
	 *
	 * NOTE: The full unsigned range is accepted for the length.
	 */
	if ((flags & MAP_ANON) != 0 && (fd != -1 || pos != 0))
		return (EINVAL);
	if (len == 0)
		return (EINVAL);

	/*
	 * A stack mapping must be descriptor-less and R+W; from here on
	 * it is handled as anonymous memory at offset 0.
	 */
	if (flags & MAP_STACK) {
		if (fd != -1 || !want_rw)
			return (EINVAL);
		flags |= MAP_ANON;
		pos = 0;
	}

	/*
	 * Virtual page tables and MAP_STACK are mutually exclusive: the
	 * combination makes no sense and both use the aux union.
	 *
	 * The page table lives in the backing object and may be updated
	 * by the kernel, hence the R+W requirement.
	 */
	if (flags & MAP_VPAGETABLE) {
		if (vkernel_enable == 0)
			return (EOPNOTSUPP);
		if ((flags & MAP_STACK) || !want_rw)
			return (EINVAL);
	}

	/*
	 * Pull the sub-page part out of the file position, then widen the
	 * length by that amount and round it up to whole pages.  A result
	 * smaller than what the caller passed in means the arithmetic
	 * wrapped.
	 */
	pgoff = (pos & PAGE_MASK);
	pos -= pgoff;
	len = (vm_size_t)round_page(len + pgoff);
	if (len < ulen)
		return (EINVAL);

	/*
	 * Settle on an address.  Note that VM_*_ADDRESS are not constants
	 * due to casts (argh).
	 */
	if ((flags & (MAP_FIXED | MAP_TRYFIXED)) == 0) {
		/*
		 * Caller has no firm requirement: ask for a hint, which
		 * also applies mmap offset randomization if enabled.
		 */
		mapaddr = vm_map_hint(p, mapaddr, prot);
	} else {
		/*
		 * The address must be congruent to the file offset modulo
		 * PAGE_SIZE, i.e. page aligned once pgoff is removed.
		 */
		mapaddr -= pgoff;
		if (mapaddr & PAGE_MASK)
			return (EINVAL);

		/*
		 * The whole range has to lie in user VM space without
		 * wrapping around the top of the address space.
		 */
		endaddr = mapaddr + len;
		if (endaddr < mapaddr)
			return (EINVAL);
		if (VM_MAX_USER_ADDRESS > 0 && endaddr > VM_MAX_USER_ADDRESS)
			return (EINVAL);
		if (VM_MIN_USER_ADDRESS > 0 && mapaddr < VM_MIN_USER_ADDRESS)
			return (EINVAL);
	}

	if (flags & MAP_ANON) {
		/*
		 * Blank memory: nothing backs it, everything is permitted.
		 */
		backing = NULL;
		maxprot = VM_PROT_ALL;
	} else {
		/*
		 * File mapping: take a hold on the descriptor's file and
		 * make sure a vnode sits behind it.  The hold is dropped
		 * at 'done'.
		 */
		filep = holdfp(p->p_fd, fd, -1);
		if (filep == NULL)
			return (EBADF);
		if (filep->f_type != DTYPE_VNODE) {
			error = EINVAL;
			goto done;
		}

		/*
		 * POSIX shared-memory objects are defined to have
		 * kernel persistence, and are not defined to support
		 * read(2)/write(2) -- or even open(2).  Thus, we can
		 * use MAP_NOSYNC to trade on-disk coherence for speed.
		 * The shm_open(3) library routine turns on the FPOSIXSHM
		 * flag to request this behavior.
		 */
		if (filep->f_flag & FPOSIXSHM)
			flags |= MAP_NOSYNC;
		vn = (struct vnode *)filep->f_data;

		/*
		 * Only regular files and character devices can be mapped.
		 */
		switch (vn->v_type) {
		case VREG:
			/* A regular file needs its VM object. */
			vmobj = vn->v_object;
			if (vmobj == NULL) {
				error = EINVAL;
				goto done;
			}
			KKASSERT((struct vnode *)vmobj->handle == vn);
			break;
		case VCHR:
			/*
			 * Reject a revoked device.  Whether the device
			 * can actually be mapped is decided by the device
			 * layer.
			 */
			if (vn->v_rdev == NULL) {
				error = EBADF;
				goto done;
			}
			break;
		default:
			error = EINVAL;
			goto done;
		}

		if (vn->v_type == VCHR && iszerodev(vn->v_rdev)) {
			/*
			 * XXX hack: mapping /dev/zero yields anonymous
			 * memory (ala SunOS).
			 */
			backing = NULL;
			maxprot = VM_PROT_ALL;
			flags |= MAP_ANON;
			pos = 0;
		} else {
			/*
			 * Character devices offer no private mappings.
			 */
			if (vn->v_type == VCHR &&
			    (flags & (MAP_PRIVATE | MAP_COPY))) {
				error = EINVAL;
				goto done;
			}

			/*
			 * Derive the maximum protection from the way the
			 * file was opened.  Writability only matters for
			 * shared mappings.
			 * XXX use the vnode instead?  Problem is: what
			 * credentials do we use for determination? What if
			 * proc does a setuid?
			 */
			maxprot = VM_PROT_EXECUTE;	/* ??? */
			if ((filep->f_flag & FREAD) == 0) {
				if (prot & PROT_READ) {
					error = EACCES;
					goto done;
				}
			} else {
				maxprot |= VM_PROT_READ;
			}

			/*
			 * Changes are shared either explicitly through
			 * MAP_SHARED or implicitly for character devices.
			 * In that case write access requires a descriptor
			 * opened for writing on a file that is neither
			 * immutable nor append-only.  Private mappings may
			 * always be written.
			 */
			if ((flags & MAP_SHARED) == 0 && vn->v_type != VCHR) {
				maxprot |= VM_PROT_WRITE;
			} else if ((filep->f_flag & FWRITE) != 0) {
				struct vattr attrs;

				error = VOP_GETATTR(vn, &attrs);
				if (error)
					goto done;
				if ((attrs.va_flags &
				    (IMMUTABLE | APPEND)) == 0) {
					maxprot |= VM_PROT_WRITE;
				} else if (prot & PROT_WRITE) {
					error = EPERM;
					goto done;
				}
			} else if ((prot & PROT_WRITE) != 0) {
				error = EACCES;
				goto done;
			}
			backing = (void *)vn;
		}
	}

	lwkt_gettoken(&vms->vm_map.token);

	/*
	 * Cap the number of vm_map_entry structures per process.  The cap
	 * scales with the number of rforks sharing the map so that it
	 * stays reasonable for threads.
	 */
	if (max_proc_mmap &&
	    vms->vm_map.nentries >= max_proc_mmap * vmspace_getrefs(vms)) {
		error = ENOMEM;
	} else {
		error = vm_mmap(&vms->vm_map, &mapaddr, len, prot, maxprot,
				flags, backing, pos);
		if (error == 0)
			*res = (void *)(mapaddr + pgoff);
	}

	lwkt_reltoken(&vms->vm_map.token);
done:
	if (filep)
		fdrop(filep);

	return (error);
}
예제 #3
0
파일: uvm_mmap.c 프로젝트: genua/anoubis_os
/*
 * sys_mmap: mmap system call.
 *
 * Validates the protection and flag bits, aligns the file position,
 * checks or picks the address, verifies the descriptor (unless the
 * mapping is anonymous), works out the maximum protection the mapping
 * may ever have, enforces RLIMIT_DATA for anonymous and writable private
 * mappings, and finally hands off to uvm_mmap().
 *
 * => the file offset need not be page aligned; on success *retval is the
 *    mapped address plus the page offset of the file position.
 * => returns 0 on success or an errno value.
 */
int
sys_mmap(struct proc *p, void *v, register_t *retval)
{
	struct sys_mmap_args /* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(int) prot;
		syscallarg(int) flags;
		syscallarg(int) fd;
		syscallarg(long) pad;
		syscallarg(off_t) pos;
	} */ *uap = v;
	vaddr_t addr;
	struct vattr va;
	off_t pos;
	vsize_t size, pageoff;
	vm_prot_t prot, maxprot;
	int flags, fd;
	vaddr_t vm_min_address = VM_MIN_ADDRESS;
	struct filedesc *fdp = p->p_fd;
	struct file *fp = NULL;
	struct vnode *vp;
	caddr_t handle;
	int error;

	/*
	 * first, extract syscall args from the uap.
	 */

	addr = (vaddr_t) SCARG(uap, addr);
	size = (vsize_t) SCARG(uap, len);
	prot = SCARG(uap, prot);
	flags = SCARG(uap, flags);
	fd = SCARG(uap, fd);
	pos = SCARG(uap, pos);

	/*
	 * Fixup the old deprecated MAP_COPY into MAP_PRIVATE, and
	 * validate the flags.  MAP_DENYWRITE may not be requested by the
	 * caller.
	 */
	if ((prot & VM_PROT_ALL) != prot)
		return (EINVAL);
	if ((flags & MAP_FLAGMASK) != flags)
		return (EINVAL);
	if (flags & MAP_COPY)
		flags = (flags & ~MAP_COPY) | MAP_PRIVATE;
	if ((flags & (MAP_SHARED|MAP_PRIVATE)) == (MAP_SHARED|MAP_PRIVATE))
		return (EINVAL);
	if (flags & MAP_DENYWRITE)
		return (EINVAL);

	/*
	 * align file position and save offset.  adjust size.
	 */
	ALIGN_ADDR(pos, size, pageoff);

	/*
	 * now check (MAP_FIXED) or get (!MAP_FIXED) the "addr" 
	 */

	if (flags & MAP_FIXED) {

		/* adjust address by the same amount as we did the offset */
		addr -= pageoff;
		if (addr & PAGE_MASK)
			return (EINVAL);		/* not page aligned */

		if (addr > SIZE_MAX - size)
			return (EINVAL);		/* no wrapping! */
		if (VM_MAXUSER_ADDRESS > 0 &&
		    (addr + size) > VM_MAXUSER_ADDRESS)
			return (EINVAL);
		if (vm_min_address > 0 && addr < vm_min_address)
			return (EINVAL);

	} else {

		/*
		 * not fixed: make sure we skip over the largest possible heap.
		 * we will refine our guess later (e.g. to account for VAC, etc)
		 */
		if (addr == 0)
			addr = uvm_map_hint(p, prot);
		else if (!(flags & MAP_TRYFIXED) &&
		    addr < (vaddr_t)p->p_vmspace->vm_daddr)
			addr = uvm_map_hint(p, prot);
	}

	/*
	 * check for file mappings (i.e. not anonymous) and verify file.
	 */
	if ((flags & MAP_ANON) == 0) {

		if ((fp = fd_getfile(fdp, fd)) == NULL)
			return (EBADF);

		/* reference is dropped at "out" (or just below for /dev/zero) */
		FREF(fp);

		if (fp->f_type != DTYPE_VNODE) {
			error = ENODEV;		/* only mmap vnodes! */
			goto out;
		}
		vp = (struct vnode *)fp->f_data;	/* convert to vnode */

		if (vp->v_type != VREG && vp->v_type != VCHR &&
		    vp->v_type != VBLK) {
			error = ENODEV; /* only REG/CHR/BLK support mmap */
			goto out;
		}

		if (vp->v_type == VREG && (pos + size) < pos) {
			error = EINVAL;		/* no offset wrapping */
			goto out;
		}

		/* special case: catch SunOS style /dev/zero */
		if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
			flags |= MAP_ANON;
			FRELE(fp);
			fp = NULL;
			goto is_anon;
		}

		/*
		 * Old programs may not select a specific sharing type, so
		 * default to an appropriate one.
		 *
		 * XXX: how does MAP_ANON fit in the picture?
		 */
		if ((flags & (MAP_SHARED|MAP_PRIVATE)) == 0) {
#if defined(DEBUG)
			printf("WARNING: defaulted mmap() share type to "
			   "%s (pid %d comm %s)\n", vp->v_type == VCHR ?
			   "MAP_SHARED" : "MAP_PRIVATE", p->p_pid,
			    p->p_comm);
#endif
			if (vp->v_type == VCHR)
				flags |= MAP_SHARED;	/* for a device */
			else
				flags |= MAP_PRIVATE;	/* for a file */
		}

		/* 
		 * MAP_PRIVATE device mappings don't make sense (and aren't
		 * supported anyway).  However, some programs rely on this,
		 * so just change it to MAP_SHARED.
		 */
		if (vp->v_type == VCHR && (flags & MAP_PRIVATE) != 0) {
			flags = (flags & ~MAP_PRIVATE) | MAP_SHARED;
		}

#ifdef ANOUBIS
		/* Force DENYWRITE mappings if file->denywrite is set. */
		if (fp->denywrite)
			flags |= MAP_DENYWRITE;
#endif

		/*
		 * now check protection
		 */

		/*
		 * Don't allow the file to be mapped into executable memory if
		 * the underlying file system is marked as 'noexec'.
		 */
		if (prot & PROT_EXEC && vp->v_mount->mnt_flag & MNT_NOEXEC) {
			error = EACCES;
			goto out;
		}

		maxprot = VM_PROT_EXECUTE;

		/* check read access */
		if (fp->f_flag & FREAD)
			maxprot |= VM_PROT_READ;
		else if (prot & PROT_READ) {
			error = EACCES;
			goto out;
		}

		/* PROT_EXEC only makes sense if the descriptor is readable. */
		if (!(fp->f_flag & FREAD) && prot & PROT_EXEC) {
			error = EACCES;
			goto out;
		}

		/* check write access, shared case first */
		if (flags & MAP_SHARED) {
			/*
			 * if the file is writable, only add PROT_WRITE to
			 * maxprot if the file is not immutable, append-only.
			 * otherwise, if we have asked for PROT_WRITE, return
			 * EPERM.
			 */
			if (fp->f_flag & FWRITE) {
				if ((error =
				    VOP_GETATTR(vp, &va, p->p_ucred, p)))
					goto out;
				if ((va.va_flags & (IMMUTABLE|APPEND)) == 0)
					maxprot |= VM_PROT_WRITE;
				else if (prot & PROT_WRITE) {
					error = EPERM;
					goto out;
				}
			} else if (prot & PROT_WRITE) {
				error = EACCES;
				goto out;
			}
		} else {
			/* MAP_PRIVATE mappings can always write to */
			maxprot |= VM_PROT_WRITE;
		}

#ifdef MAC
		error = mac_vnode_check_mmap(p->p_ucred, vp, prot, flags);
		if (error)
			goto out;
#endif

		vfs_mark_atime(vp, p->p_ucred);

		/*
		 * set handle to vnode
		 */

		handle = (caddr_t)vp;

	} else {		/* MAP_ANON case */
		/*
		 * XXX What do we do about (MAP_SHARED|MAP_PRIVATE) == 0?
		 */
		if (fd != -1) {
			error = EINVAL;
			goto out;
		}

 is_anon:		/* label for SunOS style /dev/zero */
		handle = NULL;
		maxprot = VM_PROT_ALL;
		pos = 0;
	}

	/*
	 * Anonymous memory and writable private mappings count against
	 * RLIMIT_DATA.  The test is arranged so that no subtraction can
	 * wrap: if the limit has been lowered below the amount already in
	 * use, "limit - used" would underflow to a huge value and let any
	 * request through.
	 */
	if ((flags & MAP_ANON) != 0 ||
	    ((flags & MAP_PRIVATE) != 0 && (prot & PROT_WRITE) != 0)) {
		if (p->p_rlimit[RLIMIT_DATA].rlim_cur < size ||
		    p->p_rlimit[RLIMIT_DATA].rlim_cur - size <
		    ptoa(p->p_vmspace->vm_dused)) {
			error = ENOMEM;
			goto out;
		}
	}

	/*
	 * now let kernel internal function uvm_mmap do the work.
	 */

	error = uvm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
	    flags, handle, pos, p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur, p);

	if (error == 0)
		/* remember to add offset */
		*retval = (register_t)(addr + pageoff);

out:
	if (fp)
		FRELE(fp);	
	return (error);
}