Пример #1
0
/*
 * The file size of a mach-o file is limited to 32 bits; this is because
 * this is the limit on the kalloc() of enough bytes for a mach_header and
 * the contents of its sizeofcmds, which is currently constrained to 32
 * bits in the file format itself.  We read into the kernel buffer the
 * commands section, and then parse it in order to parse the mach-o file
 * format load_command segment(s).  We are only interested in a subset of
 * the total set of possible commands. If "map"==VM_MAP_NULL or
 * "thread"==THREAD_NULL, do not make permament VM modifications,
 * just preflight the parse.
 */
static
load_return_t
parse_machfile(
	struct vnode 		*vp,       
	vm_map_t		map,
	thread_t		thread,
	struct mach_header	*header,
	off_t			file_offset,
	off_t			macho_size,
	int			depth,
	int64_t			aslr_offset,
	load_result_t		*result
)
{
	uint32_t		ncmds;
	struct load_command	*lcp;
	struct dylinker_command	*dlp = 0;
	struct uuid_command	*uulp = 0;
	integer_t		dlarchbits = 0;
	void *			control;
	load_return_t		ret = LOAD_SUCCESS;
	caddr_t			addr;
	void *			kl_addr;
	vm_size_t		size,kl_size;
	size_t			offset;
	size_t			oldoffset;	/* for overflow check */
	int			pass;
	proc_t			p = current_proc();		/* XXXX */
	int			error;
	int resid=0;
	size_t			mach_header_sz = sizeof(struct mach_header);
	boolean_t		abi64;
	boolean_t		got_code_signatures = FALSE;
	int64_t			slide = 0;

	if (header->magic == MH_MAGIC_64 ||
	    header->magic == MH_CIGAM_64) {
	    	mach_header_sz = sizeof(struct mach_header_64);
	}

	/*
	 *	Break infinite recursion
	 */
	if (depth > 6) {
		return(LOAD_FAILURE);
	}

	depth++;

	/*
	 *	Check to see if right machine type.
	 */
	if (((cpu_type_t)(header->cputype & ~CPU_ARCH_MASK) != cpu_type()) ||
	    !grade_binary(header->cputype, 
	    	header->cpusubtype & ~CPU_SUBTYPE_MASK))
		return(LOAD_BADARCH);
		
	abi64 = ((header->cputype & CPU_ARCH_ABI64) == CPU_ARCH_ABI64);
		
	switch (header->filetype) {
	
	case MH_OBJECT:
	case MH_EXECUTE:
	case MH_PRELOAD:
		if (depth != 1) {
			return (LOAD_FAILURE);
		}
		break;
		
	case MH_FVMLIB:
	case MH_DYLIB:
		if (depth == 1) {
			return (LOAD_FAILURE);
		}
		break;

	case MH_DYLINKER:
		if (depth != 2) {
			return (LOAD_FAILURE);
		}
		break;
		
	default:
		return (LOAD_FAILURE);
	}

	/*
	 *	Get the pager for the file.
	 */
	control = ubc_getobject(vp, UBC_FLAGS_NONE);

	/*
	 *	Map portion that must be accessible directly into
	 *	kernel's map.
	 */
	if ((off_t)(mach_header_sz + header->sizeofcmds) > macho_size)
		return(LOAD_BADMACHO);

	/*
	 *	Round size of Mach-O commands up to page boundry.
	 */
	size = round_page(mach_header_sz + header->sizeofcmds);
	if (size <= 0)
		return(LOAD_BADMACHO);

	/*
	 * Map the load commands into kernel memory.
	 */
	addr = 0;
	kl_size = size;
	kl_addr = kalloc(size);
	addr = (caddr_t)kl_addr;
	if (addr == NULL)
		return(LOAD_NOSPACE);

	error = vn_rdwr(UIO_READ, vp, addr, size, file_offset,
	    UIO_SYSSPACE, 0, kauth_cred_get(), &resid, p);
	if (error) {
		if (kl_addr )
			kfree(kl_addr, kl_size);
		return(LOAD_IOERROR);
	}

	/*
	 *	For PIE and dyld, slide everything by the ASLR offset.
	 */
    aslr_offset = 0;
	if ((header->flags & MH_PIE) || (header->filetype == MH_DYLINKER)) {
		slide = aslr_offset;
	}

	/*
	 *	Scan through the commands, processing each one as necessary.
	 */
	for (pass = 1; pass <= 3; pass++) {

		/*
		 * Check that the entry point is contained in an executable segments
		 */ 
		if ((pass == 3) && (result->validentry == 0)) {
			thread_state_initialize(thread);
			ret = LOAD_FAILURE;
			break;
		}

		/*
		 * Loop through each of the load_commands indicated by the
		 * Mach-O header; if an absurd value is provided, we just
		 * run off the end of the reserved section by incrementing
		 * the offset too far, so we are implicitly fail-safe.
		 */
		offset = mach_header_sz;
		ncmds = header->ncmds;

		while (ncmds--) {
			/*
			 *	Get a pointer to the command.
			 */
			lcp = (struct load_command *)(addr + offset);
			oldoffset = offset;
			offset += lcp->cmdsize;

			/*
			 * Perform prevalidation of the struct load_command
			 * before we attempt to use its contents.  Invalid
			 * values are ones which result in an overflow, or
			 * which can not possibly be valid commands, or which
			 * straddle or exist past the reserved section at the
			 * start of the image.
			 */
			if (oldoffset > offset ||
			    lcp->cmdsize < sizeof(struct load_command) ||
			    offset > header->sizeofcmds + mach_header_sz) {
				ret = LOAD_BADMACHO;
				break;
			}

			/*
			 * Act on struct load_command's for which kernel
			 * intervention is required.
			 */
			switch(lcp->cmd) {
			case LC_SEGMENT:
			case LC_SEGMENT_64:
				if (pass != 2)
					break;
				ret = load_segment(lcp,
				    		   header->filetype,
						   control,
						   file_offset,
						   macho_size,
						   vp,
						   map,
						   slide,
						   result);
				break;
			case LC_UNIXTHREAD:
				if (pass != 1)
					break;
				ret = load_unixthread(
						 (struct thread_command *) lcp,
						 thread,
						 slide,
						 result);
				break;
			case LC_MAIN:
				if (pass != 1)
					break;
				if (depth != 1)
					break;
				ret = load_main(
						 (struct entry_point_command *) lcp,
						 thread,
						 slide,
						 result);
				break;
			case LC_LOAD_DYLINKER:
				if (pass != 3)
					break;
				if ((depth == 1) && (dlp == 0)) {
					dlp = (struct dylinker_command *)lcp;
					dlarchbits = (header->cputype & CPU_ARCH_MASK);
				} else {
					ret = LOAD_FAILURE;
				}
				break;
			case LC_UUID:
				if (pass == 1 && depth == 1) {
					uulp = (struct uuid_command *)lcp;
					memcpy(&result->uuid[0], &uulp->uuid[0], sizeof(result->uuid));
				}
				break;
			case LC_CODE_SIGNATURE:
				/* CODE SIGNING */
				if (pass != 1)
					break;
				/* pager -> uip ->
				   load signatures & store in uip
				   set VM object "signed_pages"
				*/
				ret = load_code_signature(
					(struct linkedit_data_command *) lcp,
					vp,
					file_offset,
					macho_size,
					header->cputype,
					(depth == 1) ? result : NULL);
				if (ret != LOAD_SUCCESS) {
					printf("proc %d: load code signature error %d "
					       "for file \"%s\"\n",
					       p->p_pid, ret, vp->v_name);
					ret = LOAD_SUCCESS; /* ignore error */
				} else {
					got_code_signatures = TRUE;
				}
				break;
#if CONFIG_CODE_DECRYPTION
#ifndef __arm__
			case LC_ENCRYPTION_INFO:
				if (pass != 3)
					break;
				ret = set_code_unprotect(
					(struct encryption_info_command *) lcp,
					addr, map, slide, vp);
				if (ret != LOAD_SUCCESS) {
					printf("proc %d: set_code_unprotect() error %d "
					       "for file \"%s\"\n",
					       p->p_pid, ret, vp->v_name);
					/* Don't let the app run if it's 
					 * encrypted but we failed to set up the
					 * decrypter */
					 psignal(p, SIGKILL);
				}
				break;
#endif
#endif
			default:
				/* Other commands are ignored by the kernel */
				ret = LOAD_SUCCESS;
				break;
			}
			if (ret != LOAD_SUCCESS)
				break;
		}
		if (ret != LOAD_SUCCESS)
			break;
	}
	if (ret == LOAD_SUCCESS) { 
	    if (! got_code_signatures) {
		    struct cs_blob *blob;
		    /* no embedded signatures: look for detached ones */
		    blob = ubc_cs_blob_get(vp, -1, file_offset);
		    if (blob != NULL) {
			    /* get flags to be applied to the process */
			    result->csflags |= blob->csb_flags;
		    }
	    }

		/* Make sure if we need dyld, we got it */
		if (result->needs_dynlinker && !dlp) {
			ret = LOAD_FAILURE;
		}

	    if ((ret == LOAD_SUCCESS) && (dlp != 0)) {
		    /* load the dylinker, and always slide it by the ASLR
		     * offset regardless of PIE */
		    ret = load_dylinker(dlp, dlarchbits, map, thread, depth, aslr_offset, result);
	    }

	    if((ret == LOAD_SUCCESS) && (depth == 1)) {
			if (result->thread_count == 0) {
				ret = LOAD_FAILURE;
			}
	    }
	}

	if (kl_addr )
		kfree(kl_addr, kl_size);

	return(ret);
}
Пример #2
0
/*
 * shared_region_map_np()
 *
 * This system call is intended for dyld.
 *
 * dyld uses this to map a shared cache file into a shared region.
 * This is usually done only the first time a shared cache is needed.
 * Subsequent processes will just use the populated shared region without
 * requiring any further setup.
 */
int
shared_region_map_np(
	struct proc				*p,
	struct shared_region_map_np_args	*uap,
	__unused int				*retvalp)
{
	int				error;
	kern_return_t			kr;
	int				fd;
	struct fileproc			*fp;
	struct vnode			*vp, *root_vp;
	struct vnode_attr		va;
	off_t				fs;
	memory_object_size_t		file_size;
	user_addr_t			user_mappings;
	struct shared_file_mapping_np	*mappings;
#define SFM_MAX_STACK	8
	struct shared_file_mapping_np	stack_mappings[SFM_MAX_STACK];
	unsigned int			mappings_count;
	vm_size_t			mappings_size;
	memory_object_control_t		file_control;
	struct vm_shared_region		*shared_region;

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] -> map\n",
		 current_thread(), p->p_pid, p->p_comm));

	shared_region = NULL;
	mappings_count = 0;
	mappings_size = 0;
	mappings = NULL;
	fp = NULL;
	vp = NULL;

	/* get file descriptor for shared region cache file */
	fd = uap->fd;

	/* get file structure from file descriptor */
	error = fp_lookup(p, fd, &fp, 0);
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map: "
			 "fd=%d lookup failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm, fd, error));
		goto done;
	}

	/* make sure we're attempting to map a vnode */
	if (fp->f_fglob->fg_type != DTYPE_VNODE) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map: "
			 "fd=%d not a vnode (type=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 fd, fp->f_fglob->fg_type));
		error = EINVAL;
		goto done;
	}

	/* we need at least read permission on the file */
	if (! (fp->f_fglob->fg_flag & FREAD)) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map: "
			 "fd=%d not readable\n",
			 current_thread(), p->p_pid, p->p_comm, fd));
		error = EPERM;
		goto done;
	}

	/* get vnode from file structure */
	error = vnode_getwithref((vnode_t) fp->f_fglob->fg_data);
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map: "
			 "fd=%d getwithref failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm, fd, error));
		goto done;
	}
	vp = (struct vnode *) fp->f_fglob->fg_data;

	/* make sure the vnode is a regular file */
	if (vp->v_type != VREG) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "not a file (type=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, vp->v_type));
		error = EINVAL;
		goto done;
	}

	/* make sure vnode is on the process's root volume */
	root_vp = p->p_fd->fd_rdir;
	if (root_vp == NULL) {
		root_vp = rootvnode;
	}
	if (vp->v_mount != root_vp->v_mount) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "not on process's root volume\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name));
		error = EPERM;
		goto done;
	}

	/* make sure vnode is owned by "root" */
	VATTR_INIT(&va);
	VATTR_WANTED(&va, va_uid);
	error = vnode_getattr(vp, &va, vfs_context_current());
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "vnode_getattr(%p) failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, vp, error));
		goto done;
	}
	if (va.va_uid != 0) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "owned by uid=%d instead of 0\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, va.va_uid));
		error = EPERM;
		goto done;
	}

	/* get vnode size */
	error = vnode_size(vp, &fs, vfs_context_current());
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "vnode_size(%p) failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, vp, error));
		goto done;
	}
	file_size = fs;

	/* get the file's memory object handle */
	file_control = ubc_getobject(vp, UBC_HOLDOBJECT);
	if (file_control == MEMORY_OBJECT_CONTROL_NULL) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "no memory object\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name));
		error = EINVAL;
		goto done;
	}
			 
	/* get the list of mappings the caller wants us to establish */
	mappings_count = uap->count;	/* number of mappings */
	mappings_size = (vm_size_t) (mappings_count * sizeof (mappings[0]));
	if (mappings_count == 0) {
		SHARED_REGION_TRACE_INFO(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "no mappings\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name));
		error = 0;	/* no mappings: we're done ! */
		goto done;
	} else if (mappings_count <= SFM_MAX_STACK) {
		mappings = &stack_mappings[0];
	} else {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "too many mappings (%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, mappings_count));
		error = EINVAL;
		goto done;
	}

	user_mappings = uap->mappings;	/* the mappings, in user space */
	error = copyin(user_mappings,
		       mappings,
		       mappings_size);
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "copyin(0x%llx, %d) failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, (uint64_t)user_mappings, mappings_count, error));
		goto done;
	}

	/* get the process's shared region (setup in vm_map_exec()) */
	shared_region = vm_shared_region_get(current_task());
	if (shared_region == NULL) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "no shared region\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name));
		goto done;
	}

	/* map the file into that shared region's submap */
	kr = vm_shared_region_map_file(shared_region,
				       mappings_count,
				       mappings,
				       file_control,
				       file_size,
				       (void *) p->p_fd->fd_rdir);
	if (kr != KERN_SUCCESS) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "vm_shared_region_map_file() failed kr=0x%x\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, kr));
		switch (kr) {
		case KERN_INVALID_ADDRESS:
			error = EFAULT;
			break;
		case KERN_PROTECTION_FAILURE:
			error = EPERM;
			break;
		case KERN_NO_SPACE:
			error = ENOMEM;
			break;
		case KERN_FAILURE:
		case KERN_INVALID_ARGUMENT:
		default:
			error = EINVAL;
			break;
		}
		goto done;
	}

	/*
	 * The mapping was successful.  Let the buffer cache know
	 * that we've mapped that file with these protections.  This
	 * prevents the vnode from getting recycled while it's mapped.
	 */
	(void) ubc_map(vp, VM_PROT_READ);
	error = 0;

	/* update the vnode's access time */
	if (! (vnode_vfsvisflags(vp) & MNT_NOATIME)) {
		VATTR_INIT(&va);
		nanotime(&va.va_access_time);
		VATTR_SET_ACTIVE(&va, va_access_time);
		vnode_setattr(vp, &va, vfs_context_current());
	}

	if (p->p_flag & P_NOSHLIB) {
		/* signal that this process is now using split libraries */
		OSBitAndAtomic(~((uint32_t)P_NOSHLIB), (UInt32 *)&p->p_flag);
	}

done:
	if (vp != NULL) {
		/*
		 * release the vnode...
		 * ubc_map() still holds it for us in the non-error case
		 */
		(void) vnode_put(vp);
		vp = NULL;
	}
	if (fp != NULL) {
		/* release the file descriptor */
		fp_drop(p, fd, fp, 0);
		fp = NULL;
	}

	if (shared_region != NULL) {
		vm_shared_region_deallocate(shared_region);
	}

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] <- map\n",
		 current_thread(), p->p_pid, p->p_comm));

	return error;
}
Пример #3
0
/*
 * XXX Internally, we use VM_PROT_* somewhat interchangeably, but the correct
 * XXX usage is PROT_* from an interface perspective.  Thus the values of
 * XXX VM_PROT_* and PROT_* need to correspond.
 */
int
mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval)
{
	/*
	 *	Map in special device (must be SHARED) or file
	 */
	struct fileproc *fp;
	struct			vnode *vp;
	int			flags;
	int			prot;
	int			err=0;
	vm_map_t		user_map;
	kern_return_t		result;
	vm_map_offset_t		user_addr;
	vm_map_size_t		user_size;
	vm_object_offset_t	pageoff;
	vm_object_offset_t	file_pos;
	int			alloc_flags = 0;
	vm_tag_t		tag = VM_KERN_MEMORY_NONE;
	vm_map_kernel_flags_t	vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
	boolean_t		docow;
	vm_prot_t		maxprot;
	void 			*handle;
	memory_object_t		pager = MEMORY_OBJECT_NULL;
	memory_object_control_t	 control;
	int 			mapanon=0;
	int 			fpref=0;
	int error =0;
	int fd = uap->fd;
	int num_retries = 0;

	/*
	 * Note that for UNIX03 conformance, there is additional parameter checking for
	 * mmap() system call in libsyscall prior to entering the kernel.  The sanity 
	 * checks and argument validation done in this function are not the only places
	 * one can get returned errnos.
	 */

	user_map = current_map();
	user_addr = (vm_map_offset_t)uap->addr;
	user_size = (vm_map_size_t) uap->len;

	AUDIT_ARG(addr, user_addr);
	AUDIT_ARG(len, user_size);
	AUDIT_ARG(fd, uap->fd);

	prot = (uap->prot & VM_PROT_ALL);
#if 3777787
	/*
	 * Since the hardware currently does not support writing without
	 * read-before-write, or execution-without-read, if the request is
	 * for write or execute access, we must imply read access as well;
	 * otherwise programs expecting this to work will fail to operate.
	 */
	if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
		prot |= VM_PROT_READ;
#endif	/* radar 3777787 */

	flags = uap->flags;
	vp = NULLVP;

	/*
	 * The vm code does not have prototypes & compiler doesn't do the'
	 * the right thing when you cast 64bit value and pass it in function 
	 * call. So here it is.
	 */
	file_pos = (vm_object_offset_t)uap->pos;


	/* make sure mapping fits into numeric range etc */
	if (file_pos + user_size > (vm_object_offset_t)-PAGE_SIZE_64)
		return (EINVAL);

	/*
	 * Align the file position to a page boundary,
	 * and save its page offset component.
	 */
	pageoff = (file_pos & vm_map_page_mask(user_map));
	file_pos -= (vm_object_offset_t)pageoff;


	/* Adjust size for rounding (on both ends). */
	user_size += pageoff;	/* low end... */
	user_size = vm_map_round_page(user_size,	
				      vm_map_page_mask(user_map)); /* hi end */

	if (flags & MAP_JIT) {
		if ((flags & MAP_FIXED) ||
		    (flags & MAP_SHARED) ||
		    !(flags & MAP_ANON) ||
		    (flags & MAP_RESILIENT_CODESIGN) ||
		    (flags & MAP_RESILIENT_MEDIA)) {
			return EINVAL;
		}
	}

	if ((flags & MAP_RESILIENT_CODESIGN) ||
	    (flags & MAP_RESILIENT_MEDIA)) {
		if ((flags & MAP_ANON) ||
		    (flags & MAP_JIT)) {
			return EINVAL;
		}
		if (prot & (VM_PROT_WRITE | VM_PROT_EXECUTE)) {
			return EPERM;
		}
	}

	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (flags & MAP_FIXED) {
		/*
		 * The specified address must have the same remainder
		 * as the file offset taken modulo PAGE_SIZE, so it
		 * should be aligned after adjustment by pageoff.
		 */
		user_addr -= pageoff;
		if (user_addr & vm_map_page_mask(user_map))
			return (EINVAL);
	}
#ifdef notyet
	/* DO not have apis to get this info, need to wait till then*/
	/*
	 * XXX for non-fixed mappings where no hint is provided or
	 * the hint would fall in the potential heap space,
	 * place it after the end of the largest possible heap.
	 *
	 * There should really be a pmap call to determine a reasonable
	 * location.
	 */
	else if (addr < vm_map_round_page(p->p_vmspace->vm_daddr + MAXDSIZ,
					  vm_map_page_mask(user_map)))
		addr = vm_map_round_page(p->p_vmspace->vm_daddr + MAXDSIZ,
					 vm_map_page_mask(user_map));

#endif

	alloc_flags = 0;

	if (flags & MAP_ANON) {

		maxprot = VM_PROT_ALL;
#if CONFIG_MACF
		/*
		 * Entitlement check.
		 */
		error = mac_proc_check_map_anon(p, user_addr, user_size, prot, flags, &maxprot);
		if (error) {
			return EINVAL;
		}		
#endif /* MAC */

		/*
		 * Mapping blank space is trivial.  Use positive fds as the alias
		 * value for memory tracking. 
		 */
		if (fd != -1) {
			/*
			 * Use "fd" to pass (some) Mach VM allocation flags,
			 * (see the VM_FLAGS_* definitions).
			 */
			alloc_flags = fd & (VM_FLAGS_ALIAS_MASK |
					    VM_FLAGS_SUPERPAGE_MASK |
					    VM_FLAGS_PURGABLE |
					    VM_FLAGS_4GB_CHUNK);
			if (alloc_flags != fd) {
				/* reject if there are any extra flags */
				return EINVAL;
			}
			VM_GET_FLAGS_ALIAS(alloc_flags, tag);
			alloc_flags &= ~VM_FLAGS_ALIAS_MASK;
		}
			
		handle = NULL;
		file_pos = 0;
		mapanon = 1;
	} else {
		struct vnode_attr va;
		vfs_context_t ctx = vfs_context_current();

		if (flags & MAP_JIT)
			return EINVAL;

		/*
		 * Mapping file, get fp for validation. Obtain vnode and make
		 * sure it is of appropriate type.
		 */
		err = fp_lookup(p, fd, &fp, 0);
		if (err)
			return(err);
		fpref = 1;
		switch (FILEGLOB_DTYPE(fp->f_fglob)) {
		case DTYPE_PSXSHM:
			uap->addr = (user_addr_t)user_addr;
			uap->len = (user_size_t)user_size;
			uap->prot = prot;
			uap->flags = flags;
			uap->pos = file_pos;
			error = pshm_mmap(p, uap, retval, fp, (off_t)pageoff);
			goto bad;
		case DTYPE_VNODE:
			break;
		default:
			error = EINVAL;
			goto bad;
		}
		vp = (struct vnode *)fp->f_fglob->fg_data;
		error = vnode_getwithref(vp);
		if(error != 0)
			goto bad;

		if (vp->v_type != VREG && vp->v_type != VCHR) {
			(void)vnode_put(vp);
			error = EINVAL;
			goto bad;
		}

		AUDIT_ARG(vnpath, vp, ARG_VNODE1);
		
		/*
		 * POSIX: mmap needs to update access time for mapped files
		 */
		if ((vnode_vfsvisflags(vp) & MNT_NOATIME) == 0) {
			VATTR_INIT(&va);
			nanotime(&va.va_access_time);
			VATTR_SET_ACTIVE(&va, va_access_time);
			vnode_setattr(vp, &va, ctx);
		}

		/*
		 * XXX hack to handle use of /dev/zero to map anon memory (ala
		 * SunOS).
		 */
		if (vp->v_type == VCHR || vp->v_type == VSTR) {
			(void)vnode_put(vp);
			error = ENODEV;
			goto bad;
		} else {
			/*
			 * Ensure that file and memory protections are
			 * compatible.  Note that we only worry about
			 * writability if mapping is shared; in this case,
			 * current and max prot are dictated by the open file.
			 * XXX use the vnode instead?  Problem is: what
			 * credentials do we use for determination? What if
			 * proc does a setuid?
			 */
			maxprot = VM_PROT_EXECUTE;	/* ??? */
			if (fp->f_fglob->fg_flag & FREAD)
				maxprot |= VM_PROT_READ;
			else if (prot & PROT_READ) {
				(void)vnode_put(vp);
				error = EACCES;
				goto bad;
			}
			/*
			 * If we are sharing potential changes (either via
			 * MAP_SHARED or via the implicit sharing of character
			 * device mappings), and we are trying to get write
			 * permission although we opened it without asking
			 * for it, bail out. 
			 */

			if ((flags & MAP_SHARED) != 0) {
				if ((fp->f_fglob->fg_flag & FWRITE) != 0 &&
				    /*
				     * Do not allow writable mappings of 
				     * swap files (see vm_swapfile_pager.c).
				     */
				    !vnode_isswap(vp)) {
 					/*
 					 * check for write access
 					 *
 					 * Note that we already made this check when granting FWRITE
 					 * against the file, so it seems redundant here.
 					 */
 					error = vnode_authorize(vp, NULL, KAUTH_VNODE_CHECKIMMUTABLE, ctx);
 
 					/* if not granted for any reason, but we wanted it, bad */
 					if ((prot & PROT_WRITE) && (error != 0)) {
 						vnode_put(vp);
  						goto bad;
  					}
 
 					/* if writable, remember */
 					if (error == 0)
  						maxprot |= VM_PROT_WRITE;

				} else if ((prot & PROT_WRITE) != 0) {
					(void)vnode_put(vp);
					error = EACCES;
					goto bad;
				}
			} else
				maxprot |= VM_PROT_WRITE;

			handle = (void *)vp;
#if CONFIG_MACF
			error = mac_file_check_mmap(vfs_context_ucred(ctx),
			    fp->f_fglob, prot, flags, file_pos, &maxprot);
			if (error) {
				(void)vnode_put(vp);
				goto bad;
			}
#endif /* MAC */
		}
	}

	if (user_size == 0)  {
		if (!mapanon)
			(void)vnode_put(vp);
		error = 0;
		goto bad;
	}

	/*
	 *	We bend a little - round the start and end addresses
	 *	to the nearest page boundary.
	 */
	user_size = vm_map_round_page(user_size,
				      vm_map_page_mask(user_map));

	if (file_pos & vm_map_page_mask(user_map)) {
		if (!mapanon)
			(void)vnode_put(vp);
		error = EINVAL;
		goto bad;
	}

	if ((flags & MAP_FIXED) == 0) {
		alloc_flags |= VM_FLAGS_ANYWHERE;
		user_addr = vm_map_round_page(user_addr,
					      vm_map_page_mask(user_map));
	} else {
		if (user_addr != vm_map_trunc_page(user_addr,
						   vm_map_page_mask(user_map))) {
		        if (!mapanon)
			        (void)vnode_put(vp);
			error = EINVAL;
			goto bad;
		}
		/*
		 * mmap(MAP_FIXED) will replace any existing mappings in the
		 * specified range, if the new mapping is successful.
		 * If we just deallocate the specified address range here,
		 * another thread might jump in and allocate memory in that
		 * range before we get a chance to establish the new mapping,
		 * and we won't have a chance to restore the old mappings.
		 * So we use VM_FLAGS_OVERWRITE to let Mach VM know that it
		 * has to deallocate the existing mappings and establish the
		 * new ones atomically.
		 */
		alloc_flags |= VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
	}

	if (flags & MAP_NOCACHE)
		alloc_flags |= VM_FLAGS_NO_CACHE;

	if (flags & MAP_JIT) {
		vmk_flags.vmkf_map_jit = TRUE;
	}

	if (flags & MAP_RESILIENT_CODESIGN) {
		alloc_flags |= VM_FLAGS_RESILIENT_CODESIGN;
	}

	/*
	 * Lookup/allocate object.
	 */
	if (handle == NULL) {
		control = NULL;
#ifdef notyet
/* Hmm .. */
#if defined(VM_PROT_READ_IS_EXEC)
		if (prot & VM_PROT_READ)
			prot |= VM_PROT_EXECUTE;
		if (maxprot & VM_PROT_READ)
			maxprot |= VM_PROT_EXECUTE;
#endif
#endif

#if 3777787
		if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
			prot |= VM_PROT_READ;
		if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
			maxprot |= VM_PROT_READ;
#endif	/* radar 3777787 */
map_anon_retry:
		result = vm_map_enter_mem_object(user_map,
						 &user_addr, user_size,
						 0, alloc_flags, vmk_flags,
						 tag,
						 IPC_PORT_NULL, 0, FALSE,
						 prot, maxprot,
						 (flags & MAP_SHARED) ?
						 VM_INHERIT_SHARE : 
						 VM_INHERIT_DEFAULT);

		/* If a non-binding address was specified for this anonymous
		 * mapping, retry the mapping with a zero base
		 * in the event the mapping operation failed due to
		 * lack of space between the address and the map's maximum.
		 */
		if ((result == KERN_NO_SPACE) && ((flags & MAP_FIXED) == 0) && user_addr && (num_retries++ == 0)) {
			user_addr = vm_map_page_size(user_map);
			goto map_anon_retry;
		}
	} else {
		if (vnode_isswap(vp)) {
			/*
			 * Map swap files with a special pager
			 * that returns obfuscated contents.
			 */
			control = NULL;
			pager = swapfile_pager_setup(vp);
			if (pager != MEMORY_OBJECT_NULL) {
				control = swapfile_pager_control(pager);
			}
		} else {
			control = ubc_getobject(vp, UBC_FLAGS_NONE);
		}
		
		if (control == NULL) {
			(void)vnode_put(vp);
			error = ENOMEM;
			goto bad;
		}

		/*
		 *  Set credentials:
		 *	FIXME: if we're writing the file we need a way to
		 *      ensure that someone doesn't replace our R/W creds
		 * 	with ones that only work for read.
		 */

		ubc_setthreadcred(vp, p, current_thread());
		docow = FALSE;
		if ((flags & (MAP_ANON|MAP_SHARED)) == 0) {
			docow = TRUE;
		}

#ifdef notyet
/* Hmm .. */
#if defined(VM_PROT_READ_IS_EXEC)
		if (prot & VM_PROT_READ)
			prot |= VM_PROT_EXECUTE;
		if (maxprot & VM_PROT_READ)
			maxprot |= VM_PROT_EXECUTE;
#endif
#endif /* notyet */

#if 3777787
		if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
			prot |= VM_PROT_READ;
		if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
			maxprot |= VM_PROT_READ;
#endif	/* radar 3777787 */

map_file_retry:
		if ((flags & MAP_RESILIENT_CODESIGN) ||
		    (flags & MAP_RESILIENT_MEDIA)) {
			if (prot & (VM_PROT_WRITE | VM_PROT_EXECUTE)) {
				assert(!mapanon);
				vnode_put(vp);
				error = EPERM;
				goto bad;
			}
			/* strictly limit access to "prot" */
			maxprot &= prot;
		}

		vm_object_offset_t end_pos = 0;
		if (os_add_overflow(user_size, file_pos, &end_pos)) {
			vnode_put(vp);
			error = EINVAL;
			goto bad;
		}

		result = vm_map_enter_mem_object_control(user_map,
						 &user_addr, user_size,
						 0, alloc_flags, vmk_flags,
						 tag,
						 control, file_pos,
						 docow, prot, maxprot, 
						 (flags & MAP_SHARED) ?
						 VM_INHERIT_SHARE : 
						 VM_INHERIT_DEFAULT);

		/* If a non-binding address was specified for this file backed
		 * mapping, retry the mapping with a zero base
		 * in the event the mapping operation failed due to
		 * lack of space between the address and the map's maximum.
		 */
		if ((result == KERN_NO_SPACE) && ((flags & MAP_FIXED) == 0) && user_addr && (num_retries++ == 0)) {
			user_addr = vm_map_page_size(user_map);
			goto map_file_retry;
		}
	}

	if (!mapanon) {
		(void)vnode_put(vp);
	}

	switch (result) {
	case KERN_SUCCESS:
		*retval = user_addr + pageoff;
		error = 0;
		break;
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		error =  ENOMEM;
		break;
	case KERN_PROTECTION_FAILURE:
		error =  EACCES;
		break;
	default:
		error =  EINVAL;
		break;
	}
bad:
	if (pager != MEMORY_OBJECT_NULL) {
		/*
		 * Release the reference on the pager.
		 * If the mapping was successful, it now holds
		 * an extra reference.
		 */
		memory_object_deallocate(pager);
	}
	if (fpref)
		fp_drop(p, fd, fp, 0);

	KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_mmap) | DBG_FUNC_NONE), fd, (uint32_t)(*retval), (uint32_t)user_size, error, 0);
#ifndef	CONFIG_EMBEDDED
	KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO2, SYS_mmap) | DBG_FUNC_NONE), (uint32_t)(*retval >> 32), (uint32_t)(user_size >> 32),
			      (uint32_t)(file_pos >> 32), (uint32_t)file_pos, 0);
#endif
	return(error);
}