Example #1
0
/*
 *	Allocate new wired pages in an object.
 *	The object is assumed to be mapped into the kernel map or
 *	a submap.
 */
void
kmem_alloc_pages(
	vm_object_t	object,
	vm_offset_t	offset,
	vm_offset_t	start, 
	vm_offset_t	end,
	vm_prot_t	protection)
{
	/*
	 *	Mark the pmap region as not pageable.
	 */
	pmap_pageable(kernel_pmap, start, end, FALSE);

	while (start < end) {
	    vm_page_t	mem;

	    vm_object_lock(object);

	    /*
	     *	Allocate a page
	     */
	    while ((mem = vm_page_alloc(object, offset))
			 == VM_PAGE_NULL) {
		vm_object_unlock(object);
		VM_PAGE_WAIT((void (*)()) 0);
		vm_object_lock(object);
	    }

	    /*
	     *	Wire it down
	     */
	    vm_page_lock_queues();
	    vm_page_wire(mem);
	    vm_page_unlock_queues();
	    vm_object_unlock(object);

	    /*
	     *	Enter it in the kernel pmap
	     */
	    PMAP_ENTER(kernel_pmap, start, mem,
		       protection, TRUE);

	    vm_object_lock(object);
	    PAGE_WAKEUP_DONE(mem);
	    vm_object_unlock(object);

	    start += PAGE_SIZE;
	    offset += PAGE_SIZE;
	}
}
Example #2
0
vm_object_t
find_vnode_object(
	vm_map_entry_t	entry
)
{
	vm_object_t			top_object, object;
	memory_object_t 		memory_object;
	memory_object_pager_ops_t	pager_ops;

	if (!entry->is_sub_map) {

		/*
		 * The last object in the shadow chain has the
		 * relevant pager information.
		 */

		top_object = entry->object.vm_object;

		if (top_object) {
			vm_object_lock(top_object);

			for (object = top_object; object->shadow != VM_OBJECT_NULL; object = object->shadow) {
				vm_object_lock(object->shadow);
				vm_object_unlock(object);
			}

			if (object && !object->internal && object->pager_ready && !object->terminating &&
			    object->alive) {
				memory_object = object->pager;
				pager_ops = memory_object->mo_pager_ops;

				/*
				 * If this object points to the vnode_pager_ops, then we found what we're
				 * looking for.  Otherwise, this vm_map_entry doesn't have an underlying
				 * vnode and so we fall through to the bottom and return NULL.
				 */

				if (pager_ops == &vnode_pager_ops) 
					return object;		/* we return with the object locked */
			}

			vm_object_unlock(object);
		}

	}

	return(VM_OBJECT_NULL);
}
Example #3
0
/*
 * osi_ubc_flush_dirty_and_wait -- ensure all dirty pages cleaned
 *
 * Alpha OSF/1 doesn't make it easy to wait for all dirty pages to be cleaned.
 * NFS tries to do this by calling waitforio(), which waits for v_numoutput
 * to go to zero.  But that isn't good enough, because afs_putpage() doesn't
 * increment v_numoutput until it has obtained the vcache entry lock.  Suppose
 * that Process A, trying to flush a page, is waiting for that lock, and
 * Process B tries to close the file.  Process B calls waitforio() which thinks
 * that everything is cool because v_numoutput is still zero.  Process B then
 * proceeds to call afs_StoreAllSegments().  Finally when B is finished, A gets
 * to proceed and flush its page.  But then it's too late because the file is
 * already closed.
 *
 * (I suspect that waitforio() is not adequate for NFS, just as it isn't
 * adequate for us.  But that's not my problem.)
 *
 * The only way we can be sure that there are no more dirty pages is if there
 * are no more pages with pg_busy set.  We look for them on the cleanpl.
 *
 * For some reason, ubc_flush_dirty() only looks at the dirtypl, not the
 * dirtywpl.  I don't know why this is good enough, but I assume it is.  By
 * the same token, I only look for busy pages on the cleanpl, not the cleanwpl.
 *
 * Called with the global lock NOT held.
 */
static void
osi_ubc_flush_dirty_and_wait(struct vnode *vp, int flags)
{
    int retry;
    vm_page_t pp;
    int first;

#ifdef SECRETLY_OSF1
    do {
	struct vm_ubc_object *vop;
	vop = (struct vm_ubc_object *)(vp->v_object);
	ubc_flush_dirty(vop, flags);

	vm_object_lock(vop);
	if (vop->vu_dirtypl)
	    /* shouldn't happen, but who knows */
	    retry = 1;
	else {
	    retry = 0;
	    if (vop->vu_cleanpl) {
		for (first = 1, pp = vop->vu_cleanpl;
		     first || pp != vop->vu_cleanpl;
		     first = 0, pp = pp->pg_onext) {
		    if (pp->pg_busy) {
			retry = 1;
			pp->pg_wait = 1;
			assert_wait_mesg((vm_offset_t) pp, FALSE, "pg_wait");
			vm_object_unlock(vop);
			thread_block();
			break;
		    }
		}
	    }
	    if (retry)
		continue;
	}
	vm_object_unlock(vop);
    } while (retry);
#endif /* SECRETLY_OSF1 */
}
Example #4
0
/*
 *	Remap wired pages in an object into a new region.
 *	The object is assumed to be mapped into the kernel map or
 *	a submap.
 */
void
kmem_remap_pages(
	vm_object_t	object,
	vm_offset_t	offset,
	vm_offset_t	start, 
	vm_offset_t	end,
	vm_prot_t	protection)
{
	/*
	 *	Mark the pmap region as not pageable.
	 */
	pmap_pageable(kernel_pmap, start, end, FALSE);

	while (start < end) {
	    vm_page_t	mem;

	    vm_object_lock(object);

	    /*
	     *	Find a page
	     */
	    if ((mem = vm_page_lookup(object, offset)) == VM_PAGE_NULL)
		panic("kmem_remap_pages");

	    /*
	     *	Wire it down (again)
	     */
	    vm_page_lock_queues();
	    vm_page_wire(mem);
	    vm_page_unlock_queues();
	    vm_object_unlock(object);

	    /*
	     *	Enter it in the kernel pmap.  The page isn't busy,
	     *	but this shouldn't be a problem because it is wired.
	     */
	    PMAP_ENTER(kernel_pmap, start, mem,
		       protection, TRUE);

	    start += PAGE_SIZE;
	    offset += PAGE_SIZE;
	}
}
Example #5
0
kern_return_t
vm32_region_info_64(
	__DEBUG_ONLY vm_map_t			map,
	__DEBUG_ONLY vm32_offset_t		address,
	__DEBUG_ONLY vm_info_region_64_t	*regionp,
	__DEBUG_ONLY vm_info_object_array_t	*objectsp,
	__DEBUG_ONLY mach_msg_type_number_t	*objectsCntp)
{
#if !MACH_VM_DEBUG
        return KERN_FAILURE;
#else
	vm_map_copy_t copy;
	vm_offset_t addr = 0;	/* memory for OOL data */
	vm_size_t size;		/* size of the memory */
	unsigned int room;	/* room for this many objects */
	unsigned int used;	/* actually this many objects */
	vm_info_region_64_t region;
	kern_return_t kr;

	if (map == VM_MAP_NULL)
		return KERN_INVALID_TASK;

	size = 0;		/* no memory allocated yet */

	for (;;) {
		vm_map_t cmap;	/* current map in traversal */
		vm_map_t nmap;	/* next map to look at */
		vm_map_entry_t entry;
		vm_object_t object, cobject, nobject;

		/* nothing is locked */

		vm_map_lock_read(map);
		for (cmap = map;; cmap = nmap) {
			/* cmap is read-locked */

			if (!vm_map_lookup_entry(cmap, address, &entry)) {
				entry = entry->vme_next;
				if (entry == vm_map_to_entry(cmap)) {
					vm_map_unlock_read(cmap);
					if (size != 0)
						kmem_free(ipc_kernel_map,
							  addr, size);
					return KERN_NO_SPACE;
				}
			}

			if (entry->is_sub_map)
				nmap = VME_SUBMAP(entry);
			else
				break;

			/* move down to the lower map */

			vm_map_lock_read(nmap);
			vm_map_unlock_read(cmap);
		}

		/* cmap is read-locked; we have a real entry */

		object = VME_OBJECT(entry);
		region.vir_start = (natural_t) entry->vme_start;
		region.vir_end = (natural_t) entry->vme_end;
		region.vir_object = (natural_t)(uintptr_t) object;
		region.vir_offset = VME_OFFSET(entry);
		region.vir_needs_copy = entry->needs_copy;
		region.vir_protection = entry->protection;
		region.vir_max_protection = entry->max_protection;
		region.vir_inheritance = entry->inheritance;
		region.vir_wired_count = entry->wired_count;
		region.vir_user_wired_count = entry->user_wired_count;

		used = 0;
		room = (unsigned int) (size / sizeof(vm_info_object_t));

		if (object == VM_OBJECT_NULL) {
			vm_map_unlock_read(cmap);
			/* no memory needed */
			break;
		}

		vm_object_lock(object);
		vm_map_unlock_read(cmap);

		for (cobject = object;; cobject = nobject) {
			/* cobject is locked */

			if (used < room) {
				vm_info_object_t *vio =
					&((vm_info_object_t *) addr)[used];

				vio->vio_object =
					(natural_t)(uintptr_t) cobject;
				vio->vio_size =
					(natural_t) cobject->vo_size;
				vio->vio_ref_count =
					cobject->ref_count;
				vio->vio_resident_page_count =
					cobject->resident_page_count;
				vio->vio_copy =
					(natural_t)(uintptr_t) cobject->copy;
				vio->vio_shadow =
					(natural_t)(uintptr_t) cobject->shadow;
				vio->vio_shadow_offset =
					(natural_t) cobject->vo_shadow_offset;
				vio->vio_paging_offset =
					(natural_t) cobject->paging_offset;
				vio->vio_copy_strategy =
					cobject->copy_strategy;
				vio->vio_last_alloc =
					(vm_offset_t) cobject->last_alloc;
				vio->vio_paging_in_progress =
					cobject->paging_in_progress +
					cobject->activity_in_progress;
				vio->vio_pager_created =
					cobject->pager_created;
				vio->vio_pager_initialized =
					cobject->pager_initialized;
				vio->vio_pager_ready =
					cobject->pager_ready;
				vio->vio_can_persist =
					cobject->can_persist;
				vio->vio_internal =
					cobject->internal;
				vio->vio_temporary =
					cobject->temporary;
				vio->vio_alive =
					cobject->alive;
				vio->vio_purgable =
					(cobject->purgable != VM_PURGABLE_DENY);
				vio->vio_purgable_volatile =
					(cobject->purgable == VM_PURGABLE_VOLATILE ||
					 cobject->purgable == VM_PURGABLE_EMPTY);
			}

			used++;
			nobject = cobject->shadow;
			if (nobject == VM_OBJECT_NULL) {
				vm_object_unlock(cobject);
				break;
			}

			vm_object_lock(nobject);
			vm_object_unlock(cobject);
		}

		/* nothing locked */

		if (used <= room)
			break;

		/* must allocate more memory */

		if (size != 0)
			kmem_free(ipc_kernel_map, addr, size);
		size = vm_map_round_page(2 * used * sizeof(vm_info_object_t),
					 VM_MAP_PAGE_MASK(ipc_kernel_map));

		kr = vm_allocate(ipc_kernel_map, &addr, size, VM_FLAGS_ANYWHERE | VM_MAKE_TAG(VM_KERN_MEMORY_IPC));
		if (kr != KERN_SUCCESS)
			return KERN_RESOURCE_SHORTAGE;

		kr = vm_map_wire(
			ipc_kernel_map,
			vm_map_trunc_page(addr,
					  VM_MAP_PAGE_MASK(ipc_kernel_map)),
			vm_map_round_page(addr + size,
					  VM_MAP_PAGE_MASK(ipc_kernel_map)),
			VM_PROT_READ|VM_PROT_WRITE,
			FALSE);
		assert(kr == KERN_SUCCESS);
	}

	/* free excess memory; make remaining memory pageable */

	if (used == 0) {
		copy = VM_MAP_COPY_NULL;

		if (size != 0)
			kmem_free(ipc_kernel_map, addr, size);
	} else {
		vm_size_t size_used = (used * sizeof(vm_info_object_t));
		vm_size_t vmsize_used = vm_map_round_page(size_used,
					  VM_MAP_PAGE_MASK(ipc_kernel_map));

		kr = vm_map_unwire(
			ipc_kernel_map,
			vm_map_trunc_page(addr,
					  VM_MAP_PAGE_MASK(ipc_kernel_map)),
			vm_map_round_page(addr + size_used,
					  VM_MAP_PAGE_MASK(ipc_kernel_map)),
			FALSE);
		assert(kr == KERN_SUCCESS);

		kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)addr,
				   (vm_map_size_t)size_used, TRUE, &copy);
		assert(kr == KERN_SUCCESS);

		if (size != vmsize_used)
			kmem_free(ipc_kernel_map,
				  addr + vmsize_used, size - vmsize_used);
	}

	*regionp = region;
	*objectsp = (vm_info_object_array_t) copy;
	*objectsCntp = used;
	return KERN_SUCCESS;
#endif /* MACH_VM_DEBUG */
}
Example #6
0
int
memory_object_control_uiomove(
	memory_object_control_t	control,
	memory_object_offset_t	offset,
	void		*	uio,
	int			start_offset,
	int			io_requested,
	int			mark_dirty,
	int			take_reference)
{
	vm_object_t		object;
	vm_page_t		dst_page;
	int			xsize;
	int			retval = 0;
	int			cur_run;
	int			cur_needed;
	int			i;
	int			orig_offset;
	boolean_t		make_lru = FALSE;
	vm_page_t		page_run[MAX_RUN];

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL) {
		return (0);
	}
	assert(!object->internal);

	vm_object_lock(object);

	if (mark_dirty && object->copy != VM_OBJECT_NULL) {
		/*
		 * We can't modify the pages without honoring
		 * copy-on-write obligations first, so fall off
		 * this optimized path and fall back to the regular
		 * path.
		 */
		vm_object_unlock(object);
		return 0;
	}
	orig_offset = start_offset;
	    
	while (io_requested && retval == 0) {

		cur_needed = (start_offset + io_requested + (PAGE_SIZE - 1)) / PAGE_SIZE;

		if (cur_needed > MAX_RUN)
		        cur_needed = MAX_RUN;

		for (cur_run = 0; cur_run < cur_needed; ) {

		        if ((dst_page = vm_page_lookup(object, offset)) == VM_PAGE_NULL)
			        break;
			/*
			 * Sync up on getting the busy bit
			 */
			if ((dst_page->busy || dst_page->cleaning)) {
			        /*
				 * someone else is playing with the page... if we've
				 * already collected pages into this run, go ahead
				 * and process now, we can't block on this
				 * page while holding other pages in the BUSY state
				 * otherwise we will wait
				 */
			        if (cur_run)
				        break;
			        PAGE_SLEEP(object, dst_page, THREAD_UNINT);
				continue;
			}
			/*
			 * this routine is only called when copying
			 * to/from real files... no need to consider
			 * encrypted swap pages
			 */
			assert(!dst_page->encrypted);

		        if (mark_dirty) {
			        dst_page->dirty = TRUE;
				if (dst_page->cs_validated) {
					/*
					 * CODE SIGNING:
					 * We're modifying a code-signed
					 * page:  assume that it is now tainted.
					 */
					dst_page->cs_tainted = TRUE;
					vm_cs_tainted_forces++;
				}
			}
			dst_page->busy = TRUE;

			page_run[cur_run++] = dst_page;

			offset += PAGE_SIZE_64;
		}
		if (cur_run == 0)
		        /*
			 * we hit a 'hole' in the cache
			 * we bail at this point
			 * we'll unlock the object below
			 */
		        break;
		vm_object_unlock(object);

		for (i = 0; i < cur_run; i++) {
		  
		        dst_page = page_run[i];

			if ((xsize = PAGE_SIZE - start_offset) > io_requested)
			        xsize = io_requested;

			if ( (retval = uiomove64((addr64_t)(((addr64_t)(dst_page->phys_page) << 12) + start_offset), xsize, uio)) )
			        break;

			io_requested -= xsize;
			start_offset = 0;
		}
		vm_object_lock(object);

		/*
		 * if we have more than 1 page to work on
		 * in the current run, or the original request
		 * started at offset 0 of the page, or we're
		 * processing multiple batches, we will move
		 * the pages to the tail of the inactive queue
		 * to implement an LRU for read/write accesses
		 *
		 * the check for orig_offset == 0 is there to 
		 * mitigate the cost of small (< page_size) requests
		 * to the same page (this way we only move it once)
		 */
		if (take_reference && (cur_run > 1 || orig_offset == 0)) {
			vm_page_lockspin_queues();
			make_lru = TRUE;
		}
		for (i = 0; i < cur_run; i++) {
		        dst_page = page_run[i];

			/*
			 * someone is explicitly referencing this page...
			 * update clustered and speculative state
			 * 
			 */
			VM_PAGE_CONSUME_CLUSTERED(dst_page);

			if (make_lru == TRUE)
				vm_page_lru(dst_page);

			PAGE_WAKEUP_DONE(dst_page);
		}
		if (make_lru == TRUE) {
			vm_page_unlock_queues();
			make_lru = FALSE;
		}
		orig_offset = 0;
	}
	vm_object_unlock(object);

	return (retval);
}
Example #7
0
static int
fill_vnodeinfoforaddr(
	vm_map_entry_t			entry,
	uint32_t * vnodeaddr,
	uint32_t * vid)
{
	vm_object_t	top_object, object;
	memory_object_t memory_object;
	memory_object_pager_ops_t pager_ops;
	kern_return_t	kr;
	int		shadow_depth;


	if (entry->is_sub_map) {
		return(0);
	} else {
		/*
		 * The last object in the shadow chain has the
		 * relevant pager information.
		 */
		top_object = entry->object.vm_object;
		if (top_object == VM_OBJECT_NULL) {
			object = VM_OBJECT_NULL;
			shadow_depth = 0;
		} else {
			vm_object_lock(top_object);
			for (object = top_object, shadow_depth = 0;
			     object->shadow != VM_OBJECT_NULL;
			     object = object->shadow, shadow_depth++) {
				vm_object_lock(object->shadow);
				vm_object_unlock(object);
			}
		}
	}

	if (object == VM_OBJECT_NULL) {
		return(0);
	} else if (object->internal) {
		vm_object_unlock(object);
		return(0);
	} else if (! object->pager_ready ||
		   object->terminating ||
		   ! object->alive) {
		vm_object_unlock(object);
		return(0);
	} else {
		memory_object = object->pager;
		pager_ops = memory_object->mo_pager_ops;
		if (pager_ops == &vnode_pager_ops) {
			kr = vnode_pager_get_object_vnode(
				memory_object,
				vnodeaddr, vid);
			if (kr != KERN_SUCCESS) {
				vm_object_unlock(object);
				return(0);
			}
		} else {
			vm_object_unlock(object);
			return(0);
		}
	}
	vm_object_unlock(object);
	return(1);
}
Example #8
0
int
memory_object_control_uiomove(
	memory_object_control_t	control,
	memory_object_offset_t	offset,
	void		*	uio,
	int			start_offset,
	int			io_requested,
	int			mark_dirty,
	int			take_reference)
{
	vm_object_t		object;
	vm_page_t		dst_page;
	int			xsize;
	int			retval = 0;
	int			cur_run;
	int			cur_needed;
	int			i;
	int			orig_offset;
	vm_page_t		page_run[MAX_RUN];

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL) {
		return (0);
	}
	assert(!object->internal);

	vm_object_lock(object);

	if (mark_dirty && object->copy != VM_OBJECT_NULL) {
		/*
		 * We can't modify the pages without honoring
		 * copy-on-write obligations first, so fall off
		 * this optimized path and fall back to the regular
		 * path.
		 */
		vm_object_unlock(object);
		return 0;
	}
	orig_offset = start_offset;
	    
	while (io_requested && retval == 0) {

		cur_needed = (start_offset + io_requested + (PAGE_SIZE - 1)) / PAGE_SIZE;

		if (cur_needed > MAX_RUN)
		        cur_needed = MAX_RUN;

		for (cur_run = 0; cur_run < cur_needed; ) {

		        if ((dst_page = vm_page_lookup(object, offset)) == VM_PAGE_NULL)
			        break;

			/*
			 * if we're in this routine, we are inside a filesystem's
			 * locking model, so we don't ever want to wait for pages that have
			 * list_req_pending == TRUE since it means that the
			 * page is a candidate for some type of I/O operation,
			 * but that it has not yet been gathered into a UPL...
			 * this implies that it is still outside the domain
			 * of the filesystem and that whoever is responsible for
			 * grabbing it into a UPL may be stuck behind the filesystem
			 * lock this thread owns, or trying to take a lock exclusively
			 * and waiting for the readers to drain from a rw lock...
			 * if we block in those cases, we will deadlock
			 */
			if (dst_page->list_req_pending) {

				if (dst_page->absent) {
					/*
					 * this is the list_req_pending | absent | busy case
					 * which originates from vm_fault_page... we want
					 * to fall out of the fast path and go back
					 * to the caller which will gather this page
					 * into a UPL and issue the I/O if no one
					 * else beats us to it
					 */
					break;
				}
				if (dst_page->pageout || dst_page->cleaning) {
					/*
					 * this is the list_req_pending | pageout | busy case
					 * or the list_req_pending | cleaning case...
					 * which originate from the pageout_scan and
					 * msync worlds for the pageout case and the hibernate
					 * pre-cleaning world for the cleaning case...
					 * we need to reset the state of this page to indicate
					 * it should stay in the cache marked dirty... nothing else we
					 * can do at this point... we can't block on it, we can't busy
					 * it and we can't clean it from this routine.
					 */
					vm_page_lockspin_queues();

					vm_pageout_queue_steal(dst_page, TRUE); 
					vm_page_deactivate(dst_page);

					vm_page_unlock_queues();
				}
				/*
				 * this is the list_req_pending | cleaning case...
				 * we can go ahead and deal with this page since
				 * its ok for us to mark this page busy... if a UPL
				 * tries to gather this page, it will block until the
				 * busy is cleared, thus allowing us safe use of the page
				 * when we're done with it, we will clear busy and wake
				 * up anyone waiting on it, thus allowing the UPL creation
				 * to finish
				 */

			} else if (dst_page->busy || dst_page->cleaning) {
				/*
				 * someone else is playing with the page... if we've
				 * already collected pages into this run, go ahead
				 * and process now, we can't block on this
				 * page while holding other pages in the BUSY state
				 * otherwise we will wait
				 */
				if (cur_run)
					break;
				PAGE_SLEEP(object, dst_page, THREAD_UNINT);
				continue;
			}

			/*
			 * this routine is only called when copying
			 * to/from real files... no need to consider
			 * encrypted swap pages
			 */
			assert(!dst_page->encrypted);

		        if (mark_dirty) {
			        dst_page->dirty = TRUE;
				if (dst_page->cs_validated && 
				    !dst_page->cs_tainted) {
					/*
					 * CODE SIGNING:
					 * We're modifying a code-signed
					 * page: force revalidate
					 */
					dst_page->cs_validated = FALSE;
#if DEVELOPMENT || DEBUG
                                        vm_cs_validated_resets++;
#endif
					pmap_disconnect(dst_page->phys_page);
				}
			}
			dst_page->busy = TRUE;

			page_run[cur_run++] = dst_page;

			offset += PAGE_SIZE_64;
		}
		if (cur_run == 0)
		        /*
			 * we hit a 'hole' in the cache or
			 * a page we don't want to try to handle,
			 * so bail at this point
			 * we'll unlock the object below
			 */
		        break;
		vm_object_unlock(object);

		for (i = 0; i < cur_run; i++) {
		  
		        dst_page = page_run[i];

			if ((xsize = PAGE_SIZE - start_offset) > io_requested)
			        xsize = io_requested;

			if ( (retval = uiomove64((addr64_t)(((addr64_t)(dst_page->phys_page) << 12) + start_offset), xsize, uio)) )
			        break;

			io_requested -= xsize;
			start_offset = 0;
		}
		vm_object_lock(object);

		/*
		 * if we have more than 1 page to work on
		 * in the current run, or the original request
		 * started at offset 0 of the page, or we're
		 * processing multiple batches, we will move
		 * the pages to the tail of the inactive queue
		 * to implement an LRU for read/write accesses
		 *
		 * the check for orig_offset == 0 is there to 
		 * mitigate the cost of small (< page_size) requests
		 * to the same page (this way we only move it once)
		 */
		if (take_reference && (cur_run > 1 || orig_offset == 0)) {

			vm_page_lockspin_queues();

			for (i = 0; i < cur_run; i++)
				vm_page_lru(page_run[i]);

			vm_page_unlock_queues();
		}
		for (i = 0; i < cur_run; i++) {
		        dst_page = page_run[i];

			/*
			 * someone is explicitly referencing this page...
			 * update clustered and speculative state
			 * 
			 */
			VM_PAGE_CONSUME_CLUSTERED(dst_page);

			PAGE_WAKEUP_DONE(dst_page);
		}
		orig_offset = 0;
	}
	vm_object_unlock(object);

	return (retval);
}
Example #9
0
/*
 *	vm_pageout_scan does the dirty work for the pageout daemon.
 */
void
vm_pageout_scan()
{
	register vm_page_t	m, next;
	register int		page_shortage;
	register int		s;
	register int		pages_freed;
	int			free;
	vm_object_t		object;

	/*
	 *	Only continue when we want more pages to be "free"
	 */

	cnt.v_rev++;

	s = splimp();
	simple_lock(&vm_page_queue_free_lock);
	free = cnt.v_free_count;
	simple_unlock(&vm_page_queue_free_lock);
	splx(s);

	if (free < cnt.v_free_target) {
		swapout_threads();

		/*
		 *	Be sure the pmap system is updated so
		 *	we can scan the inactive queue.
		 */

		pmap_update();
	}

	/*
	 *	Acquire the resident page system lock,
	 *	as we may be changing what's resident quite a bit.
	 */
	vm_page_lock_queues();

	/*
	 *	Start scanning the inactive queue for pages we can free.
	 *	We keep scanning until we have enough free pages or
	 *	we have scanned through the entire queue.  If we
	 *	encounter dirty pages, we start cleaning them.
	 */

	pages_freed = 0;
	for (m = vm_page_queue_inactive.tqh_first; m != NULL; m = next) {
		s = splimp();
		simple_lock(&vm_page_queue_free_lock);
		free = cnt.v_free_count;
		simple_unlock(&vm_page_queue_free_lock);
		splx(s);
		if (free >= cnt.v_free_target)
			break;

		cnt.v_scan++;
		next = m->pageq.tqe_next;

		/*
		 * If the page has been referenced, move it back to the
		 * active queue.
		 */
		if (pmap_is_referenced(VM_PAGE_TO_PHYS(m))) {
			vm_page_activate(m);
			cnt.v_reactivated++;
			continue;
		}

		/*
		 * If the page is clean, free it up.
		 */
		if (m->flags & PG_CLEAN) {
			object = m->object;
			if (vm_object_lock_try(object)) {
				pmap_page_protect(VM_PAGE_TO_PHYS(m),
						  VM_PROT_NONE);
				vm_page_free(m);
				pages_freed++;
				cnt.v_dfree++;
				vm_object_unlock(object);
			}
			continue;
		}

		/*
		 * If the page is dirty but already being washed, skip it.
		 */
		if ((m->flags & PG_LAUNDRY) == 0)
			continue;

		/*
		 * Otherwise the page is dirty and still in the laundry,
		 * so we start the cleaning operation and remove it from
		 * the laundry.
		 */
		object = m->object;
		if (!vm_object_lock_try(object))
			continue;
		cnt.v_pageouts++;
#ifdef CLUSTERED_PAGEOUT
		if (object->pager &&
		    vm_pager_cancluster(object->pager, PG_CLUSTERPUT))
			vm_pageout_cluster(m, object);
		else
#endif
		vm_pageout_page(m, object);
		thread_wakeup((int) object);
		vm_object_unlock(object);
		/*
		 * Former next page may no longer even be on the inactive
		 * queue (due to potential blocking in the pager with the
		 * queues unlocked).  If it isn't, we just start over.
		 */
		if (next && (next->flags & PG_INACTIVE) == 0)
			next = vm_page_queue_inactive.tqh_first;
	}
	
	/*
	 *	Compute the page shortage.  If we are still very low on memory
	 *	be sure that we will move a minimal amount of pages from active
	 *	to inactive.
	 */

	page_shortage = cnt.v_inactive_target - cnt.v_inactive_count;
	if (page_shortage <= 0 && pages_freed == 0)
		page_shortage = 1;

	while (page_shortage > 0) {
		/*
		 *	Move some more pages from active to inactive.
		 */

		if ((m = vm_page_queue_active.tqh_first) == NULL)
			break;
		vm_page_deactivate(m);
		page_shortage--;
	}

	vm_page_unlock_queues();
}
Example #10
0
/*
 *	kmem_realloc:
 *
 *	Reallocate wired-down memory in the kernel's address map
 *	or a submap.  Newly allocated pages are not zeroed.
 *	This can only be used on regions allocated with kmem_alloc.
 *
 *	If successful, the pages in the old region are mapped twice.
 *	The old region is unchanged.  Use kmem_free to get rid of it.
 */
kern_return_t kmem_realloc(
	vm_map_t 	map,
	vm_offset_t 	oldaddr,
	vm_size_t 	oldsize,
	vm_offset_t 	*newaddrp,
	vm_size_t 	newsize)
{
	vm_offset_t oldmin, oldmax;
	vm_offset_t newaddr;
	vm_object_t object;
	vm_map_entry_t oldentry, newentry;
	unsigned int attempts;
	kern_return_t kr;

	oldmin = trunc_page(oldaddr);
	oldmax = round_page(oldaddr + oldsize);
	oldsize = oldmax - oldmin;
	newsize = round_page(newsize);

	/*
	 *	Find space for the new region.
	 */

	attempts = 0;

retry:
	vm_map_lock(map);
	kr = vm_map_find_entry(map, &newaddr, newsize, (vm_offset_t) 0,
			       VM_OBJECT_NULL, &newentry);
	if (kr != KERN_SUCCESS) {
		vm_map_unlock(map);

		if (attempts == 0) {
			attempts++;
			slab_collect();
			goto retry;
		}

		printf_once("no more room for kmem_realloc in %p\n", map);
		return kr;
	}

	/*
	 *	Find the VM object backing the old region.
	 */

	if (!vm_map_lookup_entry(map, oldmin, &oldentry))
		panic("kmem_realloc");
	object = oldentry->object.vm_object;

	/*
	 *	Increase the size of the object and
	 *	fill in the new region.
	 */

	vm_object_reference(object);
	vm_object_lock(object);
	if (object->size != oldsize)
		panic("kmem_realloc");
	object->size = newsize;
	vm_object_unlock(object);

	newentry->object.vm_object = object;
	newentry->offset = 0;

	/*
	 *	Since we have not given out this address yet,
	 *	it is safe to unlock the map.  We are trusting
	 *	that nobody will play with either region.
	 */

	vm_map_unlock(map);

	/*
	 *	Remap the pages in the old region and
	 *	allocate more pages for the new region.
	 */

	kmem_remap_pages(object, 0,
			 newaddr, newaddr + oldsize,
			 VM_PROT_DEFAULT);
	kmem_alloc_pages(object, oldsize,
			 newaddr + oldsize, newaddr + newsize,
			 VM_PROT_DEFAULT);

	*newaddrp = newaddr;
	return KERN_SUCCESS;
}
Example #11
0
kern_return_t
kmem_alloc_contig(
	vm_map_t		map,
	vm_offset_t		*addrp,
	vm_size_t		size,
	vm_offset_t 		mask,
	ppnum_t			max_pnum,
	ppnum_t			pnum_mask,
	int 			flags)
{
	vm_object_t		object;
	vm_object_offset_t	offset;
	vm_map_offset_t		map_addr; 
	vm_map_offset_t		map_mask;
	vm_map_size_t		map_size, i;
	vm_map_entry_t		entry;
	vm_page_t		m, pages;
	kern_return_t		kr;

	if (map == VM_MAP_NULL || (flags & ~(KMA_KOBJECT | KMA_LOMEM | KMA_NOPAGEWAIT))) 
		return KERN_INVALID_ARGUMENT;
	
	if (size == 0) {
		*addrp = 0;
		return KERN_INVALID_ARGUMENT;
	}

	map_size = vm_map_round_page(size);
	map_mask = (vm_map_offset_t)mask;

	/*
	 *	Allocate a new object (if necessary) and the reference we
	 *	will be donating to the map entry.  We must do this before
	 *	locking the map, or risk deadlock with the default pager.
	 */
	if ((flags & KMA_KOBJECT) != 0) {
		object = kernel_object;
		vm_object_reference(object);
	} else {
		object = vm_object_allocate(map_size);
	}

	kr = vm_map_find_space(map, &map_addr, map_size, map_mask, 0, &entry);
	if (KERN_SUCCESS != kr) {
		vm_object_deallocate(object);
		return kr;
	}

	entry->object.vm_object = object;
	entry->offset = offset = (object == kernel_object) ? 
		        map_addr : 0;

	/* Take an extra object ref in case the map entry gets deleted */
	vm_object_reference(object);
	vm_map_unlock(map);

	kr = cpm_allocate(CAST_DOWN(vm_size_t, map_size), &pages, max_pnum, pnum_mask, FALSE, flags);

	if (kr != KERN_SUCCESS) {
		vm_map_remove(map, vm_map_trunc_page(map_addr),
			      vm_map_round_page(map_addr + map_size), 0);
		vm_object_deallocate(object);
		*addrp = 0;
		return kr;
	}

	vm_object_lock(object);
	for (i = 0; i < map_size; i += PAGE_SIZE) {
		m = pages;
		pages = NEXT_PAGE(m);
		*(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
		m->busy = FALSE;
		vm_page_insert(m, object, offset + i);
	}
	vm_object_unlock(object);

	if ((kr = vm_map_wire(map, vm_map_trunc_page(map_addr),
			      vm_map_round_page(map_addr + map_size), VM_PROT_DEFAULT, FALSE)) 
		!= KERN_SUCCESS) {
		if (object == kernel_object) {
			vm_object_lock(object);
			vm_object_page_remove(object, offset, offset + map_size);
			vm_object_unlock(object);
		}
		vm_map_remove(map, vm_map_trunc_page(map_addr), 
			      vm_map_round_page(map_addr + map_size), 0);
		vm_object_deallocate(object);
		return kr;
	}
	vm_object_deallocate(object);

	if (object == kernel_object)
		vm_map_simplify(map, map_addr);

	*addrp = (vm_offset_t) map_addr;
	assert((vm_map_offset_t) *addrp == map_addr);
	return KERN_SUCCESS;
}
Example #12
0
kern_return_t
kernel_memory_allocate(
	register vm_map_t	map,
	register vm_offset_t	*addrp,
	register vm_size_t	size,
	register vm_offset_t	mask,
	int			flags,
	vm_tag_t                tag)
{
	vm_object_t 		object;
	vm_object_offset_t 	offset;
	vm_object_offset_t 	pg_offset;
	vm_map_entry_t 		entry = NULL;
	vm_map_offset_t 	map_addr, fill_start;
	vm_map_offset_t		map_mask;
	vm_map_size_t		map_size, fill_size;
	kern_return_t 		kr, pe_result;
	vm_page_t		mem;
	vm_page_t		guard_page_list = NULL;
	vm_page_t		wired_page_list = NULL;
	int			guard_page_count = 0;
	int			wired_page_count = 0;
	int			i;
	int			vm_alloc_flags;
	vm_prot_t		kma_prot;

	if (! vm_kernel_ready) {
		panic("kernel_memory_allocate: VM is not ready");
	}

	map_size = vm_map_round_page(size,
				     VM_MAP_PAGE_MASK(map));
	map_mask = (vm_map_offset_t) mask;

	vm_alloc_flags = VM_MAKE_TAG(tag);

	/* Check for zero allocation size (either directly or via overflow) */
	if (map_size == 0) {
		*addrp = 0;
		return KERN_INVALID_ARGUMENT;
	}

	/*
	 * limit the size of a single extent of wired memory
	 * to try and limit the damage to the system if
	 * too many pages get wired down
	 * limit raised to 2GB with 128GB max physical limit
	 */
        if ( !(flags & KMA_VAONLY) && map_size > (1ULL << 31)) {
                return KERN_RESOURCE_SHORTAGE;
        }

	/*
	 * Guard pages:
	 *
	 * Guard pages are implemented as ficticious pages.  By placing guard pages
	 * on either end of a stack, they can help detect cases where a thread walks
	 * off either end of its stack.  They are allocated and set up here and attempts
	 * to access those pages are trapped in vm_fault_page().
	 *
	 * The map_size we were passed may include extra space for
	 * guard pages.  If those were requested, then back it out of fill_size
	 * since vm_map_find_space() takes just the actual size not including
	 * guard pages.  Similarly, fill_start indicates where the actual pages
	 * will begin in the range.
	 */

	fill_start = 0;
	fill_size = map_size;

	if (flags & KMA_GUARD_FIRST) {
		vm_alloc_flags |= VM_FLAGS_GUARD_BEFORE;
		fill_start += PAGE_SIZE_64;
		fill_size -= PAGE_SIZE_64;
		if (map_size < fill_start + fill_size) {
			/* no space for a guard page */
			*addrp = 0;
			return KERN_INVALID_ARGUMENT;
		}
		guard_page_count++;
	}
	if (flags & KMA_GUARD_LAST) {
		vm_alloc_flags |= VM_FLAGS_GUARD_AFTER;
		fill_size -= PAGE_SIZE_64;
		if (map_size <= fill_start + fill_size) {
			/* no space for a guard page */
			*addrp = 0;
			return KERN_INVALID_ARGUMENT;
		}
		guard_page_count++;
	}
	wired_page_count = (int) (fill_size / PAGE_SIZE_64);
	assert(wired_page_count * PAGE_SIZE_64 == fill_size);

	for (i = 0; i < guard_page_count; i++) {
		for (;;) {
			mem = vm_page_grab_guard();

			if (mem != VM_PAGE_NULL)
				break;
			if (flags & KMA_NOPAGEWAIT) {
				kr = KERN_RESOURCE_SHORTAGE;
				goto out;
			}
			vm_page_more_fictitious();
		}
		mem->pageq.next = (queue_entry_t)guard_page_list;
		guard_page_list = mem;
	}

	if (! (flags & KMA_VAONLY)) {
	for (i = 0; i < wired_page_count; i++) {
		uint64_t	unavailable;
		
		for (;;) {
		        if (flags & KMA_LOMEM)
			        mem = vm_page_grablo();
			else
			        mem = vm_page_grab();

		        if (mem != VM_PAGE_NULL)
			        break;

			if (flags & KMA_NOPAGEWAIT) {
				kr = KERN_RESOURCE_SHORTAGE;
				goto out;
			}
			if ((flags & KMA_LOMEM) && (vm_lopage_needed == TRUE)) {
				kr = KERN_RESOURCE_SHORTAGE;
				goto out;
			}
			unavailable = (vm_page_wire_count + vm_page_free_target) * PAGE_SIZE;

			if (unavailable > max_mem || map_size > (max_mem - unavailable)) {
				kr = KERN_RESOURCE_SHORTAGE;
				goto out;
			}
			VM_PAGE_WAIT();
		}
		mem->pageq.next = (queue_entry_t)wired_page_list;
		wired_page_list = mem;
	}
	}

	/*
	 *	Allocate a new object (if necessary).  We must do this before
	 *	locking the map, or risk deadlock with the default pager.
	 */
	if ((flags & KMA_KOBJECT) != 0) {
		object = kernel_object;
		vm_object_reference(object);
	} else if ((flags & KMA_COMPRESSOR) != 0) {
		object = compressor_object;
		vm_object_reference(object);
	} else {
		object = vm_object_allocate(map_size);
	}

	kr = vm_map_find_space(map, &map_addr,
			       fill_size, map_mask,
			       vm_alloc_flags, &entry);
	if (KERN_SUCCESS != kr) {
		vm_object_deallocate(object);
		goto out;
	}

	if (object == kernel_object || object == compressor_object) {
		offset = map_addr;
	} else {
		offset = 0;
	}
	VME_OBJECT_SET(entry, object);
	VME_OFFSET_SET(entry, offset);
	
	if (object != compressor_object)
		entry->wired_count++;

	if (flags & KMA_PERMANENT)
		entry->permanent = TRUE;

	if (object != kernel_object && object != compressor_object)
		vm_object_reference(object);

	vm_object_lock(object);
	vm_map_unlock(map);

	pg_offset = 0;

	if (fill_start) {
		if (guard_page_list == NULL)
			panic("kernel_memory_allocate: guard_page_list == NULL");

		mem = guard_page_list;
		guard_page_list = (vm_page_t)mem->pageq.next;
		mem->pageq.next = NULL;

		vm_page_insert(mem, object, offset + pg_offset);

		mem->busy = FALSE;
		pg_offset += PAGE_SIZE_64;
	}

	kma_prot = VM_PROT_READ | VM_PROT_WRITE;

	if (flags & KMA_VAONLY) {
		pg_offset = fill_start + fill_size;
	} else {
	for (pg_offset = fill_start; pg_offset < fill_start + fill_size; pg_offset += PAGE_SIZE_64) {
		if (wired_page_list == NULL)
			panic("kernel_memory_allocate: wired_page_list == NULL");

		mem = wired_page_list;
		wired_page_list = (vm_page_t)mem->pageq.next;
		mem->pageq.next = NULL;
		mem->wire_count++;

		vm_page_insert_wired(mem, object, offset + pg_offset, tag);

		mem->busy = FALSE;
		mem->pmapped = TRUE;
		mem->wpmapped = TRUE;

		PMAP_ENTER_OPTIONS(kernel_pmap, map_addr + pg_offset, mem,
				   kma_prot, VM_PROT_NONE, ((flags & KMA_KSTACK) ? VM_MEM_STACK : 0), TRUE,
				   PMAP_OPTIONS_NOWAIT, pe_result);

		if (pe_result == KERN_RESOURCE_SHORTAGE) {
			vm_object_unlock(object);

			PMAP_ENTER(kernel_pmap, map_addr + pg_offset, mem, 
				   kma_prot, VM_PROT_NONE, ((flags & KMA_KSTACK) ? VM_MEM_STACK : 0), TRUE);

			vm_object_lock(object);
		}
		if (flags & KMA_NOENCRYPT) {
			bzero(CAST_DOWN(void *, (map_addr + pg_offset)), PAGE_SIZE);

			pmap_set_noencrypt(mem->phys_page);
		}
	}
	}