/* * Allocate new wired pages in an object. * The object is assumed to be mapped into the kernel map or * a submap. */ void kmem_alloc_pages( vm_object_t object, vm_offset_t offset, vm_offset_t start, vm_offset_t end, vm_prot_t protection) { /* * Mark the pmap region as not pageable. */ pmap_pageable(kernel_pmap, start, end, FALSE); while (start < end) { vm_page_t mem; vm_object_lock(object); /* * Allocate a page */ while ((mem = vm_page_alloc(object, offset)) == VM_PAGE_NULL) { vm_object_unlock(object); VM_PAGE_WAIT((void (*)()) 0); vm_object_lock(object); } /* * Wire it down */ vm_page_lock_queues(); vm_page_wire(mem); vm_page_unlock_queues(); vm_object_unlock(object); /* * Enter it in the kernel pmap */ PMAP_ENTER(kernel_pmap, start, mem, protection, TRUE); vm_object_lock(object); PAGE_WAKEUP_DONE(mem); vm_object_unlock(object); start += PAGE_SIZE; offset += PAGE_SIZE; } }
vm_object_t find_vnode_object( vm_map_entry_t entry ) { vm_object_t top_object, object; memory_object_t memory_object; memory_object_pager_ops_t pager_ops; if (!entry->is_sub_map) { /* * The last object in the shadow chain has the * relevant pager information. */ top_object = entry->object.vm_object; if (top_object) { vm_object_lock(top_object); for (object = top_object; object->shadow != VM_OBJECT_NULL; object = object->shadow) { vm_object_lock(object->shadow); vm_object_unlock(object); } if (object && !object->internal && object->pager_ready && !object->terminating && object->alive) { memory_object = object->pager; pager_ops = memory_object->mo_pager_ops; /* * If this object points to the vnode_pager_ops, then we found what we're * looking for. Otherwise, this vm_map_entry doesn't have an underlying * vnode and so we fall through to the bottom and return NULL. */ if (pager_ops == &vnode_pager_ops) return object; /* we return with the object locked */ } vm_object_unlock(object); } } return(VM_OBJECT_NULL); }
/* * osi_ubc_flush_dirty_and_wait -- ensure all dirty pages cleaned * * Alpha OSF/1 doesn't make it easy to wait for all dirty pages to be cleaned. * NFS tries to do this by calling waitforio(), which waits for v_numoutput * to go to zero. But that isn't good enough, because afs_putpage() doesn't * increment v_numoutput until it has obtained the vcache entry lock. Suppose * that Process A, trying to flush a page, is waiting for that lock, and * Process B tries to close the file. Process B calls waitforio() which thinks * that everything is cool because v_numoutput is still zero. Process B then * proceeds to call afs_StoreAllSegments(). Finally when B is finished, A gets * to proceed and flush its page. But then it's too late because the file is * already closed. * * (I suspect that waitforio() is not adequate for NFS, just as it isn't * adequate for us. But that's not my problem.) * * The only way we can be sure that there are no more dirty pages is if there * are no more pages with pg_busy set. We look for them on the cleanpl. * * For some reason, ubc_flush_dirty() only looks at the dirtypl, not the * dirtywpl. I don't know why this is good enough, but I assume it is. By * the same token, I only look for busy pages on the cleanpl, not the cleanwpl. * * Called with the global lock NOT held. */ static void osi_ubc_flush_dirty_and_wait(struct vnode *vp, int flags) { int retry; vm_page_t pp; int first; #ifdef SECRETLY_OSF1 do { struct vm_ubc_object *vop; vop = (struct vm_ubc_object *)(vp->v_object); ubc_flush_dirty(vop, flags); vm_object_lock(vop); if (vop->vu_dirtypl) /* shouldn't happen, but who knows */ retry = 1; else { retry = 0; if (vop->vu_cleanpl) { for (first = 1, pp = vop->vu_cleanpl; first || pp != vop->vu_cleanpl; first = 0, pp = pp->pg_onext) { if (pp->pg_busy) { retry = 1; pp->pg_wait = 1; assert_wait_mesg((vm_offset_t) pp, FALSE, "pg_wait"); vm_object_unlock(vop); thread_block(); break; } } } if (retry) continue; } vm_object_unlock(vop); } while (retry); #endif /* SECRETLY_OSF1 */ }
/* * Remap wired pages in an object into a new region. * The object is assumed to be mapped into the kernel map or * a submap. */ void kmem_remap_pages( vm_object_t object, vm_offset_t offset, vm_offset_t start, vm_offset_t end, vm_prot_t protection) { /* * Mark the pmap region as not pageable. */ pmap_pageable(kernel_pmap, start, end, FALSE); while (start < end) { vm_page_t mem; vm_object_lock(object); /* * Find a page */ if ((mem = vm_page_lookup(object, offset)) == VM_PAGE_NULL) panic("kmem_remap_pages"); /* * Wire it down (again) */ vm_page_lock_queues(); vm_page_wire(mem); vm_page_unlock_queues(); vm_object_unlock(object); /* * Enter it in the kernel pmap. The page isn't busy, * but this shouldn't be a problem because it is wired. */ PMAP_ENTER(kernel_pmap, start, mem, protection, TRUE); start += PAGE_SIZE; offset += PAGE_SIZE; } }
kern_return_t vm32_region_info_64( __DEBUG_ONLY vm_map_t map, __DEBUG_ONLY vm32_offset_t address, __DEBUG_ONLY vm_info_region_64_t *regionp, __DEBUG_ONLY vm_info_object_array_t *objectsp, __DEBUG_ONLY mach_msg_type_number_t *objectsCntp) { #if !MACH_VM_DEBUG return KERN_FAILURE; #else vm_map_copy_t copy; vm_offset_t addr = 0; /* memory for OOL data */ vm_size_t size; /* size of the memory */ unsigned int room; /* room for this many objects */ unsigned int used; /* actually this many objects */ vm_info_region_64_t region; kern_return_t kr; if (map == VM_MAP_NULL) return KERN_INVALID_TASK; size = 0; /* no memory allocated yet */ for (;;) { vm_map_t cmap; /* current map in traversal */ vm_map_t nmap; /* next map to look at */ vm_map_entry_t entry; vm_object_t object, cobject, nobject; /* nothing is locked */ vm_map_lock_read(map); for (cmap = map;; cmap = nmap) { /* cmap is read-locked */ if (!vm_map_lookup_entry(cmap, address, &entry)) { entry = entry->vme_next; if (entry == vm_map_to_entry(cmap)) { vm_map_unlock_read(cmap); if (size != 0) kmem_free(ipc_kernel_map, addr, size); return KERN_NO_SPACE; } } if (entry->is_sub_map) nmap = VME_SUBMAP(entry); else break; /* move down to the lower map */ vm_map_lock_read(nmap); vm_map_unlock_read(cmap); } /* cmap is read-locked; we have a real entry */ object = VME_OBJECT(entry); region.vir_start = (natural_t) entry->vme_start; region.vir_end = (natural_t) entry->vme_end; region.vir_object = (natural_t)(uintptr_t) object; region.vir_offset = VME_OFFSET(entry); region.vir_needs_copy = entry->needs_copy; region.vir_protection = entry->protection; region.vir_max_protection = entry->max_protection; region.vir_inheritance = entry->inheritance; region.vir_wired_count = entry->wired_count; region.vir_user_wired_count = entry->user_wired_count; used = 0; room = (unsigned int) (size / sizeof(vm_info_object_t)); if (object == VM_OBJECT_NULL) { vm_map_unlock_read(cmap); /* no memory needed */ break; } vm_object_lock(object); vm_map_unlock_read(cmap); for (cobject = object;; cobject = nobject) { /* cobject is locked */ if (used < room) { vm_info_object_t *vio = &((vm_info_object_t *) addr)[used]; vio->vio_object = (natural_t)(uintptr_t) cobject; vio->vio_size = (natural_t) cobject->vo_size; vio->vio_ref_count = cobject->ref_count; vio->vio_resident_page_count = cobject->resident_page_count; vio->vio_copy = (natural_t)(uintptr_t) cobject->copy; vio->vio_shadow = (natural_t)(uintptr_t) cobject->shadow; vio->vio_shadow_offset = (natural_t) cobject->vo_shadow_offset; vio->vio_paging_offset = (natural_t) cobject->paging_offset; vio->vio_copy_strategy = cobject->copy_strategy; vio->vio_last_alloc = (vm_offset_t) cobject->last_alloc; vio->vio_paging_in_progress = cobject->paging_in_progress + cobject->activity_in_progress; vio->vio_pager_created = cobject->pager_created; vio->vio_pager_initialized = cobject->pager_initialized; vio->vio_pager_ready = cobject->pager_ready; vio->vio_can_persist = cobject->can_persist; vio->vio_internal = cobject->internal; vio->vio_temporary = cobject->temporary; vio->vio_alive = cobject->alive; vio->vio_purgable = (cobject->purgable != VM_PURGABLE_DENY); vio->vio_purgable_volatile = (cobject->purgable == VM_PURGABLE_VOLATILE || cobject->purgable == VM_PURGABLE_EMPTY); } used++; nobject = cobject->shadow; if (nobject == VM_OBJECT_NULL) { vm_object_unlock(cobject); break; } vm_object_lock(nobject); vm_object_unlock(cobject); } /* nothing locked */ if (used <= room) break; /* must allocate more memory */ if (size != 0) kmem_free(ipc_kernel_map, addr, size); size = vm_map_round_page(2 * used * sizeof(vm_info_object_t), VM_MAP_PAGE_MASK(ipc_kernel_map)); kr = vm_allocate(ipc_kernel_map, &addr, size, VM_FLAGS_ANYWHERE | VM_MAKE_TAG(VM_KERN_MEMORY_IPC)); if (kr != KERN_SUCCESS) return KERN_RESOURCE_SHORTAGE; kr = vm_map_wire( ipc_kernel_map, vm_map_trunc_page(addr, VM_MAP_PAGE_MASK(ipc_kernel_map)), vm_map_round_page(addr + size, VM_MAP_PAGE_MASK(ipc_kernel_map)), VM_PROT_READ|VM_PROT_WRITE, FALSE); assert(kr == KERN_SUCCESS); } /* free excess memory; make remaining memory pageable */ if (used == 0) { copy = VM_MAP_COPY_NULL; if (size != 0) kmem_free(ipc_kernel_map, addr, size); } else { vm_size_t size_used = (used * sizeof(vm_info_object_t)); vm_size_t vmsize_used = vm_map_round_page(size_used, VM_MAP_PAGE_MASK(ipc_kernel_map)); kr = vm_map_unwire( ipc_kernel_map, vm_map_trunc_page(addr, VM_MAP_PAGE_MASK(ipc_kernel_map)), vm_map_round_page(addr + size_used, VM_MAP_PAGE_MASK(ipc_kernel_map)), FALSE); assert(kr == KERN_SUCCESS); kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)addr, (vm_map_size_t)size_used, TRUE, ©); assert(kr == KERN_SUCCESS); if (size != vmsize_used) kmem_free(ipc_kernel_map, addr + vmsize_used, size - vmsize_used); } *regionp = region; *objectsp = (vm_info_object_array_t) copy; *objectsCntp = used; return KERN_SUCCESS; #endif /* MACH_VM_DEBUG */ }
int memory_object_control_uiomove( memory_object_control_t control, memory_object_offset_t offset, void * uio, int start_offset, int io_requested, int mark_dirty, int take_reference) { vm_object_t object; vm_page_t dst_page; int xsize; int retval = 0; int cur_run; int cur_needed; int i; int orig_offset; boolean_t make_lru = FALSE; vm_page_t page_run[MAX_RUN]; object = memory_object_control_to_vm_object(control); if (object == VM_OBJECT_NULL) { return (0); } assert(!object->internal); vm_object_lock(object); if (mark_dirty && object->copy != VM_OBJECT_NULL) { /* * We can't modify the pages without honoring * copy-on-write obligations first, so fall off * this optimized path and fall back to the regular * path. */ vm_object_unlock(object); return 0; } orig_offset = start_offset; while (io_requested && retval == 0) { cur_needed = (start_offset + io_requested + (PAGE_SIZE - 1)) / PAGE_SIZE; if (cur_needed > MAX_RUN) cur_needed = MAX_RUN; for (cur_run = 0; cur_run < cur_needed; ) { if ((dst_page = vm_page_lookup(object, offset)) == VM_PAGE_NULL) break; /* * Sync up on getting the busy bit */ if ((dst_page->busy || dst_page->cleaning)) { /* * someone else is playing with the page... if we've * already collected pages into this run, go ahead * and process now, we can't block on this * page while holding other pages in the BUSY state * otherwise we will wait */ if (cur_run) break; PAGE_SLEEP(object, dst_page, THREAD_UNINT); continue; } /* * this routine is only called when copying * to/from real files... no need to consider * encrypted swap pages */ assert(!dst_page->encrypted); if (mark_dirty) { dst_page->dirty = TRUE; if (dst_page->cs_validated) { /* * CODE SIGNING: * We're modifying a code-signed * page: assume that it is now tainted. */ dst_page->cs_tainted = TRUE; vm_cs_tainted_forces++; } } dst_page->busy = TRUE; page_run[cur_run++] = dst_page; offset += PAGE_SIZE_64; } if (cur_run == 0) /* * we hit a 'hole' in the cache * we bail at this point * we'll unlock the object below */ break; vm_object_unlock(object); for (i = 0; i < cur_run; i++) { dst_page = page_run[i]; if ((xsize = PAGE_SIZE - start_offset) > io_requested) xsize = io_requested; if ( (retval = uiomove64((addr64_t)(((addr64_t)(dst_page->phys_page) << 12) + start_offset), xsize, uio)) ) break; io_requested -= xsize; start_offset = 0; } vm_object_lock(object); /* * if we have more than 1 page to work on * in the current run, or the original request * started at offset 0 of the page, or we're * processing multiple batches, we will move * the pages to the tail of the inactive queue * to implement an LRU for read/write accesses * * the check for orig_offset == 0 is there to * mitigate the cost of small (< page_size) requests * to the same page (this way we only move it once) */ if (take_reference && (cur_run > 1 || orig_offset == 0)) { vm_page_lockspin_queues(); make_lru = TRUE; } for (i = 0; i < cur_run; i++) { dst_page = page_run[i]; /* * someone is explicitly referencing this page... * update clustered and speculative state * */ VM_PAGE_CONSUME_CLUSTERED(dst_page); if (make_lru == TRUE) vm_page_lru(dst_page); PAGE_WAKEUP_DONE(dst_page); } if (make_lru == TRUE) { vm_page_unlock_queues(); make_lru = FALSE; } orig_offset = 0; } vm_object_unlock(object); return (retval); }
static int fill_vnodeinfoforaddr( vm_map_entry_t entry, uint32_t * vnodeaddr, uint32_t * vid) { vm_object_t top_object, object; memory_object_t memory_object; memory_object_pager_ops_t pager_ops; kern_return_t kr; int shadow_depth; if (entry->is_sub_map) { return(0); } else { /* * The last object in the shadow chain has the * relevant pager information. */ top_object = entry->object.vm_object; if (top_object == VM_OBJECT_NULL) { object = VM_OBJECT_NULL; shadow_depth = 0; } else { vm_object_lock(top_object); for (object = top_object, shadow_depth = 0; object->shadow != VM_OBJECT_NULL; object = object->shadow, shadow_depth++) { vm_object_lock(object->shadow); vm_object_unlock(object); } } } if (object == VM_OBJECT_NULL) { return(0); } else if (object->internal) { vm_object_unlock(object); return(0); } else if (! object->pager_ready || object->terminating || ! object->alive) { vm_object_unlock(object); return(0); } else { memory_object = object->pager; pager_ops = memory_object->mo_pager_ops; if (pager_ops == &vnode_pager_ops) { kr = vnode_pager_get_object_vnode( memory_object, vnodeaddr, vid); if (kr != KERN_SUCCESS) { vm_object_unlock(object); return(0); } } else { vm_object_unlock(object); return(0); } } vm_object_unlock(object); return(1); }
int memory_object_control_uiomove( memory_object_control_t control, memory_object_offset_t offset, void * uio, int start_offset, int io_requested, int mark_dirty, int take_reference) { vm_object_t object; vm_page_t dst_page; int xsize; int retval = 0; int cur_run; int cur_needed; int i; int orig_offset; vm_page_t page_run[MAX_RUN]; object = memory_object_control_to_vm_object(control); if (object == VM_OBJECT_NULL) { return (0); } assert(!object->internal); vm_object_lock(object); if (mark_dirty && object->copy != VM_OBJECT_NULL) { /* * We can't modify the pages without honoring * copy-on-write obligations first, so fall off * this optimized path and fall back to the regular * path. */ vm_object_unlock(object); return 0; } orig_offset = start_offset; while (io_requested && retval == 0) { cur_needed = (start_offset + io_requested + (PAGE_SIZE - 1)) / PAGE_SIZE; if (cur_needed > MAX_RUN) cur_needed = MAX_RUN; for (cur_run = 0; cur_run < cur_needed; ) { if ((dst_page = vm_page_lookup(object, offset)) == VM_PAGE_NULL) break; /* * if we're in this routine, we are inside a filesystem's * locking model, so we don't ever want to wait for pages that have * list_req_pending == TRUE since it means that the * page is a candidate for some type of I/O operation, * but that it has not yet been gathered into a UPL... * this implies that it is still outside the domain * of the filesystem and that whoever is responsible for * grabbing it into a UPL may be stuck behind the filesystem * lock this thread owns, or trying to take a lock exclusively * and waiting for the readers to drain from a rw lock... * if we block in those cases, we will deadlock */ if (dst_page->list_req_pending) { if (dst_page->absent) { /* * this is the list_req_pending | absent | busy case * which originates from vm_fault_page... we want * to fall out of the fast path and go back * to the caller which will gather this page * into a UPL and issue the I/O if no one * else beats us to it */ break; } if (dst_page->pageout || dst_page->cleaning) { /* * this is the list_req_pending | pageout | busy case * or the list_req_pending | cleaning case... * which originate from the pageout_scan and * msync worlds for the pageout case and the hibernate * pre-cleaning world for the cleaning case... * we need to reset the state of this page to indicate * it should stay in the cache marked dirty... nothing else we * can do at this point... we can't block on it, we can't busy * it and we can't clean it from this routine. */ vm_page_lockspin_queues(); vm_pageout_queue_steal(dst_page, TRUE); vm_page_deactivate(dst_page); vm_page_unlock_queues(); } /* * this is the list_req_pending | cleaning case... * we can go ahead and deal with this page since * its ok for us to mark this page busy... if a UPL * tries to gather this page, it will block until the * busy is cleared, thus allowing us safe use of the page * when we're done with it, we will clear busy and wake * up anyone waiting on it, thus allowing the UPL creation * to finish */ } else if (dst_page->busy || dst_page->cleaning) { /* * someone else is playing with the page... if we've * already collected pages into this run, go ahead * and process now, we can't block on this * page while holding other pages in the BUSY state * otherwise we will wait */ if (cur_run) break; PAGE_SLEEP(object, dst_page, THREAD_UNINT); continue; } /* * this routine is only called when copying * to/from real files... no need to consider * encrypted swap pages */ assert(!dst_page->encrypted); if (mark_dirty) { dst_page->dirty = TRUE; if (dst_page->cs_validated && !dst_page->cs_tainted) { /* * CODE SIGNING: * We're modifying a code-signed * page: force revalidate */ dst_page->cs_validated = FALSE; #if DEVELOPMENT || DEBUG vm_cs_validated_resets++; #endif pmap_disconnect(dst_page->phys_page); } } dst_page->busy = TRUE; page_run[cur_run++] = dst_page; offset += PAGE_SIZE_64; } if (cur_run == 0) /* * we hit a 'hole' in the cache or * a page we don't want to try to handle, * so bail at this point * we'll unlock the object below */ break; vm_object_unlock(object); for (i = 0; i < cur_run; i++) { dst_page = page_run[i]; if ((xsize = PAGE_SIZE - start_offset) > io_requested) xsize = io_requested; if ( (retval = uiomove64((addr64_t)(((addr64_t)(dst_page->phys_page) << 12) + start_offset), xsize, uio)) ) break; io_requested -= xsize; start_offset = 0; } vm_object_lock(object); /* * if we have more than 1 page to work on * in the current run, or the original request * started at offset 0 of the page, or we're * processing multiple batches, we will move * the pages to the tail of the inactive queue * to implement an LRU for read/write accesses * * the check for orig_offset == 0 is there to * mitigate the cost of small (< page_size) requests * to the same page (this way we only move it once) */ if (take_reference && (cur_run > 1 || orig_offset == 0)) { vm_page_lockspin_queues(); for (i = 0; i < cur_run; i++) vm_page_lru(page_run[i]); vm_page_unlock_queues(); } for (i = 0; i < cur_run; i++) { dst_page = page_run[i]; /* * someone is explicitly referencing this page... * update clustered and speculative state * */ VM_PAGE_CONSUME_CLUSTERED(dst_page); PAGE_WAKEUP_DONE(dst_page); } orig_offset = 0; } vm_object_unlock(object); return (retval); }
/* * vm_pageout_scan does the dirty work for the pageout daemon. */ void vm_pageout_scan() { register vm_page_t m, next; register int page_shortage; register int s; register int pages_freed; int free; vm_object_t object; /* * Only continue when we want more pages to be "free" */ cnt.v_rev++; s = splimp(); simple_lock(&vm_page_queue_free_lock); free = cnt.v_free_count; simple_unlock(&vm_page_queue_free_lock); splx(s); if (free < cnt.v_free_target) { swapout_threads(); /* * Be sure the pmap system is updated so * we can scan the inactive queue. */ pmap_update(); } /* * Acquire the resident page system lock, * as we may be changing what's resident quite a bit. */ vm_page_lock_queues(); /* * Start scanning the inactive queue for pages we can free. * We keep scanning until we have enough free pages or * we have scanned through the entire queue. If we * encounter dirty pages, we start cleaning them. */ pages_freed = 0; for (m = vm_page_queue_inactive.tqh_first; m != NULL; m = next) { s = splimp(); simple_lock(&vm_page_queue_free_lock); free = cnt.v_free_count; simple_unlock(&vm_page_queue_free_lock); splx(s); if (free >= cnt.v_free_target) break; cnt.v_scan++; next = m->pageq.tqe_next; /* * If the page has been referenced, move it back to the * active queue. */ if (pmap_is_referenced(VM_PAGE_TO_PHYS(m))) { vm_page_activate(m); cnt.v_reactivated++; continue; } /* * If the page is clean, free it up. */ if (m->flags & PG_CLEAN) { object = m->object; if (vm_object_lock_try(object)) { pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE); vm_page_free(m); pages_freed++; cnt.v_dfree++; vm_object_unlock(object); } continue; } /* * If the page is dirty but already being washed, skip it. */ if ((m->flags & PG_LAUNDRY) == 0) continue; /* * Otherwise the page is dirty and still in the laundry, * so we start the cleaning operation and remove it from * the laundry. */ object = m->object; if (!vm_object_lock_try(object)) continue; cnt.v_pageouts++; #ifdef CLUSTERED_PAGEOUT if (object->pager && vm_pager_cancluster(object->pager, PG_CLUSTERPUT)) vm_pageout_cluster(m, object); else #endif vm_pageout_page(m, object); thread_wakeup((int) object); vm_object_unlock(object); /* * Former next page may no longer even be on the inactive * queue (due to potential blocking in the pager with the * queues unlocked). If it isn't, we just start over. */ if (next && (next->flags & PG_INACTIVE) == 0) next = vm_page_queue_inactive.tqh_first; } /* * Compute the page shortage. If we are still very low on memory * be sure that we will move a minimal amount of pages from active * to inactive. */ page_shortage = cnt.v_inactive_target - cnt.v_inactive_count; if (page_shortage <= 0 && pages_freed == 0) page_shortage = 1; while (page_shortage > 0) { /* * Move some more pages from active to inactive. */ if ((m = vm_page_queue_active.tqh_first) == NULL) break; vm_page_deactivate(m); page_shortage--; } vm_page_unlock_queues(); }
/* * kmem_realloc: * * Reallocate wired-down memory in the kernel's address map * or a submap. Newly allocated pages are not zeroed. * This can only be used on regions allocated with kmem_alloc. * * If successful, the pages in the old region are mapped twice. * The old region is unchanged. Use kmem_free to get rid of it. */ kern_return_t kmem_realloc( vm_map_t map, vm_offset_t oldaddr, vm_size_t oldsize, vm_offset_t *newaddrp, vm_size_t newsize) { vm_offset_t oldmin, oldmax; vm_offset_t newaddr; vm_object_t object; vm_map_entry_t oldentry, newentry; unsigned int attempts; kern_return_t kr; oldmin = trunc_page(oldaddr); oldmax = round_page(oldaddr + oldsize); oldsize = oldmax - oldmin; newsize = round_page(newsize); /* * Find space for the new region. */ attempts = 0; retry: vm_map_lock(map); kr = vm_map_find_entry(map, &newaddr, newsize, (vm_offset_t) 0, VM_OBJECT_NULL, &newentry); if (kr != KERN_SUCCESS) { vm_map_unlock(map); if (attempts == 0) { attempts++; slab_collect(); goto retry; } printf_once("no more room for kmem_realloc in %p\n", map); return kr; } /* * Find the VM object backing the old region. */ if (!vm_map_lookup_entry(map, oldmin, &oldentry)) panic("kmem_realloc"); object = oldentry->object.vm_object; /* * Increase the size of the object and * fill in the new region. */ vm_object_reference(object); vm_object_lock(object); if (object->size != oldsize) panic("kmem_realloc"); object->size = newsize; vm_object_unlock(object); newentry->object.vm_object = object; newentry->offset = 0; /* * Since we have not given out this address yet, * it is safe to unlock the map. We are trusting * that nobody will play with either region. */ vm_map_unlock(map); /* * Remap the pages in the old region and * allocate more pages for the new region. */ kmem_remap_pages(object, 0, newaddr, newaddr + oldsize, VM_PROT_DEFAULT); kmem_alloc_pages(object, oldsize, newaddr + oldsize, newaddr + newsize, VM_PROT_DEFAULT); *newaddrp = newaddr; return KERN_SUCCESS; }
kern_return_t kmem_alloc_contig( vm_map_t map, vm_offset_t *addrp, vm_size_t size, vm_offset_t mask, ppnum_t max_pnum, ppnum_t pnum_mask, int flags) { vm_object_t object; vm_object_offset_t offset; vm_map_offset_t map_addr; vm_map_offset_t map_mask; vm_map_size_t map_size, i; vm_map_entry_t entry; vm_page_t m, pages; kern_return_t kr; if (map == VM_MAP_NULL || (flags & ~(KMA_KOBJECT | KMA_LOMEM | KMA_NOPAGEWAIT))) return KERN_INVALID_ARGUMENT; if (size == 0) { *addrp = 0; return KERN_INVALID_ARGUMENT; } map_size = vm_map_round_page(size); map_mask = (vm_map_offset_t)mask; /* * Allocate a new object (if necessary) and the reference we * will be donating to the map entry. We must do this before * locking the map, or risk deadlock with the default pager. */ if ((flags & KMA_KOBJECT) != 0) { object = kernel_object; vm_object_reference(object); } else { object = vm_object_allocate(map_size); } kr = vm_map_find_space(map, &map_addr, map_size, map_mask, 0, &entry); if (KERN_SUCCESS != kr) { vm_object_deallocate(object); return kr; } entry->object.vm_object = object; entry->offset = offset = (object == kernel_object) ? map_addr : 0; /* Take an extra object ref in case the map entry gets deleted */ vm_object_reference(object); vm_map_unlock(map); kr = cpm_allocate(CAST_DOWN(vm_size_t, map_size), &pages, max_pnum, pnum_mask, FALSE, flags); if (kr != KERN_SUCCESS) { vm_map_remove(map, vm_map_trunc_page(map_addr), vm_map_round_page(map_addr + map_size), 0); vm_object_deallocate(object); *addrp = 0; return kr; } vm_object_lock(object); for (i = 0; i < map_size; i += PAGE_SIZE) { m = pages; pages = NEXT_PAGE(m); *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL; m->busy = FALSE; vm_page_insert(m, object, offset + i); } vm_object_unlock(object); if ((kr = vm_map_wire(map, vm_map_trunc_page(map_addr), vm_map_round_page(map_addr + map_size), VM_PROT_DEFAULT, FALSE)) != KERN_SUCCESS) { if (object == kernel_object) { vm_object_lock(object); vm_object_page_remove(object, offset, offset + map_size); vm_object_unlock(object); } vm_map_remove(map, vm_map_trunc_page(map_addr), vm_map_round_page(map_addr + map_size), 0); vm_object_deallocate(object); return kr; } vm_object_deallocate(object); if (object == kernel_object) vm_map_simplify(map, map_addr); *addrp = (vm_offset_t) map_addr; assert((vm_map_offset_t) *addrp == map_addr); return KERN_SUCCESS; }
kern_return_t kernel_memory_allocate( register vm_map_t map, register vm_offset_t *addrp, register vm_size_t size, register vm_offset_t mask, int flags, vm_tag_t tag) { vm_object_t object; vm_object_offset_t offset; vm_object_offset_t pg_offset; vm_map_entry_t entry = NULL; vm_map_offset_t map_addr, fill_start; vm_map_offset_t map_mask; vm_map_size_t map_size, fill_size; kern_return_t kr, pe_result; vm_page_t mem; vm_page_t guard_page_list = NULL; vm_page_t wired_page_list = NULL; int guard_page_count = 0; int wired_page_count = 0; int i; int vm_alloc_flags; vm_prot_t kma_prot; if (! vm_kernel_ready) { panic("kernel_memory_allocate: VM is not ready"); } map_size = vm_map_round_page(size, VM_MAP_PAGE_MASK(map)); map_mask = (vm_map_offset_t) mask; vm_alloc_flags = VM_MAKE_TAG(tag); /* Check for zero allocation size (either directly or via overflow) */ if (map_size == 0) { *addrp = 0; return KERN_INVALID_ARGUMENT; } /* * limit the size of a single extent of wired memory * to try and limit the damage to the system if * too many pages get wired down * limit raised to 2GB with 128GB max physical limit */ if ( !(flags & KMA_VAONLY) && map_size > (1ULL << 31)) { return KERN_RESOURCE_SHORTAGE; } /* * Guard pages: * * Guard pages are implemented as ficticious pages. By placing guard pages * on either end of a stack, they can help detect cases where a thread walks * off either end of its stack. They are allocated and set up here and attempts * to access those pages are trapped in vm_fault_page(). * * The map_size we were passed may include extra space for * guard pages. If those were requested, then back it out of fill_size * since vm_map_find_space() takes just the actual size not including * guard pages. Similarly, fill_start indicates where the actual pages * will begin in the range. */ fill_start = 0; fill_size = map_size; if (flags & KMA_GUARD_FIRST) { vm_alloc_flags |= VM_FLAGS_GUARD_BEFORE; fill_start += PAGE_SIZE_64; fill_size -= PAGE_SIZE_64; if (map_size < fill_start + fill_size) { /* no space for a guard page */ *addrp = 0; return KERN_INVALID_ARGUMENT; } guard_page_count++; } if (flags & KMA_GUARD_LAST) { vm_alloc_flags |= VM_FLAGS_GUARD_AFTER; fill_size -= PAGE_SIZE_64; if (map_size <= fill_start + fill_size) { /* no space for a guard page */ *addrp = 0; return KERN_INVALID_ARGUMENT; } guard_page_count++; } wired_page_count = (int) (fill_size / PAGE_SIZE_64); assert(wired_page_count * PAGE_SIZE_64 == fill_size); for (i = 0; i < guard_page_count; i++) { for (;;) { mem = vm_page_grab_guard(); if (mem != VM_PAGE_NULL) break; if (flags & KMA_NOPAGEWAIT) { kr = KERN_RESOURCE_SHORTAGE; goto out; } vm_page_more_fictitious(); } mem->pageq.next = (queue_entry_t)guard_page_list; guard_page_list = mem; } if (! (flags & KMA_VAONLY)) { for (i = 0; i < wired_page_count; i++) { uint64_t unavailable; for (;;) { if (flags & KMA_LOMEM) mem = vm_page_grablo(); else mem = vm_page_grab(); if (mem != VM_PAGE_NULL) break; if (flags & KMA_NOPAGEWAIT) { kr = KERN_RESOURCE_SHORTAGE; goto out; } if ((flags & KMA_LOMEM) && (vm_lopage_needed == TRUE)) { kr = KERN_RESOURCE_SHORTAGE; goto out; } unavailable = (vm_page_wire_count + vm_page_free_target) * PAGE_SIZE; if (unavailable > max_mem || map_size > (max_mem - unavailable)) { kr = KERN_RESOURCE_SHORTAGE; goto out; } VM_PAGE_WAIT(); } mem->pageq.next = (queue_entry_t)wired_page_list; wired_page_list = mem; } } /* * Allocate a new object (if necessary). We must do this before * locking the map, or risk deadlock with the default pager. */ if ((flags & KMA_KOBJECT) != 0) { object = kernel_object; vm_object_reference(object); } else if ((flags & KMA_COMPRESSOR) != 0) { object = compressor_object; vm_object_reference(object); } else { object = vm_object_allocate(map_size); } kr = vm_map_find_space(map, &map_addr, fill_size, map_mask, vm_alloc_flags, &entry); if (KERN_SUCCESS != kr) { vm_object_deallocate(object); goto out; } if (object == kernel_object || object == compressor_object) { offset = map_addr; } else { offset = 0; } VME_OBJECT_SET(entry, object); VME_OFFSET_SET(entry, offset); if (object != compressor_object) entry->wired_count++; if (flags & KMA_PERMANENT) entry->permanent = TRUE; if (object != kernel_object && object != compressor_object) vm_object_reference(object); vm_object_lock(object); vm_map_unlock(map); pg_offset = 0; if (fill_start) { if (guard_page_list == NULL) panic("kernel_memory_allocate: guard_page_list == NULL"); mem = guard_page_list; guard_page_list = (vm_page_t)mem->pageq.next; mem->pageq.next = NULL; vm_page_insert(mem, object, offset + pg_offset); mem->busy = FALSE; pg_offset += PAGE_SIZE_64; } kma_prot = VM_PROT_READ | VM_PROT_WRITE; if (flags & KMA_VAONLY) { pg_offset = fill_start + fill_size; } else { for (pg_offset = fill_start; pg_offset < fill_start + fill_size; pg_offset += PAGE_SIZE_64) { if (wired_page_list == NULL) panic("kernel_memory_allocate: wired_page_list == NULL"); mem = wired_page_list; wired_page_list = (vm_page_t)mem->pageq.next; mem->pageq.next = NULL; mem->wire_count++; vm_page_insert_wired(mem, object, offset + pg_offset, tag); mem->busy = FALSE; mem->pmapped = TRUE; mem->wpmapped = TRUE; PMAP_ENTER_OPTIONS(kernel_pmap, map_addr + pg_offset, mem, kma_prot, VM_PROT_NONE, ((flags & KMA_KSTACK) ? VM_MEM_STACK : 0), TRUE, PMAP_OPTIONS_NOWAIT, pe_result); if (pe_result == KERN_RESOURCE_SHORTAGE) { vm_object_unlock(object); PMAP_ENTER(kernel_pmap, map_addr + pg_offset, mem, kma_prot, VM_PROT_NONE, ((flags & KMA_KSTACK) ? VM_MEM_STACK : 0), TRUE); vm_object_lock(object); } if (flags & KMA_NOENCRYPT) { bzero(CAST_DOWN(void *, (map_addr + pg_offset)), PAGE_SIZE); pmap_set_noencrypt(mem->phys_page); } } }