int
fill_procregioninfo(task_t task, uint64_t arg, struct proc_regioninfo_internal *pinfo, uintptr_t *vnodeaddr, uint32_t *vid)
{
	vm_map_t map;
	vm_map_offset_t address = (vm_map_offset_t)arg;
	vm_map_entry_t tmp_entry;
	vm_map_entry_t entry;
	vm_map_offset_t start;
	vm_region_extended_info_data_t extended;
	vm_region_top_info_data_t top;

	task_lock(task);
	map = task->map;
	if (map == VM_MAP_NULL) {
		task_unlock(task);
		return(0);
	}
	vm_map_reference(map);
	task_unlock(task);

	vm_map_lock_read(map);

	start = address;
	if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
		if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
			vm_map_unlock_read(map);
			vm_map_deallocate(map);
			return(0);
		}
	} else {
		entry = tmp_entry;
	}

	start = entry->vme_start;

	pinfo->pri_offset = entry->offset;
	pinfo->pri_protection = entry->protection;
	pinfo->pri_max_protection = entry->max_protection;
	pinfo->pri_inheritance = entry->inheritance;
	pinfo->pri_behavior = entry->behavior;
	pinfo->pri_user_wired_count = entry->user_wired_count;
	pinfo->pri_user_tag = entry->alias;

	if (entry->is_sub_map) {
		pinfo->pri_flags |= PROC_REGION_SUBMAP;
	} else {
		if (entry->is_shared)
			pinfo->pri_flags |= PROC_REGION_SHARED;
	}

	extended.protection = entry->protection;
	extended.user_tag = entry->alias;
	extended.pages_resident = 0;
	extended.pages_swapped_out = 0;
	extended.pages_shared_now_private = 0;
	extended.pages_dirtied = 0;
	extended.external_pager = 0;
	extended.shadow_depth = 0;

	vm_map_region_walk(map, start, entry, entry->offset,
			   entry->vme_end - start, &extended);

	if (extended.external_pager && extended.ref_count == 2 &&
	    extended.share_mode == SM_SHARED)
		extended.share_mode = SM_PRIVATE;

	top.private_pages_resident = 0;
	top.shared_pages_resident = 0;
	vm_map_region_top_walk(entry, &top);

	pinfo->pri_pages_resident = extended.pages_resident;
	pinfo->pri_pages_shared_now_private = extended.pages_shared_now_private;
	pinfo->pri_pages_swapped_out = extended.pages_swapped_out;
	pinfo->pri_pages_dirtied = extended.pages_dirtied;
	pinfo->pri_ref_count = extended.ref_count;
	pinfo->pri_shadow_depth = extended.shadow_depth;
	pinfo->pri_share_mode = extended.share_mode;

	pinfo->pri_private_pages_resident = top.private_pages_resident;
	pinfo->pri_shared_pages_resident = top.shared_pages_resident;
	pinfo->pri_obj_id = top.obj_id;

	pinfo->pri_address = (uint64_t)start;
	pinfo->pri_size = (uint64_t)(entry->vme_end - start);
	pinfo->pri_depth = 0;

	if ((vnodeaddr != 0) && (entry->is_sub_map == 0)) {
		*vnodeaddr = (uintptr_t)0;

		if (fill_vnodeinfoforaddr(entry, vnodeaddr, vid) == 0) {
			vm_map_unlock_read(map);
			vm_map_deallocate(map);
			return(1);
		}
	}

	vm_map_unlock_read(map);
	vm_map_deallocate(map);
	return(1);
}
/*
 * msync system call handler
 *
 * msync_args(void *addr, size_t len, int flags)
 *
 * No requirements
 */
int
sys_msync(struct msync_args *uap)
{
	struct proc *p = curproc;
	vm_offset_t addr;
	vm_offset_t tmpaddr;
	vm_size_t size, pageoff;
	int flags;
	vm_map_t map;
	int rv;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	flags = uap->flags;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (size < uap->len)		/* wrap */
		return(EINVAL);
	tmpaddr = addr + size;		/* workaround gcc4 opt */
	if (tmpaddr < addr)		/* wrap */
		return(EINVAL);

	if ((flags & (MS_ASYNC|MS_INVALIDATE)) == (MS_ASYNC|MS_INVALIDATE))
		return (EINVAL);

	map = &p->p_vmspace->vm_map;

	/*
	 * map->token serializes extracting the address range for size == 0
	 * msyncs with the vm_map_clean call; if the token were not held
	 * across the two calls, an intervening munmap/mmap pair, for example,
	 * could cause msync to occur on a wrong region.
	 */
	lwkt_gettoken(&map->token);

	/*
	 * XXX Gak!  If size is zero we are supposed to sync "all modified
	 * pages with the region containing addr".  Unfortunately, we don't
	 * really keep track of individual mmaps so we approximate by flushing
	 * the range of the map entry containing addr.  This can be incorrect
	 * if the region splits or is coalesced with a neighbor.
	 */
	if (size == 0) {
		vm_map_entry_t entry;

		vm_map_lock_read(map);
		rv = vm_map_lookup_entry(map, addr, &entry);
		if (rv == FALSE) {
			vm_map_unlock_read(map);
			rv = KERN_INVALID_ADDRESS;
			goto done;
		}
		addr = entry->start;
		size = entry->end - entry->start;
		vm_map_unlock_read(map);
	}

	/*
	 * Clean the pages and interpret the return value.
	 */
	rv = vm_map_clean(map, addr, addr + size,
			  (flags & MS_ASYNC) == 0,
			  (flags & MS_INVALIDATE) != 0);
done:
	lwkt_reltoken(&map->token);

	switch (rv) {
	case KERN_SUCCESS:
		break;
	case KERN_INVALID_ADDRESS:
		return (EINVAL);	/* Sun returns ENOMEM? */
	case KERN_FAILURE:
		return (EIO);
	default:
		return (EINVAL);
	}
	return (0);
}
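/*
 * Illustrative userland sketch (not part of the kernel source): what the
 * rules enforced by sys_msync() above look like from the caller's side.
 * The helper name and usage are hypothetical; error handling is minimal.
 */
#if 0
#include <sys/types.h>
#include <sys/mman.h>
#include <stdio.h>
#include <string.h>

int
flush_file_window(int fd, off_t pgoff, size_t len)
{
	/* pgoff must be page-aligned for mmap(); msync() itself rounds
	 * the address and length to page boundaries, as seen above. */
	char *p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED,
		       fd, pgoff);
	if (p == MAP_FAILED)
		return (-1);
	memset(p, 0, len);

	/* MS_ASYNC combined with MS_INVALIDATE is rejected with EINVAL
	 * by the check above; MS_SYNC waits for the write-back. */
	if (msync(p, len, MS_SYNC) == -1)
		perror("msync");
	return (munmap(p, len));
}
#endif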
/*
 * mincore system call handler
 *
 * mincore_args(const void *addr, size_t len, char *vec)
 *
 * No requirements
 */
int
sys_mincore(struct mincore_args *uap)
{
	struct proc *p = curproc;
	vm_offset_t addr, first_addr;
	vm_offset_t end, cend;
	pmap_t pmap;
	vm_map_t map;
	char *vec;
	int error;
	int vecindex, lastvecindex;
	vm_map_entry_t current;
	vm_map_entry_t entry;
	int mincoreinfo;
	unsigned int timestamp;

	/*
	 * Make sure that the addresses presented are valid for user
	 * mode.
	 */
	first_addr = addr = trunc_page((vm_offset_t) uap->addr);
	end = addr + (vm_size_t)round_page(uap->len);
	if (end < addr)
		return (EINVAL);
	if (VM_MAX_USER_ADDRESS > 0 && end > VM_MAX_USER_ADDRESS)
		return (EINVAL);

	/*
	 * Address of byte vector
	 */
	vec = uap->vec;

	map = &p->p_vmspace->vm_map;
	pmap = vmspace_pmap(p->p_vmspace);

	lwkt_gettoken(&vm_token);
	vm_map_lock_read(map);
RestartScan:
	timestamp = map->timestamp;

	if (!vm_map_lookup_entry(map, addr, &entry))
		entry = entry->next;

	/*
	 * Do this on a map entry basis so that if the pages are not
	 * in the current processes address space, we can easily look
	 * up the pages elsewhere.
	 */
	lastvecindex = -1;
	for (current = entry;
	     (current != &map->header) && (current->start < end);
	     current = current->next) {
		/*
		 * ignore submaps (for now) or null objects
		 */
		if (current->maptype != VM_MAPTYPE_NORMAL &&
		    current->maptype != VM_MAPTYPE_VPAGETABLE) {
			continue;
		}
		if (current->object.vm_object == NULL)
			continue;

		/*
		 * limit this scan to the current map entry and the
		 * limits for the mincore call
		 */
		if (addr < current->start)
			addr = current->start;
		cend = current->end;
		if (cend > end)
			cend = end;

		/*
		 * scan this entry one page at a time
		 */
		while (addr < cend) {
			/*
			 * Check pmap first, it is likely faster, also
			 * it can provide info as to whether we are the
			 * one referencing or modifying the page.
			 *
			 * If we have to check the VM object, only mess
			 * around with normal maps.  Do not mess around
			 * with virtual page tables (XXX).
			 */
			mincoreinfo = pmap_mincore(pmap, addr);
			if (mincoreinfo == 0 &&
			    current->maptype == VM_MAPTYPE_NORMAL) {
				vm_pindex_t pindex;
				vm_ooffset_t offset;
				vm_page_t m;

				/*
				 * calculate the page index into the object
				 */
				offset = current->offset +
					 (addr - current->start);
				pindex = OFF_TO_IDX(offset);

				/*
				 * if the page is resident, then gather
				 * information about it.  spl protection is
				 * required to maintain the object
				 * association.  And XXX what if the page is
				 * busy?  What's the deal with that?
				 */
				crit_enter();
				m = vm_page_lookup(current->object.vm_object,
						   pindex);
				if (m && m->valid) {
					mincoreinfo = MINCORE_INCORE;
					if (m->dirty || pmap_is_modified(m))
						mincoreinfo |= MINCORE_MODIFIED_OTHER;
					if ((m->flags & PG_REFERENCED) ||
					    pmap_ts_referenced(m)) {
						vm_page_flag_set(m, PG_REFERENCED);
						mincoreinfo |= MINCORE_REFERENCED_OTHER;
					}
				}
				crit_exit();
			}

			/*
			 * subyte may page fault.  In case it needs to modify
			 * the map, we release the lock.
			 */
			vm_map_unlock_read(map);

			/*
			 * calculate index into user supplied byte vector
			 */
			vecindex = OFF_TO_IDX(addr - first_addr);

			/*
			 * If we have skipped map entries, we need to make
			 * sure that the byte vector is zeroed for those
			 * skipped entries.  Advance lastvecindex before
			 * storing so that we zero the skipped slots rather
			 * than rewriting the last slot already filled in
			 * (and so the first pass cannot store at index -1).
			 */
			while ((lastvecindex + 1) < vecindex) {
				++lastvecindex;
				error = subyte(vec + lastvecindex, 0);
				if (error) {
					error = EFAULT;
					goto done;
				}
			}

			/*
			 * Pass the page information to the user
			 */
			error = subyte(vec + vecindex, mincoreinfo);
			if (error) {
				error = EFAULT;
				goto done;
			}

			/*
			 * If the map has changed, due to the subyte,
			 * the previous output may be invalid.
			 */
			vm_map_lock_read(map);
			if (timestamp != map->timestamp)
				goto RestartScan;

			lastvecindex = vecindex;
			addr += PAGE_SIZE;
		}
	}

	/*
	 * subyte may page fault.  In case it needs to modify
	 * the map, we release the lock.
	 */
	vm_map_unlock_read(map);

	/*
	 * Zero the last entries in the byte vector.
	 */
	vecindex = OFF_TO_IDX(end - first_addr);
	while ((lastvecindex + 1) < vecindex) {
		++lastvecindex;
		error = subyte(vec + lastvecindex, 0);
		if (error) {
			error = EFAULT;
			goto done;
		}
	}

	/*
	 * If the map has changed, due to the subyte, the previous
	 * output may be invalid.
	 */
	vm_map_lock_read(map);
	if (timestamp != map->timestamp)
		goto RestartScan;
	vm_map_unlock_read(map);

	error = 0;
done:
	lwkt_reltoken(&vm_token);
	return (error);
}
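/*
 * Illustrative userland sketch (not part of the kernel source): the byte
 * vector filled by sys_mincore() above holds one entry per page of the
 * queried range.  The helper name is hypothetical; error handling is
 * minimal.
 */
#if 0
#include <sys/types.h>
#include <sys/mman.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

void
report_residency(void *addr, size_t len)
{
	long pagesize = sysconf(_SC_PAGESIZE);
	size_t i, npages = (len + pagesize - 1) / pagesize;
	char *vec = malloc(npages);

	if (vec != NULL && mincore(addr, len, vec) == 0) {
		for (i = 0; i < npages; i++) {
			if (vec[i] & MINCORE_INCORE)
				printf("page %zu resident%s\n", i,
				    (vec[i] & MINCORE_MODIFIED_OTHER) ?
				    " (modified)" : "");
		}
	}
	free(vec);
}
#endif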
/*
 * The map entries can *almost* be read with programs like cat.  However,
 * large maps need special programs to read.  It is not easy to implement
 * a program that can sense the required size of the buffer, and then
 * subsequently do a read with the appropriate size.  This operation cannot
 * be atomic.  The best that we can do is to allow the program to do a read
 * with an arbitrarily large buffer, and return as much as we can.  We can
 * return an error code if the buffer is too small (EFBIG), then the program
 * can try a bigger buffer.
 */
int
procfs_doprocmap(PFS_FILL_ARGS)
{
	struct vmspace *vm;
	vm_map_t map;
	vm_map_entry_t entry, tmp_entry;
	struct vnode *vp;
	char *fullpath, *freepath;
	struct uidinfo *uip;
	int error, vfslocked;
	unsigned int last_timestamp;
#ifdef COMPAT_FREEBSD32
	int wrap32 = 0;
#endif

	PROC_LOCK(p);
	error = p_candebug(td, p);
	PROC_UNLOCK(p);
	if (error)
		return (error);

	if (uio->uio_rw != UIO_READ)
		return (EOPNOTSUPP);

#ifdef COMPAT_FREEBSD32
	if (curproc->p_sysent->sv_flags & SV_ILP32) {
		if (!(p->p_sysent->sv_flags & SV_ILP32))
			return (EOPNOTSUPP);
		wrap32 = 1;
	}
#endif

	vm = vmspace_acquire_ref(p);
	if (vm == NULL)
		return (ESRCH);
	map = &vm->vm_map;
	vm_map_lock_read(map);
	for (entry = map->header.next; entry != &map->header;
	     entry = entry->next) {
		vm_object_t obj, tobj, lobj;
		int ref_count, shadow_count, flags;
		vm_offset_t e_start, e_end, addr;
		int resident, privateresident;
		char *type;
		vm_eflags_t e_eflags;
		vm_prot_t e_prot;

		if (entry->eflags & MAP_ENTRY_IS_SUB_MAP)
			continue;

		e_eflags = entry->eflags;
		e_prot = entry->protection;
		e_start = entry->start;
		e_end = entry->end;
		privateresident = 0;
		obj = entry->object.vm_object;
		if (obj != NULL) {
			VM_OBJECT_LOCK(obj);
			if (obj->shadow_count == 1)
				privateresident = obj->resident_page_count;
		}
		uip = (entry->uip) ? entry->uip : (obj ? obj->uip : NULL);

		resident = 0;
		addr = entry->start;
		while (addr < entry->end) {
			if (pmap_extract(map->pmap, addr))
				resident++;
			addr += PAGE_SIZE;
		}

		for (lobj = tobj = obj; tobj; tobj = tobj->backing_object) {
			if (tobj != obj)
				VM_OBJECT_LOCK(tobj);
			if (lobj != obj)
				VM_OBJECT_UNLOCK(lobj);
			lobj = tobj;
		}
		last_timestamp = map->timestamp;
		vm_map_unlock_read(map);

		freepath = NULL;
		fullpath = "-";
		if (lobj) {
			switch (lobj->type) {
			default:
			case OBJT_DEFAULT:
				type = "default";
				vp = NULL;
				break;
			case OBJT_VNODE:
				type = "vnode";
				vp = lobj->handle;
				vref(vp);
				break;
			case OBJT_SWAP:
				type = "swap";
				vp = NULL;
				break;
			case OBJT_SG:
			case OBJT_DEVICE:
				type = "device";
				vp = NULL;
				break;
			}
			if (lobj != obj)
				VM_OBJECT_UNLOCK(lobj);

			flags = obj->flags;
			ref_count = obj->ref_count;
			shadow_count = obj->shadow_count;
			VM_OBJECT_UNLOCK(obj);
			if (vp != NULL) {
				vn_fullpath(td, vp, &fullpath, &freepath);
				vfslocked = VFS_LOCK_GIANT(vp->v_mount);
				vrele(vp);
				VFS_UNLOCK_GIANT(vfslocked);
			}
		} else {
			type = "none";
			flags = 0;
			ref_count = 0;
			shadow_count = 0;
		}

		/*
		 * format:
		 *  start, end, resident, private resident, cow, access, type,
		 *  charged, charged uid.
		 */
		error = sbuf_printf(sb,
		    "0x%lx 0x%lx %d %d %p %s%s%s %d %d 0x%x %s %s %s %s %s %d\n",
		    (u_long)e_start, (u_long)e_end,
		    resident, privateresident,
#ifdef COMPAT_FREEBSD32
		    wrap32 ? NULL : obj,	/* Hide 64 bit value */
#else
		    obj,
#endif
		    (e_prot & VM_PROT_READ) ? "r" : "-",
		    (e_prot & VM_PROT_WRITE) ? "w" : "-",
		    (e_prot & VM_PROT_EXECUTE) ? "x" : "-",
		    ref_count, shadow_count, flags,
		    (e_eflags & MAP_ENTRY_COW) ? "COW" : "NCOW",
		    (e_eflags & MAP_ENTRY_NEEDS_COPY) ? "NC" : "NNC",
		    type, fullpath,
		    uip ? "CH" : "NCH",
		    uip ? uip->ui_uid : -1);
		if (freepath != NULL)
			free(freepath, M_TEMP);
		vm_map_lock_read(map);
		if (error == -1) {
			error = 0;
			break;
		}
		if (last_timestamp != map->timestamp) {
			/*
			 * Look again for the entry because the map was
			 * modified while it was unlocked.  Specifically,
			 * the entry may have been clipped, merged, or deleted.
			 */
			vm_map_lookup_entry(map, e_end - 1, &tmp_entry);
			entry = tmp_entry;
		}
	}
	vm_map_unlock_read(map);
	vmspace_free(vm);

	return (error);
}
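/*
 * Illustrative userland sketch (not part of the kernel source): the
 * grow-and-retry protocol described in the comment above -- read the whole
 * map with a single read(2), doubling the buffer while the kernel reports
 * EFBIG.  The helper name and starting size are arbitrary.
 */
#if 0
#include <sys/types.h>
#include <errno.h>
#include <fcntl.h>
#include <stdlib.h>
#include <unistd.h>

ssize_t
read_procfs_map(const char *path, char **bufp)
{
	size_t size = 64 * 1024;
	char *buf = NULL;
	ssize_t n;

	for (;;) {
		char *nbuf;
		int fd;

		if ((nbuf = realloc(buf, size)) == NULL) {
			n = -1;
			break;
		}
		buf = nbuf;
		if ((fd = open(path, O_RDONLY)) == -1) {
			n = -1;
			break;
		}
		n = read(fd, buf, size);
		close(fd);
		if (n != -1 || errno != EFBIG)
			break;
		size *= 2;	/* kernel reported the buffer was too small */
	}
	*bufp = buf;
	return (n);
}
#endif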
void
update_holes_on_entry_creation(vm_map_t map, vm_map_entry_t new_entry)
{
	vm_map_entry_t hole_entry, next_hole_entry;
#if DEBUG
	struct vm_map_entry old_hole_entry;
	vm_map_entry_t tmp_entry;
	boolean_t check_map_with_hole_sanity = TRUE;
#endif /* DEBUG */

	/*
	 * Case A: The entry is aligned exactly with the start and end of the hole.
	 * This will delete the hole.
	 *
	 * Case B: The entry is completely within a hole but NOT aligned with the start/end of the hole.
	 * This will split a hole.
	 *
	 * Case C: The entry overlaps with the hole.  The entry could be extending upwards (C1) or downwards (C2).
	 * This will reduce the size of the hole or delete the hole completely if it is smaller than the entry.
	 */

	hole_entry = (vm_map_entry_t) map->holes_list;
	assert(hole_entry);
	next_hole_entry = hole_entry->vme_next;

	while (1) {
#if DEBUG
		/*
		 * If the entry doesn't exist in the RB tree, we are likely dealing with copy maps where
		 * the entries belonging to the copy map are linked into the list of entries silently and
		 * then added to the RB-tree later on.
		 * So sanity checks are useless in that case.
		 */
		check_map_with_hole_sanity = vm_map_lookup_entry(map, new_entry->vme_start, &tmp_entry);
#endif /* DEBUG */

		if (hole_entry->vme_start == new_entry->vme_start &&
		    hole_entry->vme_end == new_entry->vme_end) {
			/* Case A */
#if DEBUG
			copy_hole_info(hole_entry, &old_hole_entry);
#endif /* DEBUG */

			/*
			 * This check makes sense only for regular maps, not copy maps.
			 * With a regular map, the VM entry is first linked and then
			 * the hole is deleted.  So the check below, which makes sure that
			 * the map's bounds are being respected, is valid.
			 * But for copy maps, the hole is deleted before the VM entry is
			 * linked (vm_map_store_copy_insert) and so this check is invalid.
			 *
			if (hole_entry == (vm_map_entry_t) map->holes_list) {

				if (hole_entry->vme_next == (vm_map_entry_t) map->holes_list) {

					next_hole_entry = vm_map_last_entry(map);
					assert(next_hole_entry->vme_end >= map->max_offset);
				}
			}
			*/

			vm_map_delete_hole(map, hole_entry);

#if DEBUG
			if (check_map_with_hole_sanity)
				check_map_sanity(map, &old_hole_entry);
#endif /* DEBUG */
			return;
		} else if (hole_entry->vme_start < new_entry->vme_start &&
			   hole_entry->vme_end > new_entry->vme_end) {
			/* Case B */
			struct vm_map_links *new_hole_entry = NULL;

			new_hole_entry = zalloc(vm_map_holes_zone);

#if DEBUG
			copy_hole_info(hole_entry, &old_hole_entry);
#endif /* DEBUG */

			new_hole_entry->prev = hole_entry;
			new_hole_entry->next = hole_entry->vme_next;
			hole_entry->vme_next->vme_prev = (vm_map_entry_t)new_hole_entry;
			hole_entry->vme_next = (vm_map_entry_t)new_hole_entry;

			new_hole_entry->start = new_entry->vme_end;
			new_hole_entry->end = hole_entry->vme_end;
			hole_entry->vme_end = new_entry->vme_start;

			assert(hole_entry->vme_start < hole_entry->vme_end);
			assert(new_hole_entry->start < new_hole_entry->end);

#if DEBUG
			if (check_map_with_hole_sanity)
				check_map_sanity(map, &old_hole_entry);
#endif /* DEBUG */

			SAVE_HINT_HOLE_WRITE(map, (struct vm_map_links*) hole_entry);
			return;
		} else if ((new_entry->vme_start <= hole_entry->vme_start) &&
			   (hole_entry->vme_start < new_entry->vme_end)) {
			/*
			 * Case C1: Entry moving upwards and a part/full hole lies within the bounds of the entry.
			 */
#if DEBUG
			copy_hole_info(hole_entry, &old_hole_entry);
#endif /* DEBUG */

			if (hole_entry->vme_end <= new_entry->vme_end) {
				vm_map_delete_hole(map, hole_entry);
			} else {
				hole_entry->vme_start = new_entry->vme_end;
				SAVE_HINT_HOLE_WRITE(map, (struct vm_map_links*) hole_entry);
			}

#if DEBUG
			if (check_map_with_hole_sanity)
				check_map_sanity(map, &old_hole_entry);
#endif /* DEBUG */

			return;
		} else if ((new_entry->vme_start < hole_entry->vme_end) &&
			   (hole_entry->vme_end <= new_entry->vme_end)) {
			/*
			 * Case C2: Entry moving downwards and a part/full hole lies within the bounds of the entry.
			 */
#if DEBUG
			copy_hole_info(hole_entry, &old_hole_entry);
#endif /* DEBUG */

			if (hole_entry->vme_start >= new_entry->vme_start) {
				vm_map_delete_hole(map, hole_entry);
			} else {
				hole_entry->vme_end = new_entry->vme_start;
				SAVE_HINT_HOLE_WRITE(map, (struct vm_map_links*) hole_entry);
			}

#if DEBUG
			if (check_map_with_hole_sanity)
				check_map_sanity(map, &old_hole_entry);
#endif /* DEBUG */

			return;
		}

		hole_entry = next_hole_entry;
		next_hole_entry = hole_entry->vme_next;

		if (hole_entry == (vm_map_entry_t)map->holes_list)
			break;
	}

	panic("Illegal action: h1: %p, s:0x%llx, e:0x%llx...h2:%p, s:0x%llx, e:0x%llx...h3:0x%p, s:0x%llx, e:0x%llx\n",
	      hole_entry->vme_prev,
	      (unsigned long long)hole_entry->vme_prev->vme_start,
	      (unsigned long long)hole_entry->vme_prev->vme_end,
	      hole_entry,
	      (unsigned long long)hole_entry->vme_start,
	      (unsigned long long)hole_entry->vme_end,
	      hole_entry->vme_next,
	      (unsigned long long)hole_entry->vme_next->vme_start,
	      (unsigned long long)hole_entry->vme_next->vme_end);
}
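/*
 * Illustrative sketch (not part of the kernel source): the case analysis
 * above, restated over a single free "hole" [h.start, h.end) and a new
 * entry [e_start, e_end).  The types and the helper are hypothetical; the
 * real code additionally maintains the circular hole list and the hint.
 */
#if 0
struct hole { unsigned long start, end; };

/* Returns the number of holes left (0, 1, or 2), written to out[]. */
static int
carve_hole(struct hole h, unsigned long e_start, unsigned long e_end,
	   struct hole *out)
{
	if (h.start == e_start && h.end == e_end) {
		/* Case A: exact fit, hole deleted */
		return 0;
	} else if (h.start < e_start && e_end < h.end) {
		/* Case B: entry strictly inside, hole split in two */
		out[0] = (struct hole){ h.start, e_start };
		out[1] = (struct hole){ e_end, h.end };
		return 2;
	} else if (e_start <= h.start && h.start < e_end) {
		/* Case C1: entry overlaps the low end of the hole */
		if (h.end <= e_end)
			return 0;		/* hole fully swallowed */
		out[0] = (struct hole){ e_end, h.end };
		return 1;
	} else if (e_start < h.end && h.end <= e_end) {
		/* Case C2: entry overlaps the high end of the hole */
		if (h.start >= e_start)
			return 0;		/* hole fully swallowed */
		out[0] = (struct hole){ h.start, e_start };
		return 1;
	}
	out[0] = h;				/* no overlap, hole unchanged */
	return 1;
}
#endif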
/*
 * The map entries can *almost* be read with programs like cat.  However,
 * large maps need special programs to read.  It is not easy to implement
 * a program that can sense the required size of the buffer, and then
 * subsequently do a read with the appropriate size.  This operation cannot
 * be atomic.  The best that we can do is to allow the program to do a read
 * with an arbitrarily large buffer, and return as much as we can.  We can
 * return an error code if the buffer is too small (EFBIG), then the program
 * can try a bigger buffer.
 */
int
procfs_domap(struct proc *curp, struct lwp *lp, struct pfsnode *pfs,
	     struct uio *uio)
{
	struct proc *p = lp->lwp_proc;
	int len;
	struct vnode *vp;
	char *fullpath, *freepath;
	int error;
	vm_map_t map = &p->p_vmspace->vm_map;
	pmap_t pmap = vmspace_pmap(p->p_vmspace);
	vm_map_entry_t entry;
	char mebuffer[MEBUFFERSIZE];

	if (uio->uio_rw != UIO_READ)
		return (EOPNOTSUPP);

	if (uio->uio_offset != 0)
		return (0);

	error = 0;
	vm_map_lock_read(map);
	for (entry = map->header.next;
	     ((uio->uio_resid > 0) && (entry != &map->header));
	     entry = entry->next) {
		vm_object_t obj, tobj, lobj;
		int ref_count, shadow_count, flags;
		vm_offset_t addr;
		vm_offset_t ostart;
		int resident, privateresident;
		char *type;

		if (entry->maptype != VM_MAPTYPE_NORMAL &&
		    entry->maptype != VM_MAPTYPE_VPAGETABLE) {
			continue;
		}

		obj = entry->object.vm_object;
		if (obj)
			vm_object_hold(obj);

		if (obj && (obj->shadow_count == 1))
			privateresident = obj->resident_page_count;
		else
			privateresident = 0;

		/*
		 * Use map->hint as a poor man's ripout detector.
		 */
		map->hint = entry;
		ostart = entry->start;

		/*
		 * Count resident pages (XXX can be horrible on 64-bit)
		 */
		resident = 0;
		addr = entry->start;
		while (addr < entry->end) {
			if (pmap_extract(pmap, addr))
				resident++;
			addr += PAGE_SIZE;
		}

		if (obj) {
			lobj = obj;
			while ((tobj = lobj->backing_object) != NULL) {
				KKASSERT(tobj != obj);
				vm_object_hold(tobj);
				if (tobj == lobj->backing_object) {
					if (lobj != obj) {
						vm_object_lock_swap();
						vm_object_drop(lobj);
					}
					lobj = tobj;
				} else {
					vm_object_drop(tobj);
				}
			}
		} else {
			lobj = NULL;
		}

		freepath = NULL;
		fullpath = "-";
		if (lobj) {
			switch (lobj->type) {
			default:
			case OBJT_DEFAULT:
				type = "default";
				vp = NULL;
				break;
			case OBJT_VNODE:
				type = "vnode";
				vp = lobj->handle;
				vref(vp);
				break;
			case OBJT_SWAP:
				type = "swap";
				vp = NULL;
				break;
			case OBJT_DEVICE:
				type = "device";
				vp = NULL;
				break;
			}

			flags = obj->flags;
			ref_count = obj->ref_count;
			shadow_count = obj->shadow_count;
			if (vp != NULL) {
				vn_fullpath(p, vp, &fullpath, &freepath, 1);
				vrele(vp);
			}
			if (lobj != obj)
				vm_object_drop(lobj);
		} else {
			type = "none";
			flags = 0;
			ref_count = 0;
			shadow_count = 0;
		}

		/*
		 * format:
		 *  start, end, res, priv res, cow, access, type, (fullpath).
		 */
		ksnprintf(mebuffer, sizeof(mebuffer),
#if LONG_BIT == 64
			  "0x%016lx 0x%016lx %d %d %p %s%s%s %d %d "
#else
			  "0x%08lx 0x%08lx %d %d %p %s%s%s %d %d "
#endif
			  "0x%04x %s %s %s %s\n",
			  (u_long)entry->start, (u_long)entry->end,
			  resident, privateresident, obj,
			  (entry->protection & VM_PROT_READ) ? "r" : "-",
			  (entry->protection & VM_PROT_WRITE) ? "w" : "-",
			  (entry->protection & VM_PROT_EXECUTE) ? "x" : "-",
			  ref_count, shadow_count, flags,
			  (entry->eflags & MAP_ENTRY_COW) ? "COW" : "NCOW",
			  (entry->eflags & MAP_ENTRY_NEEDS_COPY) ? "NC" : "NNC",
			  type, fullpath);

		if (obj)
			vm_object_drop(obj);

		if (freepath != NULL) {
			kfree(freepath, M_TEMP);
			freepath = NULL;
		}

		len = strlen(mebuffer);
		if (len > uio->uio_resid) {
			error = EFBIG;
			break;
		}

		/*
		 * We cannot safely hold the map locked while accessing
		 * userspace as a VM fault might recurse the locked map.
		 */
		vm_map_unlock_read(map);
		error = uiomove(mebuffer, len, uio);
		vm_map_lock_read(map);
		if (error)
			break;

		/*
		 * We use map->hint as a poor man's ripout detector.  If
		 * it does not match the entry we set it to prior to
		 * unlocking the map the entry MIGHT now be stale.  In
		 * this case we do an expensive lookup to find our place
		 * in the iteration again.
		 */
		if (map->hint != entry) {
			vm_map_entry_t reentry;

			vm_map_lookup_entry(map, ostart, &reentry);
			entry = reentry;
		}
	}
	vm_map_unlock_read(map);

	return error;
}
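/*
 * Illustrative userland sketch (not part of the kernel source): parsing one
 * line emitted by the ksnprintf() format above.  The struct and helper are
 * hypothetical; the object pointer is read as a plain hex value since it is
 * only meaningful inside the kernel.
 */
#if 0
#include <stdio.h>

struct map_line {
	unsigned long start, end;
	int resident, privateresident;
	int ref_count, shadow_count;
	unsigned int flags;
	char prot[4], cow[5], nc[4], type[16], path[1024];
};

static int
parse_map_line(const char *line, struct map_line *m)
{
	unsigned long obj;	/* kernel address, not dereferenceable */

	return (sscanf(line,
	    "%lx %lx %d %d %lx %3s %d %d %x %4s %3s %15s %1023s",
	    &m->start, &m->end, &m->resident, &m->privateresident, &obj,
	    m->prot, &m->ref_count, &m->shadow_count, &m->flags,
	    m->cow, m->nc, m->type, m->path) == 13);
}
#endif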
/*
 *	kmem_realloc:
 *
 *	Reallocate wired-down memory in the kernel's address map
 *	or a submap.  Newly allocated pages are not zeroed.
 *	This can only be used on regions allocated with kmem_alloc.
 *
 *	If successful, the pages in the old region are mapped twice.
 *	The old region is unchanged.  Use kmem_free to get rid of it.
 */
kern_return_t
kmem_realloc(
	vm_map_t	map,
	vm_offset_t	oldaddr,
	vm_size_t	oldsize,
	vm_offset_t	*newaddrp,
	vm_size_t	newsize)
{
	vm_offset_t oldmin, oldmax;
	vm_offset_t newaddr;
	vm_object_t object;
	vm_map_entry_t oldentry, newentry;
	unsigned int attempts;
	kern_return_t kr;

	oldmin = trunc_page(oldaddr);
	oldmax = round_page(oldaddr + oldsize);
	oldsize = oldmax - oldmin;
	newsize = round_page(newsize);

	/*
	 *	Find space for the new region.
	 */
	attempts = 0;

retry:
	vm_map_lock(map);
	kr = vm_map_find_entry(map, &newaddr, newsize,
			       (vm_offset_t) 0, VM_OBJECT_NULL,
			       &newentry);
	if (kr != KERN_SUCCESS) {
		vm_map_unlock(map);

		if (attempts == 0) {
			attempts++;
			slab_collect();
			goto retry;
		}

		printf_once("no more room for kmem_realloc in %p\n", map);
		return kr;
	}

	/*
	 *	Find the VM object backing the old region.
	 */
	if (!vm_map_lookup_entry(map, oldmin, &oldentry))
		panic("kmem_realloc");
	object = oldentry->object.vm_object;

	/*
	 *	Increase the size of the object and
	 *	fill in the new region.
	 */
	vm_object_reference(object);
	vm_object_lock(object);
	if (object->size != oldsize)
		panic("kmem_realloc");
	object->size = newsize;
	vm_object_unlock(object);

	newentry->object.vm_object = object;
	newentry->offset = 0;

	/*
	 *	Since we have not given out this address yet,
	 *	it is safe to unlock the map.  We are trusting
	 *	that nobody will play with either region.
	 */
	vm_map_unlock(map);

	/*
	 *	Remap the pages in the old region and
	 *	allocate more pages for the new region.
	 */
	kmem_remap_pages(object, 0,
			 newaddr, newaddr + oldsize,
			 VM_PROT_DEFAULT);
	kmem_alloc_pages(object, oldsize,
			 newaddr + oldsize, newaddr + newsize,
			 VM_PROT_DEFAULT);

	*newaddrp = newaddr;
	return KERN_SUCCESS;
}
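/*
 * Illustrative sketch (not part of the original source): expected usage of
 * kmem_realloc() as documented above -- the old region survives the call
 * and must be released with kmem_free() once the caller is done with it.
 * The helper and variable names are hypothetical.
 */
#if 0
static kern_return_t
grow_buffer(vm_offset_t *addrp, vm_size_t oldsize, vm_size_t newsize)
{
	vm_offset_t newaddr;
	kern_return_t kr;

	kr = kmem_realloc(kernel_map, *addrp, oldsize, &newaddr, newsize);
	if (kr != KERN_SUCCESS)
		return kr;

	/*
	 * The old and new mappings share the same pages for the first
	 * oldsize bytes; drop the old mapping, keep the data.
	 */
	kmem_free(kernel_map, *addrp, oldsize);
	*addrp = newaddr;
	return KERN_SUCCESS;
}
#endif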
kern_return_t
projected_buffer_map(
	vm_map_t	map,
	vm_offset_t	kernel_addr,
	vm_size_t	size,
	vm_offset_t	*user_p,
	vm_prot_t	protection,
	vm_inherit_t	inheritance)	/*Currently only VM_INHERIT_NONE supported*/
{
	vm_map_entry_t u_entry, k_entry;
	vm_offset_t physical_addr, user_addr;
	vm_size_t r_size;
	kern_return_t kr;

	/*
	 *	Find entry in kernel map
	 */
	size = round_page(size);
	if (map == VM_MAP_NULL || map == kernel_map ||
	    !vm_map_lookup_entry(kernel_map, kernel_addr, &k_entry) ||
	    kernel_addr + size > k_entry->vme_end)
		return(KERN_INVALID_ARGUMENT);

	/*
	 *	Create entry in user task
	 */
	vm_map_lock(map);
	kr = vm_map_find_entry(map, &user_addr, size, (vm_offset_t) 0,
			       VM_OBJECT_NULL, &u_entry);
	if (kr != KERN_SUCCESS) {
		vm_map_unlock(map);
		return kr;
	}

	u_entry->object.vm_object = k_entry->object.vm_object;
	vm_object_reference(k_entry->object.vm_object);
	u_entry->offset = kernel_addr - k_entry->vme_start + k_entry->offset;
	u_entry->projected_on = k_entry;
	/*Creates coupling with kernel mapping of the buffer, and
	  also guarantees that user cannot directly manipulate
	  buffer VM entry*/
	u_entry->protection = protection;
	u_entry->max_protection = protection;
	u_entry->inheritance = inheritance;
	u_entry->wired_count = k_entry->wired_count;
	vm_map_unlock(map);

	*user_p = user_addr;

	/*
	 *	Set up physical mappings for user pmap
	 */
	pmap_pageable(map->pmap, user_addr, user_addr + size,
		      !k_entry->wired_count);
	for (r_size = 0; r_size < size; r_size += PAGE_SIZE) {
		physical_addr = pmap_extract(kernel_pmap, kernel_addr + r_size);
		pmap_enter(map->pmap, user_addr + r_size, physical_addr,
			   protection, k_entry->wired_count);
	}

	return(KERN_SUCCESS);
}
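/*
 * Illustrative sketch (not part of the original source): a driver exporting
 * a wired kernel buffer into a user task's map via the routine above.  The
 * helper name and `user_map` are hypothetical; kmem_alloc() returns wired
 * memory in the kernel map, which is what projected_buffer_map() expects
 * to find at kernel_addr.
 */
#if 0
static kern_return_t
export_buffer(vm_map_t user_map, vm_size_t size, vm_offset_t *user_addr)
{
	vm_offset_t kaddr;
	kern_return_t kr;

	kr = kmem_alloc(kernel_map, &kaddr, size);
	if (kr != KERN_SUCCESS)
		return kr;

	/*
	 * Read-only for the user task; VM_INHERIT_NONE is the only
	 * supported inheritance, per the comment above.
	 */
	kr = projected_buffer_map(user_map, kaddr, size, user_addr,
				  VM_PROT_READ, VM_INHERIT_NONE);
	if (kr != KERN_SUCCESS)
		kmem_free(kernel_map, kaddr, size);
	return kr;
}
#endif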
static void*
commpage_allocate(
	vm_map_t	submap,			// commpage32_map or commpage_map64
	size_t		area_used,		// _COMM_PAGE32_AREA_USED or _COMM_PAGE64_AREA_USED
	vm_prot_t	uperm)
{
	vm_offset_t	kernel_addr = 0;	// address of commpage in kernel map
	vm_offset_t	zero = 0;
	vm_size_t	size = area_used;	// size actually populated
	vm_map_entry_t	entry;
	ipc_port_t	handle;
	kern_return_t	kr;

	if (submap == NULL)
		panic("commpage submap is null");

	if ((kr = vm_map(kernel_map,
			 &kernel_addr,
			 area_used,
			 0,
			 VM_FLAGS_ANYWHERE,
			 NULL,
			 0,
			 FALSE,
			 VM_PROT_ALL,
			 VM_PROT_ALL,
			 VM_INHERIT_NONE)))
		panic("cannot allocate commpage %d", kr);

	if ((kr = vm_map_wire(kernel_map,
			      kernel_addr,
			      kernel_addr + area_used,
			      VM_PROT_DEFAULT,
			      FALSE)))
		panic("cannot wire commpage: %d", kr);

	/*
	 * Now that the object is created and wired into the kernel map, mark it so that no delay
	 * copy-on-write will ever be performed on it as a result of mapping it into user-space.
	 * If such a delayed copy ever occurred, we could remove the kernel's wired mapping - and
	 * that would be a real disaster.
	 *
	 * JMM - What we really need is a way to create it like this in the first place.
	 */
	if (!vm_map_lookup_entry(kernel_map,
				 vm_map_trunc_page(kernel_addr,
						   VM_MAP_PAGE_MASK(kernel_map)),
				 &entry) ||
	    entry->is_sub_map)
		panic("cannot find commpage entry %d", kr);
	entry->object.vm_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;

	if ((kr = mach_make_memory_entry(kernel_map,	// target map
					 &size,		// size
					 kernel_addr,	// offset (address in kernel map)
					 uperm,		// protections as specified
					 &handle,	// this is the object handle we get
					 NULL)))	// parent_entry (what is this?)
		panic("cannot make entry for commpage %d", kr);

	if ((kr = vm_map_64(submap,		// target map (shared submap)
			    &zero,		// address (map into 1st page in submap)
			    area_used,		// size
			    0,			// mask
			    VM_FLAGS_FIXED,	// flags (it must be 1st page in submap)
			    handle,		// port is the memory entry we just made
			    0,			// offset (map 1st page in memory entry)
			    FALSE,		// copy
			    uperm,		// cur_protection (R-only in user map)
			    uperm,		// max_protection
			    VM_INHERIT_SHARE)))	// inheritance
		panic("cannot map commpage %d", kr);

	ipc_port_release(handle);

	/*
	 * Make the kernel mapping non-executable.  This cannot be done
	 * at the time of map entry creation as mach_make_memory_entry
	 * cannot handle disjoint permissions at this time.
	 */
	kr = vm_protect(kernel_map,
			kernel_addr,
			area_used,
			FALSE,
			VM_PROT_READ | VM_PROT_WRITE);
	assert(kr == KERN_SUCCESS);

	return (void*)(intptr_t)kernel_addr;	// return address in kernel map
}
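/*
 * Illustrative sketch (not part of the original source): how the 32-bit
 * commpage could be carved out with the allocator above.  The init helper
 * and the receiving pointer are hypothetical; the submap and size constant
 * are the ones named in the parameter comments, and uperm here is read-only
 * per the "R-only in user map" note above.
 */
#if 0
static void *commpage32_kernel_addr;

static void
commpage_init32(void)
{
	/*
	 * Kernel-writable backing store, mapped read-only at a fixed
	 * address in every 32-bit task's commpage submap.
	 */
	commpage32_kernel_addr = commpage_allocate(commpage32_map,
	    (size_t)_COMM_PAGE32_AREA_USED, VM_PROT_READ);
}
#endif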