/* * No requirements. */ int useracc(c_caddr_t addr, int len, int rw) { boolean_t rv; vm_prot_t prot; vm_map_t map; vm_map_entry_t save_hint; vm_offset_t wrap; KASSERT((rw & (~VM_PROT_ALL)) == 0, ("illegal ``rw'' argument to useracc (%x)", rw)); prot = rw; /* * XXX - check separately to disallow access to user area and user * page tables - they are in the map. */ wrap = (vm_offset_t)addr + len; if (wrap > VM_MAX_USER_ADDRESS || wrap < (vm_offset_t)addr) { return (FALSE); } map = &curproc->p_vmspace->vm_map; vm_map_lock_read(map); /* * We save the map hint, and restore it. Useracc appears to distort * the map hint unnecessarily. */ save_hint = map->hint; rv = vm_map_check_protection(map, trunc_page((vm_offset_t)addr), round_page(wrap), prot, TRUE); map->hint = save_hint; vm_map_unlock_read(map); return (rv == TRUE); }
static void get_proc_size_info(struct lwp *l, unsigned long *stext, unsigned long *etext, unsigned long *sstack) { struct proc *p = l->l_proc; struct vmspace *vm; struct vm_map *map; struct vm_map_entry *entry; *stext = 0; *etext = 0; *sstack = 0; proc_vmspace_getref(p, &vm); map = &vm->vm_map; vm_map_lock_read(map); for (entry = map->header.next; entry != &map->header; entry = entry->next) { if (UVM_ET_ISSUBMAP(entry)) continue; /* assume text is the first entry */ if (*stext == *etext) { *stext = entry->start; *etext = entry->end; break; } } #if defined(LINUX_USRSTACK32) && defined(USRSTACK32) if (strcmp(p->p_emul->e_name, "linux32") == 0 && LINUX_USRSTACK32 < USRSTACK32) *sstack = (unsigned long)LINUX_USRSTACK32; else #endif #ifdef LINUX_USRSTACK if (strcmp(p->p_emul->e_name, "linux") == 0 && LINUX_USRSTACK < USRSTACK) *sstack = (unsigned long)LINUX_USRSTACK; else #endif #ifdef USRSTACK32 if (strstr(p->p_emul->e_name, "32") != NULL) *sstack = (unsigned long)USRSTACK32; else #endif *sstack = (unsigned long)USRSTACK; /* * jdk 1.6 compares low <= addr && addr < high * if we put addr == high, then the test fails * so eat one page. */ *sstack -= PAGE_SIZE; vm_map_unlock_read(map); uvmspace_free(vm); }
static int sysctl_kmem_map_free(SYSCTL_HANDLER_ARGS) { u_long size; vm_map_lock_read(kmem_map); size = kmem_map->root != NULL ? kmem_map->root->max_free : kmem_map->max_offset - kmem_map->min_offset; vm_map_unlock_read(kmem_map); return (sysctl_handle_long(oidp, &size, 0, req)); }
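/*
 * Hedged userland sketch: read the value produced by sysctl_kmem_map_free()
 * above via sysctlbyname(3).  The "vm.kmem_map_free" OID name is an
 * assumption about how the handler is registered with SYSCTL_PROC(); the
 * registration itself is not shown here, so adjust the name as needed.
 */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
print_kmem_map_free(void)
{
	u_long free_bytes;
	size_t len = sizeof(free_bytes);

	if (sysctlbyname("vm.kmem_map_free", &free_bytes, &len, NULL, 0) == -1)
		return (-1);
	printf("largest free kmem_map range: %lu bytes\n", free_bytes);
	return (0);
}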
/* * No requirements. */ int useracc(c_caddr_t addr, int len, int rw) { boolean_t rv; vm_prot_t prot; vm_map_t map; vm_offset_t wrap; vm_offset_t gpa; KASSERT((rw & (~VM_PROT_ALL)) == 0, ("illegal ``rw'' argument to useracc (%x)", rw)); prot = rw; if (curthread->td_vmm) { if (vmm_vm_get_gpa(curproc, (register_t *)&gpa, (register_t) addr)) panic("%s: could not get GPA\n", __func__); addr = (c_caddr_t) gpa; } /* * XXX - check separately to disallow access to user area and user * page tables - they are in the map. */ wrap = (vm_offset_t)addr + len; if (wrap > VM_MAX_USER_ADDRESS || wrap < (vm_offset_t)addr) { return (FALSE); } map = &curproc->p_vmspace->vm_map; vm_map_lock_read(map); rv = vm_map_check_protection(map, trunc_page((vm_offset_t)addr), round_page(wrap), prot, TRUE); vm_map_unlock_read(map); return (rv == TRUE); }
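/*
 * Hedged sketch of a caller: both useracc() variants above only check
 * protections on the current process's map, so a driver-style routine still
 * uses copyin()/copyout() for the actual transfer.  my_return_status() and
 * its arguments are hypothetical; useracc() and copyout() are the kernel
 * interfaces shown/assumed above.
 */
static int
my_return_status(void *uaddr, int len, const void *kbuf)
{
	/* Reject ranges the process cannot write to before doing any work. */
	if (!useracc(uaddr, len, VM_PROT_WRITE))
		return (EFAULT);

	/* copyout() can still fault; useracc() is only an early sanity check. */
	return (copyout(kbuf, uaddr, (size_t)len));
}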
int fill_procregioninfo(task_t task, uint64_t arg, struct proc_regioninfo_internal *pinfo, uint32_t *vnodeaddr, uint32_t *vid) { vm_map_t map; vm_map_offset_t address = (vm_map_offset_t )arg; vm_map_entry_t tmp_entry; vm_map_entry_t entry; vm_map_offset_t start; vm_region_extended_info_data_t extended; vm_region_top_info_data_t top; task_lock(task); map = task->map; if (map == VM_MAP_NULL) { task_unlock(task); return(0); } vm_map_reference(map); task_unlock(task); vm_map_lock_read(map); start = address; if (!vm_map_lookup_entry(map, start, &tmp_entry)) { if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) { vm_map_unlock_read(map); vm_map_deallocate(map); return(0); } } else { entry = tmp_entry; } start = entry->vme_start; pinfo->pri_offset = entry->offset; pinfo->pri_protection = entry->protection; pinfo->pri_max_protection = entry->max_protection; pinfo->pri_inheritance = entry->inheritance; pinfo->pri_behavior = entry->behavior; pinfo->pri_user_wired_count = entry->user_wired_count; pinfo->pri_user_tag = entry->alias; if (entry->is_sub_map) { pinfo->pri_flags |= PROC_REGION_SUBMAP; } else { if (entry->is_shared) pinfo->pri_flags |= PROC_REGION_SHARED; } extended.protection = entry->protection; extended.user_tag = entry->alias; extended.pages_resident = 0; extended.pages_swapped_out = 0; extended.pages_shared_now_private = 0; extended.pages_dirtied = 0; extended.external_pager = 0; extended.shadow_depth = 0; vm_map_region_walk(map, start, entry, entry->offset, entry->vme_end - start, &extended); if (extended.external_pager && extended.ref_count == 2 && extended.share_mode == SM_SHARED) extended.share_mode = SM_PRIVATE; top.private_pages_resident = 0; top.shared_pages_resident = 0; vm_map_region_top_walk(entry, &top); pinfo->pri_pages_resident = extended.pages_resident; pinfo->pri_pages_shared_now_private = extended.pages_shared_now_private; pinfo->pri_pages_swapped_out = extended.pages_swapped_out; pinfo->pri_pages_dirtied = extended.pages_dirtied; pinfo->pri_ref_count = extended.ref_count; pinfo->pri_shadow_depth = extended.shadow_depth; pinfo->pri_share_mode = extended.share_mode; pinfo->pri_private_pages_resident = top.private_pages_resident; pinfo->pri_shared_pages_resident = top.shared_pages_resident; pinfo->pri_obj_id = top.obj_id; pinfo->pri_address = (uint64_t)start; pinfo->pri_size = (uint64_t)(entry->vme_end - start); pinfo->pri_depth = 0; if ((vnodeaddr != 0) && (entry->is_sub_map == 0)) { *vnodeaddr = (uint32_t)0; if (fill_vnodeinfoforaddr(entry, vnodeaddr, vid) ==0) { vm_map_unlock_read(map); vm_map_deallocate(map); return(1); } } vm_map_unlock_read(map); vm_map_deallocate(map); return(1); }
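/*
 * Hedged userland sketch (macOS/XNU): the pri_* fields filled in by
 * fill_procregioninfo() above are exposed through libproc's
 * proc_pidinfo(PROC_PIDREGIONINFO, ...).  The flavor name and the
 * struct proc_regioninfo layout are assumptions based on <sys/proc_info.h>.
 */
#include <sys/types.h>
#include <stdint.h>
#include <stdio.h>
#include <libproc.h>

int
print_region(pid_t pid, uint64_t addr)
{
	struct proc_regioninfo ri;
	int n = proc_pidinfo(pid, PROC_PIDREGIONINFO, addr, &ri, sizeof(ri));

	if (n < (int)sizeof(ri))
		return (-1);
	printf("region 0x%llx-0x%llx prot 0x%x\n",
	    (unsigned long long)ri.pri_address,
	    (unsigned long long)(ri.pri_address + ri.pri_size),
	    (unsigned)ri.pri_protection);
	return (0);
}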
/* * mincore system call handler * * mincore_args(const void *addr, size_t len, char *vec) * * No requirements */ int sys_mincore(struct mincore_args *uap) { struct proc *p = curproc; vm_offset_t addr, first_addr; vm_offset_t end, cend; pmap_t pmap; vm_map_t map; char *vec; int error; int vecindex, lastvecindex; vm_map_entry_t current; vm_map_entry_t entry; int mincoreinfo; unsigned int timestamp; /* * Make sure that the addresses presented are valid for user * mode. */ first_addr = addr = trunc_page((vm_offset_t) uap->addr); end = addr + (vm_size_t)round_page(uap->len); if (end < addr) return (EINVAL); if (VM_MAX_USER_ADDRESS > 0 && end > VM_MAX_USER_ADDRESS) return (EINVAL); /* * Address of byte vector */ vec = uap->vec; map = &p->p_vmspace->vm_map; pmap = vmspace_pmap(p->p_vmspace); lwkt_gettoken(&map->token); vm_map_lock_read(map); RestartScan: timestamp = map->timestamp; if (!vm_map_lookup_entry(map, addr, &entry)) entry = entry->next; /* * Do this on a map entry basis so that if the pages are not * in the current processes address space, we can easily look * up the pages elsewhere. */ lastvecindex = -1; for(current = entry; (current != &map->header) && (current->start < end); current = current->next) { /* * ignore submaps (for now) or null objects */ if (current->maptype != VM_MAPTYPE_NORMAL && current->maptype != VM_MAPTYPE_VPAGETABLE) { continue; } if (current->object.vm_object == NULL) continue; /* * limit this scan to the current map entry and the * limits for the mincore call */ if (addr < current->start) addr = current->start; cend = current->end; if (cend > end) cend = end; /* * scan this entry one page at a time */ while (addr < cend) { /* * Check pmap first, it is likely faster, also * it can provide info as to whether we are the * one referencing or modifying the page. * * If we have to check the VM object, only mess * around with normal maps. Do not mess around * with virtual page tables (XXX). */ mincoreinfo = pmap_mincore(pmap, addr); if (mincoreinfo == 0 && current->maptype == VM_MAPTYPE_NORMAL) { vm_pindex_t pindex; vm_ooffset_t offset; vm_page_t m; /* * calculate the page index into the object */ offset = current->offset + (addr - current->start); pindex = OFF_TO_IDX(offset); /* * if the page is resident, then gather * information about it. spl protection is * required to maintain the object * association. And XXX what if the page is * busy? What's the deal with that? * * XXX vm_token - legacy for pmap_ts_referenced * in i386 and vkernel pmap code. */ lwkt_gettoken(&vm_token); vm_object_hold(current->object.vm_object); m = vm_page_lookup(current->object.vm_object, pindex); if (m && m->valid) { mincoreinfo = MINCORE_INCORE; if (m->dirty || pmap_is_modified(m)) mincoreinfo |= MINCORE_MODIFIED_OTHER; if ((m->flags & PG_REFERENCED) || pmap_ts_referenced(m)) { vm_page_flag_set(m, PG_REFERENCED); mincoreinfo |= MINCORE_REFERENCED_OTHER; } } vm_object_drop(current->object.vm_object); lwkt_reltoken(&vm_token); } /* * subyte may page fault. In case it needs to modify * the map, we release the lock. */ vm_map_unlock_read(map); /* * calculate index into user supplied byte vector */ vecindex = OFF_TO_IDX(addr - first_addr); /* * If we have skipped map entries, we need to make sure that * the byte vector is zeroed for those skipped entries. 
*/ while((lastvecindex + 1) < vecindex) { error = subyte( vec + lastvecindex, 0); if (error) { error = EFAULT; goto done; } ++lastvecindex; } /* * Pass the page information to the user */ error = subyte( vec + vecindex, mincoreinfo); if (error) { error = EFAULT; goto done; } /* * If the map has changed, due to the subyte, the previous * output may be invalid. */ vm_map_lock_read(map); if (timestamp != map->timestamp) goto RestartScan; lastvecindex = vecindex; addr += PAGE_SIZE; } } /* * subyte may page fault. In case it needs to modify * the map, we release the lock. */ vm_map_unlock_read(map); /* * Zero the last entries in the byte vector. */ vecindex = OFF_TO_IDX(end - first_addr); while((lastvecindex + 1) < vecindex) { error = subyte( vec + lastvecindex, 0); if (error) { error = EFAULT; goto done; } ++lastvecindex; } /* * If the map has changed, due to the subyte, the previous * output may be invalid. */ vm_map_lock_read(map); if (timestamp != map->timestamp) goto RestartScan; vm_map_unlock_read(map); error = 0; done: lwkt_reltoken(&map->token); return (error); }
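/*
 * Hedged userland sketch: consuming the per-page status bytes that the
 * sys_mincore() handler above produces, using the BSD-style mincore(2)
 * prototype and the MINCORE_INCORE flag from <sys/mman.h>.
 */
#include <sys/mman.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int
main(void)
{
	long pagesz = sysconf(_SC_PAGESIZE);
	size_t npages = 16;
	size_t len = npages * (size_t)pagesz;
	size_t i, resident = 0;
	char *buf, *vec;

	buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_PRIVATE, -1, 0);
	vec = malloc(npages);
	if (buf == MAP_FAILED || vec == NULL)
		return (1);

	buf[0] = 1;				/* touch one page so it becomes resident */
	if (mincore(buf, len, vec) == -1)	/* one status byte per page */
		return (1);
	for (i = 0; i < npages; i++)
		if (vec[i] & MINCORE_INCORE)
			resident++;
	printf("%zu of %zu pages resident\n", resident, npages);
	return (0);
}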
/* * msync system call handler * * msync_args(void *addr, size_t len, int flags) * * No requirements */ int sys_msync(struct msync_args *uap) { struct proc *p = curproc; vm_offset_t addr; vm_offset_t tmpaddr; vm_size_t size, pageoff; int flags; vm_map_t map; int rv; addr = (vm_offset_t) uap->addr; size = uap->len; flags = uap->flags; pageoff = (addr & PAGE_MASK); addr -= pageoff; size += pageoff; size = (vm_size_t) round_page(size); if (size < uap->len) /* wrap */ return(EINVAL); tmpaddr = addr + size; /* workaround gcc4 opt */ if (tmpaddr < addr) /* wrap */ return(EINVAL); if ((flags & (MS_ASYNC|MS_INVALIDATE)) == (MS_ASYNC|MS_INVALIDATE)) return (EINVAL); map = &p->p_vmspace->vm_map; /* * map->token serializes extracting the address range for size == 0 * msyncs with the vm_map_clean call; if the token were not held * across the two calls, an intervening munmap/mmap pair, for example, * could cause msync to occur on a wrong region. */ lwkt_gettoken(&map->token); /* * XXX Gak! If size is zero we are supposed to sync "all modified * pages with the region containing addr". Unfortunately, we don't * really keep track of individual mmaps so we approximate by flushing * the range of the map entry containing addr. This can be incorrect * if the region splits or is coalesced with a neighbor. */ if (size == 0) { vm_map_entry_t entry; vm_map_lock_read(map); rv = vm_map_lookup_entry(map, addr, &entry); if (rv == FALSE) { vm_map_unlock_read(map); rv = KERN_INVALID_ADDRESS; goto done; } addr = entry->start; size = entry->end - entry->start; vm_map_unlock_read(map); } /* * Clean the pages and interpret the return value. */ rv = vm_map_clean(map, addr, addr + size, (flags & MS_ASYNC) == 0, (flags & MS_INVALIDATE) != 0); done: lwkt_reltoken(&map->token); switch (rv) { case KERN_SUCCESS: break; case KERN_INVALID_ADDRESS: return (EINVAL); /* Sun returns ENOMEM? */ case KERN_FAILURE: return (EIO); default: return (EINVAL); } return (0); }
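/*
 * Hedged userland sketch: the page rounding and the rejection of
 * MS_ASYNC|MS_INVALIDATE in sys_msync() above are visible to callers of
 * msync(2).  The file named by path is assumed to already be at least one
 * page long; the write below modifies its first byte.
 */
#include <sys/mman.h>
#include <fcntl.h>
#include <unistd.h>

int
flush_first_page(const char *path)
{
	long pagesz = sysconf(_SC_PAGESIZE);
	int error = -1;
	int fd;
	char *p;

	if ((fd = open(path, O_RDWR)) == -1)
		return (-1);
	p = mmap(NULL, (size_t)pagesz, PROT_READ | PROT_WRITE, MAP_SHARED,
	    fd, 0);
	if (p != MAP_FAILED) {
		p[0] = 'x';		/* dirty the first page */
		/* MS_SYNC alone is valid; MS_ASYNC|MS_INVALIDATE would be EINVAL. */
		error = msync(p, (size_t)pagesz, MS_SYNC);
		munmap(p, (size_t)pagesz);
	}
	close(fd);
	return (error);
}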
/* ARGSUSED */ int sys_mincore(struct proc *p, void *v, register_t *retval) { struct sys_mincore_args /* { syscallarg(void *) addr; syscallarg(size_t) len; syscallarg(char *) vec; } */ *uap = v; vm_page_t m; char *vec, pgi; struct uvm_object *uobj; struct vm_amap *amap; struct vm_anon *anon; vm_map_entry_t entry; vaddr_t start, end, lim; vm_map_t map; vsize_t len, npgs; int error = 0; map = &p->p_vmspace->vm_map; start = (vaddr_t)SCARG(uap, addr); len = SCARG(uap, len); vec = SCARG(uap, vec); if (start & PAGE_MASK) return (EINVAL); len = round_page(len); end = start + len; if (end <= start) return (EINVAL); npgs = len >> PAGE_SHIFT; /* * Lock down vec, so our returned status isn't outdated by * storing the status byte for a page. */ if ((error = uvm_vslock(p, vec, npgs, VM_PROT_WRITE)) != 0) return (error); vm_map_lock_read(map); if (uvm_map_lookup_entry(map, start, &entry) == FALSE) { error = ENOMEM; goto out; } for (/* nothing */; entry != &map->header && entry->start < end; entry = entry->next) { KASSERT(!UVM_ET_ISSUBMAP(entry)); KASSERT(start >= entry->start); /* Make sure there are no holes. */ if (entry->end < end && (entry->next == &map->header || entry->next->start > entry->end)) { error = ENOMEM; goto out; } lim = end < entry->end ? end : entry->end; /* * Special case for objects with no "real" pages. Those * are always considered resident (mapped devices). */ if (UVM_ET_ISOBJ(entry)) { KASSERT(!UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj)); if (entry->object.uvm_obj->pgops->pgo_releasepg == NULL) { pgi = 1; for (/* nothing */; start < lim; start += PAGE_SIZE, vec++) copyout(&pgi, vec, sizeof(char)); continue; } } amap = entry->aref.ar_amap; /* top layer */ uobj = entry->object.uvm_obj; /* bottom layer */ if (uobj != NULL) simple_lock(&uobj->vmobjlock); for (/* nothing */; start < lim; start += PAGE_SIZE, vec++) { pgi = 0; if (amap != NULL) { /* Check the top layer first. */ anon = amap_lookup(&entry->aref, start - entry->start); /* Don't need to lock anon here. */ if (anon != NULL && anon->an_page != NULL) { /* * Anon has the page for this entry * offset. */ pgi = 1; } } if (uobj != NULL && pgi == 0) { /* Check the bottom layer. */ m = uvm_pagelookup(uobj, entry->offset + (start - entry->start)); if (m != NULL) { /* * Object has the page for this entry * offset. */ pgi = 1; } } copyout(&pgi, vec, sizeof(char)); } if (uobj != NULL) simple_unlock(&uobj->vmobjlock); } out: vm_map_unlock_read(map); uvm_vsunlock(p, SCARG(uap, vec), npgs); return (error); }
int sys___msync13(struct lwp *l, const struct sys___msync13_args *uap, register_t *retval) { /* { syscallarg(void *) addr; syscallarg(size_t) len; syscallarg(int) flags; } */ struct proc *p = l->l_proc; vaddr_t addr; vsize_t size, pageoff; struct vm_map *map; int error, flags, uvmflags; bool rv; /* * extract syscall args from the uap */ addr = (vaddr_t)SCARG(uap, addr); size = (vsize_t)SCARG(uap, len); flags = SCARG(uap, flags); /* sanity check flags */ if ((flags & ~(MS_ASYNC | MS_SYNC | MS_INVALIDATE)) != 0 || (flags & (MS_ASYNC | MS_SYNC | MS_INVALIDATE)) == 0 || (flags & (MS_ASYNC | MS_SYNC)) == (MS_ASYNC | MS_SYNC)) return EINVAL; if ((flags & (MS_ASYNC | MS_SYNC)) == 0) flags |= MS_SYNC; /* * align the address to a page boundary and adjust the size accordingly. */ pageoff = (addr & PAGE_MASK); addr -= pageoff; size += pageoff; size = (vsize_t)round_page(size); /* * get map */ map = &p->p_vmspace->vm_map; error = range_test(map, addr, size, false); if (error) return ENOMEM; /* * XXXCDC: do we really need this semantic? * * XXX Gak! If size is zero we are supposed to sync "all modified * pages with the region containing addr". Unfortunately, we * don't really keep track of individual mmaps so we approximate * by flushing the range of the map entry containing addr. * This can be incorrect if the region splits or is coalesced * with a neighbor. */ if (size == 0) { struct vm_map_entry *entry; vm_map_lock_read(map); rv = uvm_map_lookup_entry(map, addr, &entry); if (rv == true) { addr = entry->start; size = entry->end - entry->start; } vm_map_unlock_read(map); if (rv == false) return EINVAL; } /* * translate MS_ flags into PGO_ flags */ uvmflags = PGO_CLEANIT; if (flags & MS_INVALIDATE) uvmflags |= PGO_FREE; if (flags & MS_SYNC) uvmflags |= PGO_SYNCIO; error = uvm_map_clean(map, addr, addr+size, uvmflags); return error; }
/* * The map entries can *almost* be read with programs like cat. However, * large maps need special programs to read. It is not easy to implement * a program that can sense the required size of the buffer, and then * subsequently do a read with the appropriate size. This operation cannot * be atomic. The best that we can do is to allow the program to do a read * with an arbitrarily large buffer, and return as much as we can. We can * return an error code if the buffer is too small (EFBIG), then the program * can try a bigger buffer. */ int procfs_domap(struct proc *curp, struct lwp *lp, struct pfsnode *pfs, struct uio *uio) { struct proc *p = lp->lwp_proc; int len; struct vnode *vp; char *fullpath, *freepath; int error; vm_map_t map = &p->p_vmspace->vm_map; pmap_t pmap = vmspace_pmap(p->p_vmspace); vm_map_entry_t entry; char mebuffer[MEBUFFERSIZE]; if (uio->uio_rw != UIO_READ) return (EOPNOTSUPP); if (uio->uio_offset != 0) return (0); error = 0; vm_map_lock_read(map); for (entry = map->header.next; ((uio->uio_resid > 0) && (entry != &map->header)); entry = entry->next) { vm_object_t obj, tobj, lobj; int ref_count, shadow_count, flags; vm_offset_t addr; vm_offset_t ostart; int resident, privateresident; char *type; if (entry->maptype != VM_MAPTYPE_NORMAL && entry->maptype != VM_MAPTYPE_VPAGETABLE) { continue; } obj = entry->object.vm_object; if (obj) vm_object_hold(obj); if (obj && (obj->shadow_count == 1)) privateresident = obj->resident_page_count; else privateresident = 0; /* * Use map->hint as a poor man's ripout detector. */ map->hint = entry; ostart = entry->start; /* * Count resident pages (XXX can be horrible on 64-bit) */ resident = 0; addr = entry->start; while (addr < entry->end) { if (pmap_extract(pmap, addr)) resident++; addr += PAGE_SIZE; } if (obj) { lobj = obj; while ((tobj = lobj->backing_object) != NULL) { KKASSERT(tobj != obj); vm_object_hold(tobj); if (tobj == lobj->backing_object) { if (lobj != obj) { vm_object_lock_swap(); vm_object_drop(lobj); } lobj = tobj; } else { vm_object_drop(tobj); } } } else { lobj = NULL; } freepath = NULL; fullpath = "-"; if (lobj) { switch(lobj->type) { default: case OBJT_DEFAULT: type = "default"; vp = NULL; break; case OBJT_VNODE: type = "vnode"; vp = lobj->handle; vref(vp); break; case OBJT_SWAP: type = "swap"; vp = NULL; break; case OBJT_DEVICE: type = "device"; vp = NULL; break; } flags = obj->flags; ref_count = obj->ref_count; shadow_count = obj->shadow_count; if (vp != NULL) { vn_fullpath(p, vp, &fullpath, &freepath, 1); vrele(vp); } if (lobj != obj) vm_object_drop(lobj); } else { type = "none"; flags = 0; ref_count = 0; shadow_count = 0; } /* * format: * start, end, res, priv res, cow, access, type, (fullpath). */ ksnprintf(mebuffer, sizeof(mebuffer), #if LONG_BIT == 64 "0x%016lx 0x%016lx %d %d %p %s%s%s %d %d " #else "0x%08lx 0x%08lx %d %d %p %s%s%s %d %d " #endif "0x%04x %s %s %s %s\n", (u_long)entry->start, (u_long)entry->end, resident, privateresident, obj, (entry->protection & VM_PROT_READ)?"r":"-", (entry->protection & VM_PROT_WRITE)?"w":"-", (entry->protection & VM_PROT_EXECUTE)?"x":"-", ref_count, shadow_count, flags, (entry->eflags & MAP_ENTRY_COW)?"COW":"NCOW", (entry->eflags & MAP_ENTRY_NEEDS_COPY)?"NC":"NNC", type, fullpath); if (obj) vm_object_drop(obj); if (freepath != NULL) { kfree(freepath, M_TEMP); freepath = NULL; } len = strlen(mebuffer); if (len > uio->uio_resid) { error = EFBIG; break; } /* * We cannot safely hold the map locked while accessing * userspace as a VM fault might recurse the locked map. 
*/ vm_map_unlock_read(map); error = uiomove(mebuffer, len, uio); vm_map_lock_read(map); if (error) break; /* * We use map->hint as a poor man's ripout detector. If * it does not match the entry we set it to prior to * unlocking the map the entry MIGHT now be stale. In * this case we do an expensive lookup to find our place * in the iteration again. */ if (map->hint != entry) { vm_map_entry_t reentry; vm_map_lookup_entry(map, ostart, &reentry); entry = reentry; } } vm_map_unlock_read(map); return error; }
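/*
 * Hedged userland sketch of the retry protocol described in the header
 * comment above: read the whole map in a single read(2) at offset 0 and
 * grow the buffer whenever the kernel reports EFBIG.  The "/proc/%d/map"
 * path follows the DragonFly procfs layout and is an assumption here.
 */
#include <sys/types.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

char *
read_proc_map(pid_t pid)
{
	char path[64], *buf = NULL;
	size_t size = 4096;
	ssize_t n;
	int fd;

	snprintf(path, sizeof(path), "/proc/%d/map", (int)pid);
	if ((fd = open(path, O_RDONLY)) == -1)
		return (NULL);
	for (;;) {
		free(buf);
		if ((buf = malloc(size + 1)) == NULL)
			break;
		/* The whole map must come back from one read at offset 0. */
		n = pread(fd, buf, size, 0);
		if (n >= 0) {
			buf[n] = '\0';
			close(fd);
			return (buf);
		}
		if (errno != EFBIG)
			break;
		size *= 2;		/* buffer too small: try a bigger one */
	}
	free(buf);
	close(fd);
	return (NULL);
}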
/* ARGSUSED */ int sys_mincore(struct lwp *l, const struct sys_mincore_args *uap, register_t *retval) { /* { syscallarg(void *) addr; syscallarg(size_t) len; syscallarg(char *) vec; } */ struct proc *p = l->l_proc; struct vm_page *pg; char *vec, pgi; struct uvm_object *uobj; struct vm_amap *amap; struct vm_anon *anon; struct vm_map_entry *entry; vaddr_t start, end, lim; struct vm_map *map; vsize_t len; int error = 0, npgs; map = &p->p_vmspace->vm_map; start = (vaddr_t)SCARG(uap, addr); len = SCARG(uap, len); vec = SCARG(uap, vec); if (start & PAGE_MASK) return EINVAL; len = round_page(len); end = start + len; if (end <= start) return EINVAL; /* * Lock down vec, so our returned status isn't outdated by * storing the status byte for a page. */ npgs = len >> PAGE_SHIFT; error = uvm_vslock(p->p_vmspace, vec, npgs, VM_PROT_WRITE); if (error) { return error; } vm_map_lock_read(map); if (uvm_map_lookup_entry(map, start, &entry) == false) { error = ENOMEM; goto out; } for (/* nothing */; entry != &map->header && entry->start < end; entry = entry->next) { KASSERT(!UVM_ET_ISSUBMAP(entry)); KASSERT(start >= entry->start); /* Make sure there are no holes. */ if (entry->end < end && (entry->next == &map->header || entry->next->start > entry->end)) { error = ENOMEM; goto out; } lim = end < entry->end ? end : entry->end; /* * Special case for objects with no "real" pages. Those * are always considered resident (mapped devices). */ if (UVM_ET_ISOBJ(entry)) { KASSERT(!UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj)); if (UVM_OBJ_IS_DEVICE(entry->object.uvm_obj)) { for (/* nothing */; start < lim; start += PAGE_SIZE, vec++) subyte(vec, 1); continue; } } amap = entry->aref.ar_amap; /* upper layer */ uobj = entry->object.uvm_obj; /* lower layer */ if (amap != NULL) amap_lock(amap); if (uobj != NULL) mutex_enter(uobj->vmobjlock); for (/* nothing */; start < lim; start += PAGE_SIZE, vec++) { pgi = 0; if (amap != NULL) { /* Check the upper layer first. */ anon = amap_lookup(&entry->aref, start - entry->start); /* Don't need to lock anon here. */ if (anon != NULL && anon->an_page != NULL) { /* * Anon has the page for this entry * offset. */ pgi = 1; } } if (uobj != NULL && pgi == 0) { /* Check the lower layer. */ pg = uvm_pagelookup(uobj, entry->offset + (start - entry->start)); if (pg != NULL) { /* * Object has the page for this entry * offset. */ pgi = 1; } } (void) subyte(vec, pgi); } if (uobj != NULL) mutex_exit(uobj->vmobjlock); if (amap != NULL) amap_unlock(amap); } out: vm_map_unlock_read(map); uvm_vsunlock(p->p_vmspace, SCARG(uap, vec), npgs); return error; }
/* * The map entries can *almost* be read with programs like cat. However, * large maps need special programs to read. It is not easy to implement * a program that can sense the required size of the buffer, and then * subsequently do a read with the appropriate size. This operation cannot * be atomic. The best that we can do is to allow the program to do a read * with an arbitrarily large buffer, and return as much as we can. We can * return an error code if the buffer is too small (EFBIG), then the program * can try a bigger buffer. */ int procfs_domap(struct lwp *curl, struct proc *p, struct pfsnode *pfs, struct uio *uio, int linuxmode) { int error; struct vmspace *vm; struct vm_map *map; struct vm_map_entry *entry; char *buffer = NULL; size_t bufsize = BUFFERSIZE; char *path; struct vnode *vp; struct vattr va; dev_t dev; long fileid; size_t pos; int width = (int)((curl->l_proc->p_flag & PK_32) ? sizeof(int32_t) : sizeof(void *)) * 2; if (uio->uio_rw != UIO_READ) return EOPNOTSUPP; if (uio->uio_offset != 0) { /* * we return 0 here, so that the second read returns EOF * we don't support reading from an offset because the * map could have changed between the two reads. */ return 0; } error = 0; if (linuxmode != 0) path = malloc(MAXPATHLEN * 4, M_TEMP, M_WAITOK); else path = NULL; if ((error = proc_vmspace_getref(p, &vm)) != 0) goto out; map = &vm->vm_map; vm_map_lock_read(map); again: buffer = malloc(bufsize, M_TEMP, M_WAITOK); pos = 0; for (entry = map->header.next; entry != &map->header; entry = entry->next) { if (UVM_ET_ISSUBMAP(entry)) continue; if (linuxmode != 0) { *path = 0; dev = (dev_t)0; fileid = 0; if (UVM_ET_ISOBJ(entry) && UVM_OBJ_IS_VNODE(entry->object.uvm_obj)) { vp = (struct vnode *)entry->object.uvm_obj; vn_lock(vp, LK_SHARED | LK_RETRY); error = VOP_GETATTR(vp, &va, curl->l_cred); VOP_UNLOCK(vp); if (error == 0 && vp != pfs->pfs_vnode) { fileid = va.va_fileid; dev = va.va_fsid; error = vnode_to_path(path, MAXPATHLEN * 4, vp, curl, p); } } pos += snprintf(buffer + pos, bufsize - pos, "%.*"PRIxVADDR"-%.*"PRIxVADDR" %c%c%c%c " "%.*lx %.2llx:%.2llx %-8ld %25.s %s\n", width, entry->start, width, entry->end, (entry->protection & VM_PROT_READ) ? 'r' : '-', (entry->protection & VM_PROT_WRITE) ? 'w' : '-', (entry->protection & VM_PROT_EXECUTE) ? 'x' : '-', (entry->etype & UVM_ET_COPYONWRITE) ? 'p' : 's', width, (unsigned long)entry->offset, (unsigned long long)major(dev), (unsigned long long)minor(dev), fileid, "", path); } else { pos += snprintf(buffer + pos, bufsize - pos, "%#"PRIxVADDR" %#"PRIxVADDR" " "%c%c%c %c%c%c %s %s %d %d %d\n", entry->start, entry->end, (entry->protection & VM_PROT_READ) ? 'r' : '-', (entry->protection & VM_PROT_WRITE) ? 'w' : '-', (entry->protection & VM_PROT_EXECUTE) ? 'x' : '-', (entry->max_protection & VM_PROT_READ) ? 'r' : '-', (entry->max_protection & VM_PROT_WRITE) ? 'w' : '-', (entry->max_protection & VM_PROT_EXECUTE) ? 'x' : '-', (entry->etype & UVM_ET_COPYONWRITE) ? "COW" : "NCOW", (entry->etype & UVM_ET_NEEDSCOPY) ? "NC" : "NNC", entry->inheritance, entry->wired_count, entry->advice); } if (pos >= bufsize) { bufsize <<= 1; if (bufsize > MAXBUFFERSIZE) { error = ENOMEM; vm_map_unlock_read(map); uvmspace_free(vm); goto out; } free(buffer, M_TEMP); goto again; } } vm_map_unlock_read(map); uvmspace_free(vm); error = uiomove(buffer, pos, uio); out: if (path != NULL) free(path, M_TEMP); if (buffer != NULL) free(buffer, M_TEMP); return error; }
int sys_msync(struct proc *p, void *v, register_t *retval) { struct sys_msync_args /* { syscallarg(void *) addr; syscallarg(size_t) len; syscallarg(int) flags; } */ *uap = v; vaddr_t addr; vsize_t size, pageoff; vm_map_t map; int rv, flags, uvmflags; /* * extract syscall args from the uap */ addr = (vaddr_t)SCARG(uap, addr); size = (vsize_t)SCARG(uap, len); flags = SCARG(uap, flags); /* sanity check flags */ if ((flags & ~(MS_ASYNC | MS_SYNC | MS_INVALIDATE)) != 0 || (flags & (MS_ASYNC | MS_SYNC | MS_INVALIDATE)) == 0 || (flags & (MS_ASYNC | MS_SYNC)) == (MS_ASYNC | MS_SYNC)) return (EINVAL); if ((flags & (MS_ASYNC | MS_SYNC)) == 0) flags |= MS_SYNC; /* * align the address to a page boundary, and adjust the size accordingly */ ALIGN_ADDR(addr, size, pageoff); if (addr > SIZE_MAX - size) return (EINVAL); /* disallow wrap-around. */ /* * get map */ map = &p->p_vmspace->vm_map; /* * XXXCDC: do we really need this semantic? * * XXX Gak! If size is zero we are supposed to sync "all modified * pages with the region containing addr". Unfortunately, we * don't really keep track of individual mmaps so we approximate * by flushing the range of the map entry containing addr. * This can be incorrect if the region splits or is coalesced * with a neighbor. */ if (size == 0) { vm_map_entry_t entry; vm_map_lock_read(map); rv = uvm_map_lookup_entry(map, addr, &entry); if (rv == TRUE) { addr = entry->start; size = entry->end - entry->start; } vm_map_unlock_read(map); if (rv == FALSE) return (EINVAL); } /* * translate MS_ flags into PGO_ flags */ uvmflags = PGO_CLEANIT; if (flags & MS_INVALIDATE) uvmflags |= PGO_FREE; if (flags & MS_SYNC) uvmflags |= PGO_SYNCIO; else uvmflags |= PGO_SYNCIO; /* XXXCDC: force sync for now! */ return (uvm_map_clean(map, addr, addr+size, uvmflags)); }
void debugger_getprocs_callback(struct allocation_t* ref) { if (!ref) return; struct message_t* message = __get(ref); if (!message) return; // Only handle requests if (message->header.request != 1) goto cleanup; int(*_sx_slock)(struct sx *sx, int opts, const char *file, int line) = kdlsym(_sx_slock); void(*_sx_sunlock)(struct sx *sx, const char *file, int line) = kdlsym(_sx_sunlock); void(*_mtx_lock_flags)(struct mtx *m, int opts, const char *file, int line) = kdlsym(_mtx_lock_flags); void(*_mtx_unlock_flags)(struct mtx *m, int opts, const char *file, int line) = kdlsym(_mtx_unlock_flags); struct sx* allproclock = (struct sx*)kdlsym(allproc_lock); struct proclist* allproc = (struct proclist*)*(uint64_t*)kdlsym(allproc); void(*vmspace_free)(struct vmspace *) = kdlsym(vmspace_free); struct vmspace* (*vmspace_acquire_ref)(struct proc *) = kdlsym(vmspace_acquire_ref); void(*_vm_map_lock_read)(vm_map_t map, const char *file, int line) = kdlsym(_vm_map_lock_read); void(*_vm_map_unlock_read)(vm_map_t map, const char *file, int line) = kdlsym(_vm_map_unlock_read); uint64_t procCount = 0; struct proc* p = NULL; struct debugger_getprocs_t getproc = { 0 }; sx_slock(allproclock); FOREACH_PROC_IN_SYSTEM(p) { PROC_LOCK(p); // Zero out our process information kmemset(&getproc, 0, sizeof(getproc)); // Get the vm map struct vmspace* vm = vmspace_acquire_ref(p); vm_map_t map = &p->p_vmspace->vm_map; vm_map_lock_read(map); struct vm_map_entry* entry = map->header.next; // Copy over all of the address information getproc.process_id = p->p_pid; getproc.text_address = (uint64_t)entry->start; getproc.text_size = (uint64_t)entry->end - entry->start; getproc.data_address = (uint64_t)p->p_vmspace->vm_daddr; getproc.data_size = p->p_vmspace->vm_dsize; // Copy over the name and path kmemcpy(getproc.process_name, p->p_comm, sizeof(getproc.process_name)); kmemcpy(getproc.path, p->p_elfpath, sizeof(getproc.path)); // Write it back to the PC kwrite(message->socket, &getproc, sizeof(getproc)); procCount++; // Free the vmmap vm_map_unlock_read(map); vmspace_free(vm); PROC_UNLOCK(p); } sx_sunlock(allproclock); // Send finalizer, because f**k this shit kmemset(&getproc, 0xDD, sizeof(getproc)); kwrite(message->socket, &getproc, sizeof(getproc)); cleanup: __dec(ref); }
/* * The map entries can *almost* be read with programs like cat. However, * large maps need special programs to read. It is not easy to implement * a program that can sense the required size of the buffer, and then * subsequently do a read with the appropriate size. This operation cannot * be atomic. The best that we can do is to allow the program to do a read * with an arbitrarily large buffer, and return as much as we can. We can * return an error code if the buffer is too small (EFBIG), then the program * can try a bigger buffer. */ int procfs_doprocmap(PFS_FILL_ARGS) { struct vmspace *vm; vm_map_t map; vm_map_entry_t entry, tmp_entry; struct vnode *vp; char *fullpath, *freepath; struct uidinfo *uip; int error, vfslocked; unsigned int last_timestamp; #ifdef COMPAT_FREEBSD32 int wrap32 = 0; #endif PROC_LOCK(p); error = p_candebug(td, p); PROC_UNLOCK(p); if (error) return (error); if (uio->uio_rw != UIO_READ) return (EOPNOTSUPP); #ifdef COMPAT_FREEBSD32 if (curproc->p_sysent->sv_flags & SV_ILP32) { if (!(p->p_sysent->sv_flags & SV_ILP32)) return (EOPNOTSUPP); wrap32 = 1; } #endif vm = vmspace_acquire_ref(p); if (vm == NULL) return (ESRCH); map = &vm->vm_map; vm_map_lock_read(map); for (entry = map->header.next; entry != &map->header; entry = entry->next) { vm_object_t obj, tobj, lobj; int ref_count, shadow_count, flags; vm_offset_t e_start, e_end, addr; int resident, privateresident; char *type; vm_eflags_t e_eflags; vm_prot_t e_prot; if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) continue; e_eflags = entry->eflags; e_prot = entry->protection; e_start = entry->start; e_end = entry->end; privateresident = 0; obj = entry->object.vm_object; if (obj != NULL) { VM_OBJECT_LOCK(obj); if (obj->shadow_count == 1) privateresident = obj->resident_page_count; } uip = (entry->uip) ? entry->uip : (obj ? obj->uip : NULL); resident = 0; addr = entry->start; while (addr < entry->end) { if (pmap_extract(map->pmap, addr)) resident++; addr += PAGE_SIZE; } for (lobj = tobj = obj; tobj; tobj = tobj->backing_object) { if (tobj != obj) VM_OBJECT_LOCK(tobj); if (lobj != obj) VM_OBJECT_UNLOCK(lobj); lobj = tobj; } last_timestamp = map->timestamp; vm_map_unlock_read(map); freepath = NULL; fullpath = "-"; if (lobj) { switch (lobj->type) { default: case OBJT_DEFAULT: type = "default"; vp = NULL; break; case OBJT_VNODE: type = "vnode"; vp = lobj->handle; vref(vp); break; case OBJT_SWAP: type = "swap"; vp = NULL; break; case OBJT_SG: case OBJT_DEVICE: type = "device"; vp = NULL; break; } if (lobj != obj) VM_OBJECT_UNLOCK(lobj); flags = obj->flags; ref_count = obj->ref_count; shadow_count = obj->shadow_count; VM_OBJECT_UNLOCK(obj); if (vp != NULL) { vn_fullpath(td, vp, &fullpath, &freepath); vfslocked = VFS_LOCK_GIANT(vp->v_mount); vrele(vp); VFS_UNLOCK_GIANT(vfslocked); } } else { type = "none"; flags = 0; ref_count = 0; shadow_count = 0; } /* * format: * start, end, resident, private resident, cow, access, type, * charged, charged uid. */ error = sbuf_printf(sb, "0x%lx 0x%lx %d %d %p %s%s%s %d %d 0x%x %s %s %s %s %s %d\n", (u_long)e_start, (u_long)e_end, resident, privateresident, #ifdef COMPAT_FREEBSD32 wrap32 ? NULL : obj, /* Hide 64 bit value */ #else obj, #endif (e_prot & VM_PROT_READ)?"r":"-", (e_prot & VM_PROT_WRITE)?"w":"-", (e_prot & VM_PROT_EXECUTE)?"x":"-", ref_count, shadow_count, flags, (e_eflags & MAP_ENTRY_COW)?"COW":"NCOW", (e_eflags & MAP_ENTRY_NEEDS_COPY)?"NC":"NNC", type, fullpath, uip ? "CH":"NCH", uip ? 
uip->ui_uid : -1); if (freepath != NULL) free(freepath, M_TEMP); vm_map_lock_read(map); if (error == -1) { error = 0; break; } if (last_timestamp != map->timestamp) { /* * Look again for the entry because the map was * modified while it was unlocked. Specifically, * the entry may have been clipped, merged, or deleted. */ vm_map_lookup_entry(map, e_end - 1, &tmp_entry); entry = tmp_entry; } } vm_map_unlock_read(map); vmspace_free(vm); return (error); }
kern_return_t vm32_region_info_64( __DEBUG_ONLY vm_map_t map, __DEBUG_ONLY vm32_offset_t address, __DEBUG_ONLY vm_info_region_64_t *regionp, __DEBUG_ONLY vm_info_object_array_t *objectsp, __DEBUG_ONLY mach_msg_type_number_t *objectsCntp) { #if !MACH_VM_DEBUG return KERN_FAILURE; #else vm_map_copy_t copy; vm_offset_t addr = 0; /* memory for OOL data */ vm_size_t size; /* size of the memory */ unsigned int room; /* room for this many objects */ unsigned int used; /* actually this many objects */ vm_info_region_64_t region; kern_return_t kr; if (map == VM_MAP_NULL) return KERN_INVALID_TASK; size = 0; /* no memory allocated yet */ for (;;) { vm_map_t cmap; /* current map in traversal */ vm_map_t nmap; /* next map to look at */ vm_map_entry_t entry; vm_object_t object, cobject, nobject; /* nothing is locked */ vm_map_lock_read(map); for (cmap = map;; cmap = nmap) { /* cmap is read-locked */ if (!vm_map_lookup_entry(cmap, address, &entry)) { entry = entry->vme_next; if (entry == vm_map_to_entry(cmap)) { vm_map_unlock_read(cmap); if (size != 0) kmem_free(ipc_kernel_map, addr, size); return KERN_NO_SPACE; } } if (entry->is_sub_map) nmap = VME_SUBMAP(entry); else break; /* move down to the lower map */ vm_map_lock_read(nmap); vm_map_unlock_read(cmap); } /* cmap is read-locked; we have a real entry */ object = VME_OBJECT(entry); region.vir_start = (natural_t) entry->vme_start; region.vir_end = (natural_t) entry->vme_end; region.vir_object = (natural_t)(uintptr_t) object; region.vir_offset = VME_OFFSET(entry); region.vir_needs_copy = entry->needs_copy; region.vir_protection = entry->protection; region.vir_max_protection = entry->max_protection; region.vir_inheritance = entry->inheritance; region.vir_wired_count = entry->wired_count; region.vir_user_wired_count = entry->user_wired_count; used = 0; room = (unsigned int) (size / sizeof(vm_info_object_t)); if (object == VM_OBJECT_NULL) { vm_map_unlock_read(cmap); /* no memory needed */ break; } vm_object_lock(object); vm_map_unlock_read(cmap); for (cobject = object;; cobject = nobject) { /* cobject is locked */ if (used < room) { vm_info_object_t *vio = &((vm_info_object_t *) addr)[used]; vio->vio_object = (natural_t)(uintptr_t) cobject; vio->vio_size = (natural_t) cobject->vo_size; vio->vio_ref_count = cobject->ref_count; vio->vio_resident_page_count = cobject->resident_page_count; vio->vio_copy = (natural_t)(uintptr_t) cobject->copy; vio->vio_shadow = (natural_t)(uintptr_t) cobject->shadow; vio->vio_shadow_offset = (natural_t) cobject->vo_shadow_offset; vio->vio_paging_offset = (natural_t) cobject->paging_offset; vio->vio_copy_strategy = cobject->copy_strategy; vio->vio_last_alloc = (vm_offset_t) cobject->last_alloc; vio->vio_paging_in_progress = cobject->paging_in_progress + cobject->activity_in_progress; vio->vio_pager_created = cobject->pager_created; vio->vio_pager_initialized = cobject->pager_initialized; vio->vio_pager_ready = cobject->pager_ready; vio->vio_can_persist = cobject->can_persist; vio->vio_internal = cobject->internal; vio->vio_temporary = cobject->temporary; vio->vio_alive = cobject->alive; vio->vio_purgable = (cobject->purgable != VM_PURGABLE_DENY); vio->vio_purgable_volatile = (cobject->purgable == VM_PURGABLE_VOLATILE || cobject->purgable == VM_PURGABLE_EMPTY); } used++; nobject = cobject->shadow; if (nobject == VM_OBJECT_NULL) { vm_object_unlock(cobject); break; } vm_object_lock(nobject); vm_object_unlock(cobject); } /* nothing locked */ if (used <= room) break; /* must allocate more memory */ if (size != 0) 
kmem_free(ipc_kernel_map, addr, size); size = vm_map_round_page(2 * used * sizeof(vm_info_object_t), VM_MAP_PAGE_MASK(ipc_kernel_map)); kr = vm_allocate(ipc_kernel_map, &addr, size, VM_FLAGS_ANYWHERE | VM_MAKE_TAG(VM_KERN_MEMORY_IPC)); if (kr != KERN_SUCCESS) return KERN_RESOURCE_SHORTAGE; kr = vm_map_wire( ipc_kernel_map, vm_map_trunc_page(addr, VM_MAP_PAGE_MASK(ipc_kernel_map)), vm_map_round_page(addr + size, VM_MAP_PAGE_MASK(ipc_kernel_map)), VM_PROT_READ|VM_PROT_WRITE, FALSE); assert(kr == KERN_SUCCESS); } /* free excess memory; make remaining memory pageable */ if (used == 0) { copy = VM_MAP_COPY_NULL; if (size != 0) kmem_free(ipc_kernel_map, addr, size); } else { vm_size_t size_used = (used * sizeof(vm_info_object_t)); vm_size_t vmsize_used = vm_map_round_page(size_used, VM_MAP_PAGE_MASK(ipc_kernel_map)); kr = vm_map_unwire( ipc_kernel_map, vm_map_trunc_page(addr, VM_MAP_PAGE_MASK(ipc_kernel_map)), vm_map_round_page(addr + size_used, VM_MAP_PAGE_MASK(ipc_kernel_map)), FALSE); assert(kr == KERN_SUCCESS); kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)addr, (vm_map_size_t)size_used, TRUE, &copy); assert(kr == KERN_SUCCESS); if (size != vmsize_used) kmem_free(ipc_kernel_map, addr + vmsize_used, size - vmsize_used); } *regionp = region; *objectsp = (vm_info_object_array_t) copy; *objectsCntp = used; return KERN_SUCCESS; #endif /* MACH_VM_DEBUG */ }
/* * The caller must hold proc_token. */ static int do_vmtotal_callback(struct proc *p, void *data) { struct vmtotal *totalp = data; struct lwp *lp; vm_map_entry_t entry; vm_map_t map; int paging; if (p->p_flag & P_SYSTEM) return(0); FOREACH_LWP_IN_PROC(lp, p) { switch (lp->lwp_stat) { case LSSTOP: case LSSLEEP: if ((p->p_flag & P_SWAPPEDOUT) == 0) { if ((lp->lwp_flag & LWP_SINTR) == 0) totalp->t_dw++; else if (lp->lwp_slptime < maxslp) totalp->t_sl++; } else if (lp->lwp_slptime < maxslp) { totalp->t_sw++; } if (lp->lwp_slptime >= maxslp) return(0); break; case LSRUN: if (p->p_flag & P_SWAPPEDOUT) totalp->t_sw++; else totalp->t_rq++; if (p->p_stat == SIDL) return(0); break; default: return (0); } } /* * Note active objects. */ paging = 0; lwkt_gettoken(&vm_token); if (p->p_vmspace) { map = &p->p_vmspace->vm_map; vm_map_lock_read(map); for (entry = map->header.next; entry != &map->header; entry = entry->next) { if (entry->maptype != VM_MAPTYPE_NORMAL && entry->maptype != VM_MAPTYPE_VPAGETABLE) { continue; } if (entry->object.vm_object == NULL) continue; vm_object_set_flag(entry->object.vm_object, OBJ_ACTIVE); paging |= entry->object.vm_object->paging_in_progress; } vm_map_unlock_read(map); } lwkt_reltoken(&vm_token); if (paging) totalp->t_pw++; return(0); }
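/*
 * Hedged userland sketch: the counters accumulated by the callback above
 * (t_rq, t_sl, t_sw, t_pw, ...) are exported as a struct vmtotal.  The
 * "vm.vmtotal" sysctl name and the exact field widths are assumptions based
 * on the FreeBSD/DragonFly-style <sys/vmmeter.h>.
 */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <sys/vmmeter.h>
#include <stdio.h>

int
print_vmtotal(void)
{
	struct vmtotal t;
	size_t len = sizeof(t);

	if (sysctlbyname("vm.vmtotal", &t, &len, NULL, 0) == -1)
		return (-1);
	printf("runnable %ld, sleeping %ld, swapped %ld, waiting on paging %ld\n",
	    (long)t.t_rq, (long)t.t_sl, (long)t.t_sw, (long)t.t_pw);
	return (0);
}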