Exemplo n.º 1
0
/*
 * No requirements.
 */
int
useracc(c_caddr_t addr, int len, int rw)
{
	boolean_t rv;
	vm_prot_t prot;
	vm_map_t map;
	vm_map_entry_t save_hint;
	vm_offset_t wrap;

	KASSERT((rw & (~VM_PROT_ALL)) == 0,
	    ("illegal ``rw'' argument to useracc (%x)", rw));
	prot = rw;
	/*
	 * XXX - check separately to disallow access to user area and user
	 * page tables - they are in the map.
	 */
	wrap = (vm_offset_t)addr + len;
	if (wrap > VM_MAX_USER_ADDRESS || wrap < (vm_offset_t)addr) {
		return (FALSE);
	}
	map = &curproc->p_vmspace->vm_map;
	vm_map_lock_read(map);
	/*
	 * We save the map hint, and restore it.  Useracc appears to distort
	 * the map hint unnecessarily.
	 */
	save_hint = map->hint;
	rv = vm_map_check_protection(map, trunc_page((vm_offset_t)addr),
				     round_page(wrap), prot, TRUE);
	map->hint = save_hint;
	vm_map_unlock_read(map);
	
	return (rv == TRUE);
}
Exemplo n.º 2
0
static void
get_proc_size_info(struct lwp *l, unsigned long *stext, unsigned long *etext, unsigned long *sstack)
{
	struct proc *p = l->l_proc;
	struct vmspace *vm;
	struct vm_map *map;
	struct vm_map_entry *entry;

	*stext = 0;
	*etext = 0;
	*sstack = 0;

	proc_vmspace_getref(p, &vm);
	map = &vm->vm_map;
	vm_map_lock_read(map);

	for (entry = map->header.next; entry != &map->header;
	    entry = entry->next) {
		if (UVM_ET_ISSUBMAP(entry))
			continue;
		/* assume text is the first entry */
		if (*stext == *etext) {
			*stext = entry->start;
			*etext = entry->end;
			break;
		}
	}
#if defined(LINUX_USRSTACK32) && defined(USRSTACK32)
	if (strcmp(p->p_emul->e_name, "linux32") == 0 &&
	    LINUX_USRSTACK32 < USRSTACK32)
		*sstack = (unsigned long)LINUX_USRSTACK32;
	else
#endif
#ifdef LINUX_USRSTACK
	if (strcmp(p->p_emul->e_name, "linux") == 0 &&
	    LINUX_USRSTACK < USRSTACK)
		*sstack = (unsigned long)LINUX_USRSTACK;
	else
#endif
#ifdef	USRSTACK32
	if (strstr(p->p_emul->e_name, "32") != NULL)
		*sstack = (unsigned long)USRSTACK32;
	else
#endif
		*sstack = (unsigned long)USRSTACK;

	/*
	 * jdk 1.6 compares low <= addr && addr < high
	 * if we put addr == high, then the test fails
	 * so eat one page.
	 */
	*sstack -= PAGE_SIZE;

	vm_map_unlock_read(map);
	uvmspace_free(vm);
}
Exemplo n.º 3
0
static int
sysctl_kmem_map_free(SYSCTL_HANDLER_ARGS)
{
	u_long size;

	vm_map_lock_read(kmem_map);
	size = kmem_map->root != NULL ? kmem_map->root->max_free :
	    kmem_map->max_offset - kmem_map->min_offset;
	vm_map_unlock_read(kmem_map);
	return (sysctl_handle_long(oidp, &size, 0, req));
}
Exemplo n.º 4
0
/*
 * No requirements.
 */
int
useracc(c_caddr_t addr, int len, int rw)
{
	boolean_t rv;
	vm_prot_t prot;
	vm_map_t map;
	vm_offset_t wrap;
	vm_offset_t gpa;

	KASSERT((rw & (~VM_PROT_ALL)) == 0,
	    ("illegal ``rw'' argument to useracc (%x)", rw));
	prot = rw;

	if (curthread->td_vmm) {
		if (vmm_vm_get_gpa(curproc, (register_t *)&gpa, (register_t) addr))
			panic("%s: could not get GPA\n", __func__);
		addr = (c_caddr_t) gpa;
	}

	/*
	 * XXX - check separately to disallow access to user area and user
	 * page tables - they are in the map.
	 */
	wrap = (vm_offset_t)addr + len;
	if (wrap > VM_MAX_USER_ADDRESS || wrap < (vm_offset_t)addr) {
		return (FALSE);
	}
	map = &curproc->p_vmspace->vm_map;
	vm_map_lock_read(map);

	rv = vm_map_check_protection(map, trunc_page((vm_offset_t)addr),
				     round_page(wrap), prot, TRUE);
	vm_map_unlock_read(map);
	
	return (rv == TRUE);
}
Exemplo n.º 5
0
int
fill_procregioninfo(task_t task, uint64_t arg, struct proc_regioninfo_internal *pinfo, uint32_t  *vnodeaddr, uint32_t  *vid)
{

	vm_map_t map;
	vm_map_offset_t	address = (vm_map_offset_t )arg;
	vm_map_entry_t		tmp_entry;
	vm_map_entry_t		entry;
	vm_map_offset_t		start;
	vm_region_extended_info_data_t extended;
	vm_region_top_info_data_t top;

	    task_lock(task);
	    map = task->map;
	    if (map == VM_MAP_NULL) 
	    {
			task_unlock(task);
			return(0);
	    }
	    vm_map_reference(map); 
	    task_unlock(task);
	    
	    vm_map_lock_read(map);

	    start = address;
	    if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
		if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
			vm_map_unlock_read(map);
	    		vm_map_deallocate(map); 
		   	return(0);
		}
	    } else {
		entry = tmp_entry;
	    }

	    start = entry->vme_start;

	    pinfo->pri_offset = entry->offset;
	    pinfo->pri_protection = entry->protection;
	    pinfo->pri_max_protection = entry->max_protection;
	    pinfo->pri_inheritance = entry->inheritance;
	    pinfo->pri_behavior = entry->behavior;
	    pinfo->pri_user_wired_count = entry->user_wired_count;
	    pinfo->pri_user_tag = entry->alias;

	    if (entry->is_sub_map) {
		pinfo->pri_flags |= PROC_REGION_SUBMAP;
	    } else {
		if (entry->is_shared)
			pinfo->pri_flags |= PROC_REGION_SHARED;
	    }


	    extended.protection = entry->protection;
	    extended.user_tag = entry->alias;
	    extended.pages_resident = 0;
	    extended.pages_swapped_out = 0;
	    extended.pages_shared_now_private = 0;
	    extended.pages_dirtied = 0;
	    extended.external_pager = 0;
	    extended.shadow_depth = 0;

	    vm_map_region_walk(map, start, entry, entry->offset, entry->vme_end - start, &extended);

	    if (extended.external_pager && extended.ref_count == 2 && extended.share_mode == SM_SHARED)
	            extended.share_mode = SM_PRIVATE;

	    top.private_pages_resident = 0;
	    top.shared_pages_resident = 0;
	    vm_map_region_top_walk(entry, &top);

	
	    pinfo->pri_pages_resident = extended.pages_resident;
	    pinfo->pri_pages_shared_now_private = extended.pages_shared_now_private;
	    pinfo->pri_pages_swapped_out = extended.pages_swapped_out;
	    pinfo->pri_pages_dirtied = extended.pages_dirtied;
	    pinfo->pri_ref_count = extended.ref_count;
	    pinfo->pri_shadow_depth = extended.shadow_depth;
	    pinfo->pri_share_mode = extended.share_mode;

	    pinfo->pri_private_pages_resident = top.private_pages_resident;
	    pinfo->pri_shared_pages_resident = top.shared_pages_resident;
	    pinfo->pri_obj_id = top.obj_id;
		
	    pinfo->pri_address = (uint64_t)start;
	    pinfo->pri_size = (uint64_t)(entry->vme_end - start);
	    pinfo->pri_depth = 0;
	
	    if ((vnodeaddr != 0) && (entry->is_sub_map == 0)) {
		*vnodeaddr = (uint32_t)0;

		if (fill_vnodeinfoforaddr(entry, vnodeaddr, vid) ==0) {
			vm_map_unlock_read(map);
	    		vm_map_deallocate(map); 
			return(1);
		}
	    }

	    vm_map_unlock_read(map);
	    vm_map_deallocate(map); 
	    return(1);
}
Exemplo n.º 6
0
/*
 * mincore system call handler
 *
 * mincore_args(const void *addr, size_t len, char *vec)
 *
 * No requirements
 */
int
sys_mincore(struct mincore_args *uap)
{
	struct proc *p = curproc;
	vm_offset_t addr, first_addr;
	vm_offset_t end, cend;
	pmap_t pmap;
	vm_map_t map;
	char *vec;
	int error;
	int vecindex, lastvecindex;
	vm_map_entry_t current;
	vm_map_entry_t entry;
	int mincoreinfo;
	unsigned int timestamp;

	/*
	 * Make sure that the addresses presented are valid for user
	 * mode.
	 */
	first_addr = addr = trunc_page((vm_offset_t) uap->addr);
	end = addr + (vm_size_t)round_page(uap->len);
	if (end < addr)
		return (EINVAL);
	if (VM_MAX_USER_ADDRESS > 0 && end > VM_MAX_USER_ADDRESS)
		return (EINVAL);

	/*
	 * Address of byte vector
	 */
	vec = uap->vec;

	map = &p->p_vmspace->vm_map;
	pmap = vmspace_pmap(p->p_vmspace);

	lwkt_gettoken(&map->token);
	vm_map_lock_read(map);
RestartScan:
	timestamp = map->timestamp;

	if (!vm_map_lookup_entry(map, addr, &entry))
		entry = entry->next;

	/*
	 * Do this on a map entry basis so that if the pages are not
	 * in the current processes address space, we can easily look
	 * up the pages elsewhere.
	 */
	lastvecindex = -1;
	for(current = entry;
		(current != &map->header) && (current->start < end);
		current = current->next) {

		/*
		 * ignore submaps (for now) or null objects
		 */
		if (current->maptype != VM_MAPTYPE_NORMAL &&
		    current->maptype != VM_MAPTYPE_VPAGETABLE) {
			continue;
		}
		if (current->object.vm_object == NULL)
			continue;
		
		/*
		 * limit this scan to the current map entry and the
		 * limits for the mincore call
		 */
		if (addr < current->start)
			addr = current->start;
		cend = current->end;
		if (cend > end)
			cend = end;

		/*
		 * scan this entry one page at a time
		 */
		while (addr < cend) {
			/*
			 * Check pmap first, it is likely faster, also
			 * it can provide info as to whether we are the
			 * one referencing or modifying the page.
			 *
			 * If we have to check the VM object, only mess
			 * around with normal maps.  Do not mess around
			 * with virtual page tables (XXX).
			 */
			mincoreinfo = pmap_mincore(pmap, addr);
			if (mincoreinfo == 0 &&
			    current->maptype == VM_MAPTYPE_NORMAL) {
				vm_pindex_t pindex;
				vm_ooffset_t offset;
				vm_page_t m;

				/*
				 * calculate the page index into the object
				 */
				offset = current->offset + (addr - current->start);
				pindex = OFF_TO_IDX(offset);

				/*
				 * if the page is resident, then gather 
				 * information about it.  spl protection is
				 * required to maintain the object 
				 * association.  And XXX what if the page is
				 * busy?  What's the deal with that?
				 *
				 * XXX vm_token - legacy for pmap_ts_referenced
				 *     in i386 and vkernel pmap code.
				 */
				lwkt_gettoken(&vm_token);
				vm_object_hold(current->object.vm_object);
				m = vm_page_lookup(current->object.vm_object,
						    pindex);
				if (m && m->valid) {
					mincoreinfo = MINCORE_INCORE;
					if (m->dirty ||
						pmap_is_modified(m))
						mincoreinfo |= MINCORE_MODIFIED_OTHER;
					if ((m->flags & PG_REFERENCED) ||
						pmap_ts_referenced(m)) {
						vm_page_flag_set(m, PG_REFERENCED);
						mincoreinfo |= MINCORE_REFERENCED_OTHER;
					}
				}
				vm_object_drop(current->object.vm_object);
				lwkt_reltoken(&vm_token);
			}

			/*
			 * subyte may page fault.  In case it needs to modify
			 * the map, we release the lock.
			 */
			vm_map_unlock_read(map);

			/*
			 * calculate index into user supplied byte vector
			 */
			vecindex = OFF_TO_IDX(addr - first_addr);

			/*
			 * If we have skipped map entries, we need to make sure that
			 * the byte vector is zeroed for those skipped entries.
			 */
			while((lastvecindex + 1) < vecindex) {
				error = subyte( vec + lastvecindex, 0);
				if (error) {
					error = EFAULT;
					goto done;
				}
				++lastvecindex;
			}

			/*
			 * Pass the page information to the user
			 */
			error = subyte( vec + vecindex, mincoreinfo);
			if (error) {
				error = EFAULT;
				goto done;
			}

			/*
			 * If the map has changed, due to the subyte, the previous
			 * output may be invalid.
			 */
			vm_map_lock_read(map);
			if (timestamp != map->timestamp)
				goto RestartScan;

			lastvecindex = vecindex;
			addr += PAGE_SIZE;
		}
	}

	/*
	 * subyte may page fault.  In case it needs to modify
	 * the map, we release the lock.
	 */
	vm_map_unlock_read(map);

	/*
	 * Zero the last entries in the byte vector.
	 */
	vecindex = OFF_TO_IDX(end - first_addr);
	while((lastvecindex + 1) < vecindex) {
		error = subyte( vec + lastvecindex, 0);
		if (error) {
			error = EFAULT;
			goto done;
		}
		++lastvecindex;
	}
	
	/*
	 * If the map has changed, due to the subyte, the previous
	 * output may be invalid.
	 */
	vm_map_lock_read(map);
	if (timestamp != map->timestamp)
		goto RestartScan;
	vm_map_unlock_read(map);

	error = 0;
done:
	lwkt_reltoken(&map->token);
	return (error);
}
Exemplo n.º 7
0
/*
 * msync system call handler 
 *
 * msync_args(void *addr, size_t len, int flags)
 *
 * No requirements
 */
int
sys_msync(struct msync_args *uap)
{
	struct proc *p = curproc;
	vm_offset_t addr;
	vm_offset_t tmpaddr;
	vm_size_t size, pageoff;
	int flags;
	vm_map_t map;
	int rv;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	flags = uap->flags;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (size < uap->len)		/* wrap */
		return(EINVAL);
	tmpaddr = addr + size;		/* workaround gcc4 opt */
	if (tmpaddr < addr)		/* wrap */
		return(EINVAL);

	if ((flags & (MS_ASYNC|MS_INVALIDATE)) == (MS_ASYNC|MS_INVALIDATE))
		return (EINVAL);

	map = &p->p_vmspace->vm_map;

	/*
	 * map->token serializes extracting the address range for size == 0
	 * msyncs with the vm_map_clean call; if the token were not held
	 * across the two calls, an intervening munmap/mmap pair, for example,
	 * could cause msync to occur on a wrong region.
	 */
	lwkt_gettoken(&map->token);

	/*
	 * XXX Gak!  If size is zero we are supposed to sync "all modified
	 * pages with the region containing addr".  Unfortunately, we don't
	 * really keep track of individual mmaps so we approximate by flushing
	 * the range of the map entry containing addr. This can be incorrect
	 * if the region splits or is coalesced with a neighbor.
	 */
	if (size == 0) {
		vm_map_entry_t entry;

		vm_map_lock_read(map);
		rv = vm_map_lookup_entry(map, addr, &entry);
		if (rv == FALSE) {
			vm_map_unlock_read(map);
			rv = KERN_INVALID_ADDRESS;
			goto done;
		}
		addr = entry->start;
		size = entry->end - entry->start;
		vm_map_unlock_read(map);
	}

	/*
	 * Clean the pages and interpret the return value.
	 */
	rv = vm_map_clean(map, addr, addr + size, (flags & MS_ASYNC) == 0,
			  (flags & MS_INVALIDATE) != 0);
done:
	lwkt_reltoken(&map->token);

	switch (rv) {
	case KERN_SUCCESS:
		break;
	case KERN_INVALID_ADDRESS:
		return (EINVAL);	/* Sun returns ENOMEM? */
	case KERN_FAILURE:
		return (EIO);
	default:
		return (EINVAL);
	}

	return (0);
}
Exemplo n.º 8
0
/* ARGSUSED */
int
sys_mincore(struct proc *p, void *v, register_t *retval)
{
	struct sys_mincore_args /* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(char *) vec;
	} */ *uap = v;
	vm_page_t m;
	char *vec, pgi;
	struct uvm_object *uobj;
	struct vm_amap *amap;
	struct vm_anon *anon;
	vm_map_entry_t entry;
	vaddr_t start, end, lim;
	vm_map_t map;
	vsize_t len, npgs;
	int error = 0;

	map = &p->p_vmspace->vm_map;

	start = (vaddr_t)SCARG(uap, addr);
	len = SCARG(uap, len);
	vec = SCARG(uap, vec);

	if (start & PAGE_MASK)
		return (EINVAL);
	len = round_page(len);
	end = start + len;
	if (end <= start)
		return (EINVAL);

	npgs = len >> PAGE_SHIFT;

	/*
	 * Lock down vec, so our returned status isn't outdated by
	 * storing the status byte for a page.
	 */
	if ((error = uvm_vslock(p, vec, npgs, VM_PROT_WRITE)) != 0)
		return (error);

	vm_map_lock_read(map);

	if (uvm_map_lookup_entry(map, start, &entry) == FALSE) {
		error = ENOMEM;
		goto out;
	}

	for (/* nothing */;
	     entry != &map->header && entry->start < end;
	     entry = entry->next) {
		KASSERT(!UVM_ET_ISSUBMAP(entry));
		KASSERT(start >= entry->start);

		/* Make sure there are no holes. */
		if (entry->end < end &&
		     (entry->next == &map->header ||
		      entry->next->start > entry->end)) {
			error = ENOMEM;
			goto out;
		}

		lim = end < entry->end ? end : entry->end;

		/*
		 * Special case for objects with no "real" pages.  Those
		 * are always considered resident (mapped devices).
		 */
		if (UVM_ET_ISOBJ(entry)) {
			KASSERT(!UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj));
			if (entry->object.uvm_obj->pgops->pgo_releasepg
			    == NULL) {
				pgi = 1;
				for (/* nothing */; start < lim;
				     start += PAGE_SIZE, vec++)
					copyout(&pgi, vec, sizeof(char));
				continue;
			}
		}

		amap = entry->aref.ar_amap;	/* top layer */
		uobj = entry->object.uvm_obj;	/* bottom layer */

		if (uobj != NULL)
			simple_lock(&uobj->vmobjlock);

		for (/* nothing */; start < lim; start += PAGE_SIZE, vec++) {
			pgi = 0;
			if (amap != NULL) {
				/* Check the top layer first. */
				anon = amap_lookup(&entry->aref,
				    start - entry->start);
				/* Don't need to lock anon here. */
				if (anon != NULL && anon->an_page != NULL) {
					/*
					 * Anon has the page for this entry
					 * offset.
					 */
					pgi = 1;
				}
			}

			if (uobj != NULL && pgi == 0) {
				/* Check the bottom layer. */
				m = uvm_pagelookup(uobj,
				    entry->offset + (start - entry->start));
				if (m != NULL) {
					/*
					 * Object has the page for this entry
					 * offset.
					 */
					pgi = 1;
				}
			}

			copyout(&pgi, vec, sizeof(char));
		}

		if (uobj != NULL)
			simple_unlock(&uobj->vmobjlock);
	}

 out:
	vm_map_unlock_read(map);
	uvm_vsunlock(p, SCARG(uap, vec), npgs);
	return (error);
}
Exemplo n.º 9
0
int
vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
    int fault_flags, vm_page_t *m_hold)
{
	vm_prot_t prot;
	long ahead, behind;
	int alloc_req, era, faultcount, nera, reqpage, result;
	boolean_t growstack, is_first_object_locked, wired;
	int map_generation;
	vm_object_t next_object;
	vm_page_t marray[VM_FAULT_READ_MAX];
	int hardfault;
	struct faultstate fs;
	struct vnode *vp;
	int locked, error;

	hardfault = 0;
	growstack = TRUE;
	PCPU_INC(cnt.v_vm_faults);
	fs.vp = NULL;
	fs.vfslocked = 0;
	faultcount = reqpage = 0;

RetryFault:;

	/*
	 * Find the backing store object and offset into it to begin the
	 * search.
	 */
	fs.map = map;
	result = vm_map_lookup(&fs.map, vaddr, fault_type, &fs.entry,
	    &fs.first_object, &fs.first_pindex, &prot, &wired);
	if (result != KERN_SUCCESS) {
		if (growstack && result == KERN_INVALID_ADDRESS &&
		    map != kernel_map) {
			result = vm_map_growstack(curproc, vaddr);
			if (result != KERN_SUCCESS)
				return (KERN_FAILURE);
			growstack = FALSE;
			goto RetryFault;
		}
		return (result);
	}

	map_generation = fs.map->timestamp;

	if (fs.entry->eflags & MAP_ENTRY_NOFAULT) {
		if ((curthread->td_pflags & TDP_DEVMEMIO) != 0) {
			vm_map_unlock_read(fs.map);
			return (KERN_FAILURE);
		}
		panic("vm_fault: fault on nofault entry, addr: %lx",
		    (u_long)vaddr);
	}

	if (fs.entry->eflags & MAP_ENTRY_IN_TRANSITION &&
	    fs.entry->wiring_thread != curthread) {
		vm_map_unlock_read(fs.map);
		vm_map_lock(fs.map);
		if (vm_map_lookup_entry(fs.map, vaddr, &fs.entry) &&
		    (fs.entry->eflags & MAP_ENTRY_IN_TRANSITION)) {
			fs.entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
			vm_map_unlock_and_wait(fs.map, 0);
		} else
			vm_map_unlock(fs.map);
		goto RetryFault;
	}

	/*
	 * Make a reference to this object to prevent its disposal while we
	 * are messing with it.  Once we have the reference, the map is free
	 * to be diddled.  Since objects reference their shadows (and copies),
	 * they will stay around as well.
	 *
	 * Bump the paging-in-progress count to prevent size changes (e.g. 
	 * truncation operations) during I/O.  This must be done after
	 * obtaining the vnode lock in order to avoid possible deadlocks.
	 */
	VM_OBJECT_LOCK(fs.first_object);
	vm_object_reference_locked(fs.first_object);
	vm_object_pip_add(fs.first_object, 1);

	fs.lookup_still_valid = TRUE;

	if (wired)
		fault_type = prot | (fault_type & VM_PROT_COPY);

	fs.first_m = NULL;

	/*
	 * Search for the page at object/offset.
	 */
	fs.object = fs.first_object;
	fs.pindex = fs.first_pindex;
	while (TRUE) {
		/*
		 * If the object is dead, we stop here
		 */
		if (fs.object->flags & OBJ_DEAD) {
			unlock_and_deallocate(&fs);
			return (KERN_PROTECTION_FAILURE);
		}

		/*
		 * See if page is resident
		 */
		fs.m = vm_page_lookup(fs.object, fs.pindex);
		if (fs.m != NULL) {
			/* 
			 * check for page-based copy on write.
			 * We check fs.object == fs.first_object so
			 * as to ensure the legacy COW mechanism is
			 * used when the page in question is part of
			 * a shadow object.  Otherwise, vm_page_cowfault()
			 * removes the page from the backing object, 
			 * which is not what we want.
			 */
			vm_page_lock(fs.m);
			if ((fs.m->cow) && 
			    (fault_type & VM_PROT_WRITE) &&
			    (fs.object == fs.first_object)) {
				vm_page_cowfault(fs.m);
				unlock_and_deallocate(&fs);
				goto RetryFault;
			}

			/*
			 * Wait/Retry if the page is busy.  We have to do this
			 * if the page is busy via either VPO_BUSY or 
			 * vm_page_t->busy because the vm_pager may be using
			 * vm_page_t->busy for pageouts ( and even pageins if
			 * it is the vnode pager ), and we could end up trying
			 * to pagein and pageout the same page simultaneously.
			 *
			 * We can theoretically allow the busy case on a read
			 * fault if the page is marked valid, but since such
			 * pages are typically already pmap'd, putting that
			 * special case in might be more effort then it is 
			 * worth.  We cannot under any circumstances mess
			 * around with a vm_page_t->busy page except, perhaps,
			 * to pmap it.
			 */
			if ((fs.m->oflags & VPO_BUSY) || fs.m->busy) {
				/*
				 * Reference the page before unlocking and
				 * sleeping so that the page daemon is less
				 * likely to reclaim it. 
				 */
				vm_page_aflag_set(fs.m, PGA_REFERENCED);
				vm_page_unlock(fs.m);
				if (fs.object != fs.first_object) {
					if (!VM_OBJECT_TRYLOCK(
					    fs.first_object)) {
						VM_OBJECT_UNLOCK(fs.object);
						VM_OBJECT_LOCK(fs.first_object);
						VM_OBJECT_LOCK(fs.object);
					}
					vm_page_lock(fs.first_m);
					vm_page_free(fs.first_m);
					vm_page_unlock(fs.first_m);
					vm_object_pip_wakeup(fs.first_object);
					VM_OBJECT_UNLOCK(fs.first_object);
					fs.first_m = NULL;
				}
				unlock_map(&fs);
				if (fs.m == vm_page_lookup(fs.object,
				    fs.pindex)) {
					vm_page_sleep_if_busy(fs.m, TRUE,
					    "vmpfw");
				}
				vm_object_pip_wakeup(fs.object);
				VM_OBJECT_UNLOCK(fs.object);
				PCPU_INC(cnt.v_intrans);
				vm_object_deallocate(fs.first_object);
				goto RetryFault;
			}
			vm_pageq_remove(fs.m);
			vm_page_unlock(fs.m);

			/*
			 * Mark page busy for other processes, and the 
			 * pagedaemon.  If it still isn't completely valid
			 * (readable), jump to readrest, else break-out ( we
			 * found the page ).
			 */
			vm_page_busy(fs.m);
			if (fs.m->valid != VM_PAGE_BITS_ALL)
				goto readrest;
			break;
		}

		/*
		 * Page is not resident, If this is the search termination
		 * or the pager might contain the page, allocate a new page.
		 */
		if (TRYPAGER || fs.object == fs.first_object) {
			if (fs.pindex >= fs.object->size) {
				unlock_and_deallocate(&fs);
				return (KERN_PROTECTION_FAILURE);
			}

			/*
			 * Allocate a new page for this object/offset pair.
			 *
			 * Unlocked read of the p_flag is harmless. At
			 * worst, the P_KILLED might be not observed
			 * there, and allocation can fail, causing
			 * restart and new reading of the p_flag.
			 */
			fs.m = NULL;
			if (!vm_page_count_severe() || P_KILLED(curproc)) {
#if VM_NRESERVLEVEL > 0
				if ((fs.object->flags & OBJ_COLORED) == 0) {
					fs.object->flags |= OBJ_COLORED;
					fs.object->pg_color = atop(vaddr) -
					    fs.pindex;
				}
#endif
				alloc_req = P_KILLED(curproc) ?
				    VM_ALLOC_SYSTEM : VM_ALLOC_NORMAL;
				if (fs.object->type != OBJT_VNODE &&
				    fs.object->backing_object == NULL)
					alloc_req |= VM_ALLOC_ZERO;
				fs.m = vm_page_alloc(fs.object, fs.pindex,
				    alloc_req);
			}
			if (fs.m == NULL) {
				unlock_and_deallocate(&fs);
				VM_WAITPFAULT;
				goto RetryFault;
			} else if (fs.m->valid == VM_PAGE_BITS_ALL)
				break;
		}

readrest:
		/*
		 * We have found a valid page or we have allocated a new page.
		 * The page thus may not be valid or may not be entirely 
		 * valid.
		 *
		 * Attempt to fault-in the page if there is a chance that the
		 * pager has it, and potentially fault in additional pages
		 * at the same time.
		 */
		if (TRYPAGER) {
			int rv;
			u_char behavior = vm_map_entry_behavior(fs.entry);

			if (behavior == MAP_ENTRY_BEHAV_RANDOM ||
			    P_KILLED(curproc)) {
				behind = 0;
				ahead = 0;
			} else if (behavior == MAP_ENTRY_BEHAV_SEQUENTIAL) {
				behind = 0;
				ahead = atop(fs.entry->end - vaddr) - 1;
				if (ahead > VM_FAULT_READ_AHEAD_MAX)
					ahead = VM_FAULT_READ_AHEAD_MAX;
				if (fs.pindex == fs.entry->next_read)
					vm_fault_cache_behind(&fs,
					    VM_FAULT_READ_MAX);
			} else {
				/*
				 * If this is a sequential page fault, then
				 * arithmetically increase the number of pages
				 * in the read-ahead window.  Otherwise, reset
				 * the read-ahead window to its smallest size.
				 */
				behind = atop(vaddr - fs.entry->start);
				if (behind > VM_FAULT_READ_BEHIND)
					behind = VM_FAULT_READ_BEHIND;
				ahead = atop(fs.entry->end - vaddr) - 1;
				era = fs.entry->read_ahead;
				if (fs.pindex == fs.entry->next_read) {
					nera = era + behind;
					if (nera > VM_FAULT_READ_AHEAD_MAX)
						nera = VM_FAULT_READ_AHEAD_MAX;
					behind = 0;
					if (ahead > nera)
						ahead = nera;
					if (era == VM_FAULT_READ_AHEAD_MAX)
						vm_fault_cache_behind(&fs,
						    VM_FAULT_CACHE_BEHIND);
				} else if (ahead > VM_FAULT_READ_AHEAD_MIN)
					ahead = VM_FAULT_READ_AHEAD_MIN;
				if (era != ahead)
					fs.entry->read_ahead = ahead;
			}

			/*
			 * Call the pager to retrieve the data, if any, after
			 * releasing the lock on the map.  We hold a ref on
			 * fs.object and the pages are VPO_BUSY'd.
			 */
			unlock_map(&fs);

vnode_lock:
			if (fs.object->type == OBJT_VNODE) {
				vp = fs.object->handle;
				if (vp == fs.vp)
					goto vnode_locked;
				else if (fs.vp != NULL) {
					vput(fs.vp);
					fs.vp = NULL;
				}
				locked = VOP_ISLOCKED(vp);

				if (VFS_NEEDSGIANT(vp->v_mount) && !fs.vfslocked) {
					fs.vfslocked = 1;
					if (!mtx_trylock(&Giant)) {
						VM_OBJECT_UNLOCK(fs.object);
						mtx_lock(&Giant);
						VM_OBJECT_LOCK(fs.object);
						goto vnode_lock;
					}
				}
				if (locked != LK_EXCLUSIVE)
					locked = LK_SHARED;
				/* Do not sleep for vnode lock while fs.m is busy */
				error = vget(vp, locked | LK_CANRECURSE |
				    LK_NOWAIT, curthread);
				if (error != 0) {
					int vfslocked;

					vfslocked = fs.vfslocked;
					fs.vfslocked = 0; /* Keep Giant */
					vhold(vp);
					release_page(&fs);
					unlock_and_deallocate(&fs);
					error = vget(vp, locked | LK_RETRY |
					    LK_CANRECURSE, curthread);
					vdrop(vp);
					fs.vp = vp;
					fs.vfslocked = vfslocked;
					KASSERT(error == 0,
					    ("vm_fault: vget failed"));
					goto RetryFault;
				}
				fs.vp = vp;
			}
vnode_locked:
			KASSERT(fs.vp == NULL || !fs.map->system_map,
			    ("vm_fault: vnode-backed object mapped by system map"));

			/*
			 * now we find out if any other pages should be paged
			 * in at this time this routine checks to see if the
			 * pages surrounding this fault reside in the same
			 * object as the page for this fault.  If they do,
			 * then they are faulted in also into the object.  The
			 * array "marray" returned contains an array of
			 * vm_page_t structs where one of them is the
			 * vm_page_t passed to the routine.  The reqpage
			 * return value is the index into the marray for the
			 * vm_page_t passed to the routine.
			 *
			 * fs.m plus the additional pages are VPO_BUSY'd.
			 */
			faultcount = vm_fault_additional_pages(
			    fs.m, behind, ahead, marray, &reqpage);

			rv = faultcount ?
			    vm_pager_get_pages(fs.object, marray, faultcount,
				reqpage) : VM_PAGER_FAIL;

			if (rv == VM_PAGER_OK) {
				/*
				 * Found the page. Leave it busy while we play
				 * with it.
				 */

				/*
				 * Relookup in case pager changed page. Pager
				 * is responsible for disposition of old page
				 * if moved.
				 */
				fs.m = vm_page_lookup(fs.object, fs.pindex);
				if (!fs.m) {
					unlock_and_deallocate(&fs);
					goto RetryFault;
				}

				hardfault++;
				break; /* break to PAGE HAS BEEN FOUND */
			}
			/*
			 * Remove the bogus page (which does not exist at this
			 * object/offset); before doing so, we must get back
			 * our object lock to preserve our invariant.
			 *
			 * Also wake up any other process that may want to bring
			 * in this page.
			 *
			 * If this is the top-level object, we must leave the
			 * busy page to prevent another process from rushing
			 * past us, and inserting the page in that object at
			 * the same time that we are.
			 */
			if (rv == VM_PAGER_ERROR)
				printf("vm_fault: pager read error, pid %d (%s)\n",
				    curproc->p_pid, curproc->p_comm);
			/*
			 * Data outside the range of the pager or an I/O error
			 */
			/*
			 * XXX - the check for kernel_map is a kludge to work
			 * around having the machine panic on a kernel space
			 * fault w/ I/O error.
			 */
			if (((fs.map != kernel_map) && (rv == VM_PAGER_ERROR)) ||
				(rv == VM_PAGER_BAD)) {
				vm_page_lock(fs.m);
				vm_page_free(fs.m);
				vm_page_unlock(fs.m);
				fs.m = NULL;
				unlock_and_deallocate(&fs);
				return ((rv == VM_PAGER_ERROR) ? KERN_FAILURE : KERN_PROTECTION_FAILURE);
			}
			if (fs.object != fs.first_object) {
				vm_page_lock(fs.m);
				vm_page_free(fs.m);
				vm_page_unlock(fs.m);
				fs.m = NULL;
				/*
				 * XXX - we cannot just fall out at this
				 * point, m has been freed and is invalid!
				 */
			}
		}

		/*
		 * We get here if the object has default pager (or unwiring) 
		 * or the pager doesn't have the page.
		 */
		if (fs.object == fs.first_object)
			fs.first_m = fs.m;

		/*
		 * Move on to the next object.  Lock the next object before
		 * unlocking the current one.
		 */
		fs.pindex += OFF_TO_IDX(fs.object->backing_object_offset);
		next_object = fs.object->backing_object;
		if (next_object == NULL) {
			/*
			 * If there's no object left, fill the page in the top
			 * object with zeros.
			 */
			if (fs.object != fs.first_object) {
				vm_object_pip_wakeup(fs.object);
				VM_OBJECT_UNLOCK(fs.object);

				fs.object = fs.first_object;
				fs.pindex = fs.first_pindex;
				fs.m = fs.first_m;
				VM_OBJECT_LOCK(fs.object);
			}
			fs.first_m = NULL;

			/*
			 * Zero the page if necessary and mark it valid.
			 */
			if ((fs.m->flags & PG_ZERO) == 0) {
				pmap_zero_page(fs.m);
			} else {
				PCPU_INC(cnt.v_ozfod);
			}
			PCPU_INC(cnt.v_zfod);
			fs.m->valid = VM_PAGE_BITS_ALL;
			break;	/* break to PAGE HAS BEEN FOUND */
		} else {
			KASSERT(fs.object != next_object,
			    ("object loop %p", next_object));
			VM_OBJECT_LOCK(next_object);
			vm_object_pip_add(next_object, 1);
			if (fs.object != fs.first_object)
				vm_object_pip_wakeup(fs.object);
			VM_OBJECT_UNLOCK(fs.object);
			fs.object = next_object;
		}
	}

	KASSERT((fs.m->oflags & VPO_BUSY) != 0,
	    ("vm_fault: not busy after main loop"));

	/*
	 * PAGE HAS BEEN FOUND. [Loop invariant still holds -- the object lock
	 * is held.]
	 */

	/*
	 * If the page is being written, but isn't already owned by the
	 * top-level object, we have to copy it into a new page owned by the
	 * top-level object.
	 */
	if (fs.object != fs.first_object) {
		/*
		 * We only really need to copy if we want to write it.
		 */
		if ((fault_type & (VM_PROT_COPY | VM_PROT_WRITE)) != 0) {
			/*
			 * This allows pages to be virtually copied from a 
			 * backing_object into the first_object, where the 
			 * backing object has no other refs to it, and cannot
			 * gain any more refs.  Instead of a bcopy, we just 
			 * move the page from the backing object to the 
			 * first object.  Note that we must mark the page 
			 * dirty in the first object so that it will go out 
			 * to swap when needed.
			 */
			is_first_object_locked = FALSE;
			if (
				/*
				 * Only one shadow object
				 */
				(fs.object->shadow_count == 1) &&
				/*
				 * No COW refs, except us
				 */
				(fs.object->ref_count == 1) &&
				/*
				 * No one else can look this object up
				 */
				(fs.object->handle == NULL) &&
				/*
				 * No other ways to look the object up
				 */
				((fs.object->type == OBJT_DEFAULT) ||
				 (fs.object->type == OBJT_SWAP)) &&
			    (is_first_object_locked = VM_OBJECT_TRYLOCK(fs.first_object)) &&
				/*
				 * We don't chase down the shadow chain
				 */
			    fs.object == fs.first_object->backing_object) {
				/*
				 * get rid of the unnecessary page
				 */
				vm_page_lock(fs.first_m);
				vm_page_free(fs.first_m);
				vm_page_unlock(fs.first_m);
				/*
				 * grab the page and put it into the 
				 * process'es object.  The page is 
				 * automatically made dirty.
				 */
				vm_page_lock(fs.m);
				vm_page_rename(fs.m, fs.first_object, fs.first_pindex);
				vm_page_unlock(fs.m);
				vm_page_busy(fs.m);
				fs.first_m = fs.m;
				fs.m = NULL;
				PCPU_INC(cnt.v_cow_optim);
			} else {
				/*
				 * Oh, well, lets copy it.
				 */
				pmap_copy_page(fs.m, fs.first_m);
				fs.first_m->valid = VM_PAGE_BITS_ALL;
				if (wired && (fault_flags &
				    VM_FAULT_CHANGE_WIRING) == 0) {
					vm_page_lock(fs.first_m);
					vm_page_wire(fs.first_m);
					vm_page_unlock(fs.first_m);
					
					vm_page_lock(fs.m);
					vm_page_unwire(fs.m, FALSE);
					vm_page_unlock(fs.m);
				}
				/*
				 * We no longer need the old page or object.
				 */
				release_page(&fs);
			}
			/*
			 * fs.object != fs.first_object due to above 
			 * conditional
			 */
			vm_object_pip_wakeup(fs.object);
			VM_OBJECT_UNLOCK(fs.object);
			/*
			 * Only use the new page below...
			 */
			fs.object = fs.first_object;
			fs.pindex = fs.first_pindex;
			fs.m = fs.first_m;
			if (!is_first_object_locked)
				VM_OBJECT_LOCK(fs.object);
			PCPU_INC(cnt.v_cow_faults);
			curthread->td_cow++;
		} else {
			prot &= ~VM_PROT_WRITE;
		}
	}

	/*
	 * We must verify that the maps have not changed since our last
	 * lookup.
	 */
	if (!fs.lookup_still_valid) {
		vm_object_t retry_object;
		vm_pindex_t retry_pindex;
		vm_prot_t retry_prot;

		if (!vm_map_trylock_read(fs.map)) {
			release_page(&fs);
			unlock_and_deallocate(&fs);
			goto RetryFault;
		}
		fs.lookup_still_valid = TRUE;
		if (fs.map->timestamp != map_generation) {
			result = vm_map_lookup_locked(&fs.map, vaddr, fault_type,
			    &fs.entry, &retry_object, &retry_pindex, &retry_prot, &wired);

			/*
			 * If we don't need the page any longer, put it on the inactive
			 * list (the easiest thing to do here).  If no one needs it,
			 * pageout will grab it eventually.
			 */
			if (result != KERN_SUCCESS) {
				release_page(&fs);
				unlock_and_deallocate(&fs);

				/*
				 * If retry of map lookup would have blocked then
				 * retry fault from start.
				 */
				if (result == KERN_FAILURE)
					goto RetryFault;
				return (result);
			}
			if ((retry_object != fs.first_object) ||
			    (retry_pindex != fs.first_pindex)) {
				release_page(&fs);
				unlock_and_deallocate(&fs);
				goto RetryFault;
			}

			/*
			 * Check whether the protection has changed or the object has
			 * been copied while we left the map unlocked. Changing from
			 * read to write permission is OK - we leave the page
			 * write-protected, and catch the write fault. Changing from
			 * write to read permission means that we can't mark the page
			 * write-enabled after all.
			 */
			prot &= retry_prot;
		}
	}
	/*
	 * If the page was filled by a pager, update the map entry's
	 * last read offset.  Since the pager does not return the
	 * actual set of pages that it read, this update is based on
	 * the requested set.  Typically, the requested and actual
	 * sets are the same.
	 *
	 * XXX The following assignment modifies the map
	 * without holding a write lock on it.
	 */
	if (hardfault)
		fs.entry->next_read = fs.pindex + faultcount - reqpage;

	if ((prot & VM_PROT_WRITE) != 0 ||
	    (fault_flags & VM_FAULT_DIRTY) != 0) {
		vm_object_set_writeable_dirty(fs.object);

		/*
		 * If this is a NOSYNC mmap we do not want to set VPO_NOSYNC
		 * if the page is already dirty to prevent data written with
		 * the expectation of being synced from not being synced.
		 * Likewise if this entry does not request NOSYNC then make
		 * sure the page isn't marked NOSYNC.  Applications sharing
		 * data should use the same flags to avoid ping ponging.
		 */
		if (fs.entry->eflags & MAP_ENTRY_NOSYNC) {
			if (fs.m->dirty == 0)
				fs.m->oflags |= VPO_NOSYNC;
		} else {
			fs.m->oflags &= ~VPO_NOSYNC;
		}

		/*
		 * If the fault is a write, we know that this page is being
		 * written NOW so dirty it explicitly to save on 
		 * pmap_is_modified() calls later.
		 *
		 * Also tell the backing pager, if any, that it should remove
		 * any swap backing since the page is now dirty.
		 */
		if (((fault_type & VM_PROT_WRITE) != 0 &&
		    (fault_flags & VM_FAULT_CHANGE_WIRING) == 0) ||
		    (fault_flags & VM_FAULT_DIRTY) != 0) {
			vm_page_dirty(fs.m);
			vm_pager_page_unswapped(fs.m);
		}
	}

	/*
	 * Page had better still be busy
	 */
	KASSERT(fs.m->oflags & VPO_BUSY,
		("vm_fault: page %p not busy!", fs.m));
	/*
	 * Page must be completely valid or it is not fit to
	 * map into user space.  vm_pager_get_pages() ensures this.
	 */
	KASSERT(fs.m->valid == VM_PAGE_BITS_ALL,
	    ("vm_fault: page %p partially invalid", fs.m));
	VM_OBJECT_UNLOCK(fs.object);

	/*
	 * Put this page into the physical map.  We had to do the unlock above
	 * because pmap_enter() may sleep.  We don't put the page
	 * back on the active queue until later so that the pageout daemon
	 * won't find it (yet).
	 */
	pmap_enter(fs.map->pmap, vaddr, fault_type, fs.m, prot, wired);
	if ((fault_flags & VM_FAULT_CHANGE_WIRING) == 0 && wired == 0)
		vm_fault_prefault(fs.map->pmap, vaddr, fs.entry);
	VM_OBJECT_LOCK(fs.object);
	vm_page_lock(fs.m);

	/*
	 * If the page is not wired down, then put it where the pageout daemon
	 * can find it.
	 */
	if (fault_flags & VM_FAULT_CHANGE_WIRING) {
		if (wired)
			vm_page_wire(fs.m);
		else
			vm_page_unwire(fs.m, 1);
	} else
		vm_page_activate(fs.m);
	if (m_hold != NULL) {
		*m_hold = fs.m;
		vm_page_hold(fs.m);
	}
	vm_page_unlock(fs.m);
	vm_page_wakeup(fs.m);

	/*
	 * Unlock everything, and return
	 */
	unlock_and_deallocate(&fs);
	if (hardfault)
		curthread->td_ru.ru_majflt++;
	else
		curthread->td_ru.ru_minflt++;

	return (KERN_SUCCESS);
}
Exemplo n.º 10
0
/*
 * The map entries can *almost* be read with programs like cat.  However,
 * large maps need special programs to read.  It is not easy to implement
 * a program that can sense the required size of the buffer, and then
 * subsequently do a read with the appropriate size.  This operation cannot
 * be atomic.  The best that we can do is to allow the program to do a read
 * with an arbitrarily large buffer, and return as much as we can.  We can
 * return an error code if the buffer is too small (EFBIG), then the program
 * can try a bigger buffer.
 */
int
procfs_domap(struct proc *curp, struct lwp *lp, struct pfsnode *pfs,
	     struct uio *uio)
{
	struct proc *p = lp->lwp_proc;
	int len;
	struct vnode *vp;
	char *fullpath, *freepath;
	int error;
	vm_map_t map = &p->p_vmspace->vm_map;
	pmap_t pmap = vmspace_pmap(p->p_vmspace);
	vm_map_entry_t entry;
	char mebuffer[MEBUFFERSIZE];

	if (uio->uio_rw != UIO_READ)
		return (EOPNOTSUPP);

	if (uio->uio_offset != 0)
		return (0);
	
	error = 0;
	vm_map_lock_read(map);
	for (entry = map->header.next;
		((uio->uio_resid > 0) && (entry != &map->header));
		entry = entry->next) {
		vm_object_t obj, tobj, lobj;
		int ref_count, shadow_count, flags;
		vm_offset_t addr;
		vm_offset_t ostart;
		int resident, privateresident;
		char *type;

		if (entry->maptype != VM_MAPTYPE_NORMAL &&
		    entry->maptype != VM_MAPTYPE_VPAGETABLE) {
			continue;
		}

		obj = entry->object.vm_object;
		if (obj)
			vm_object_hold(obj);

		if (obj && (obj->shadow_count == 1))
			privateresident = obj->resident_page_count;
		else
			privateresident = 0;

		/*
		 * Use map->hint as a poor man's ripout detector.
		 */
		map->hint = entry;
		ostart = entry->start;

		/*
		 * Count resident pages (XXX can be horrible on 64-bit)
		 */
		resident = 0;
		addr = entry->start;
		while (addr < entry->end) {
			if (pmap_extract(pmap, addr))
				resident++;
			addr += PAGE_SIZE;
		}
		if (obj) {
			lobj = obj;
			while ((tobj = lobj->backing_object) != NULL) {
				KKASSERT(tobj != obj);
				vm_object_hold(tobj);
				if (tobj == lobj->backing_object) {
					if (lobj != obj) {
						vm_object_lock_swap();
						vm_object_drop(lobj);
					}
					lobj = tobj;
				} else {
					vm_object_drop(tobj);
				}
			}
		} else {
			lobj = NULL;
		}

		freepath = NULL;
		fullpath = "-";
		if (lobj) {
			switch(lobj->type) {
			default:
			case OBJT_DEFAULT:
				type = "default";
				vp = NULL;
				break;
			case OBJT_VNODE:
				type = "vnode";
				vp = lobj->handle;
				vref(vp);
				break;
			case OBJT_SWAP:
				type = "swap";
				vp = NULL;
				break;
			case OBJT_DEVICE:
				type = "device";
				vp = NULL;
				break;
			}
			
			flags = obj->flags;
			ref_count = obj->ref_count;
			shadow_count = obj->shadow_count;
			if (vp != NULL) {
				vn_fullpath(p, vp, &fullpath, &freepath, 1);
				vrele(vp);
			}
			if (lobj != obj)
				vm_object_drop(lobj);
		} else {
			type = "none";
			flags = 0;
			ref_count = 0;
			shadow_count = 0;
		}

		/*
		 * format:
		 *  start, end, res, priv res, cow, access, type, (fullpath).
		 */
		ksnprintf(mebuffer, sizeof(mebuffer),
#if LONG_BIT == 64
			  "0x%016lx 0x%016lx %d %d %p %s%s%s %d %d "
#else
			  "0x%08lx 0x%08lx %d %d %p %s%s%s %d %d "
#endif
			  "0x%04x %s %s %s %s\n",
			(u_long)entry->start, (u_long)entry->end,
			resident, privateresident, obj,
			(entry->protection & VM_PROT_READ)?"r":"-",
			(entry->protection & VM_PROT_WRITE)?"w":"-",
			(entry->protection & VM_PROT_EXECUTE)?"x":"-",
			ref_count, shadow_count, flags,
			(entry->eflags & MAP_ENTRY_COW)?"COW":"NCOW",
			(entry->eflags & MAP_ENTRY_NEEDS_COPY)?"NC":"NNC",
			type, fullpath);

		if (obj)
			vm_object_drop(obj);

		if (freepath != NULL) {
			kfree(freepath, M_TEMP);
			freepath = NULL;
		}

		len = strlen(mebuffer);
		if (len > uio->uio_resid) {
			error = EFBIG;
			break;
		}

		/*
		 * We cannot safely hold the map locked while accessing
		 * userspace as a VM fault might recurse the locked map.
		 */
		vm_map_unlock_read(map);
		error = uiomove(mebuffer, len, uio);
		vm_map_lock_read(map);
		if (error)
			break;

		/*
		 * We use map->hint as a poor man's ripout detector.  If
		 * it does not match the entry we set it to prior to
		 * unlocking the map the entry MIGHT now be stale.  In
		 * this case we do an expensive lookup to find our place
		 * in the iteration again.
		 */
		if (map->hint != entry) {
			vm_map_entry_t reentry;

			vm_map_lookup_entry(map, ostart, &reentry);
			entry = reentry;
		}
	}
	vm_map_unlock_read(map);

	return error;
}
Exemplo n.º 11
0
int
sys___msync13(struct lwp *l, const struct sys___msync13_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(int) flags;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	struct vm_map *map;
	int error, flags, uvmflags;
	bool rv;

	/*
	 * extract syscall args from the uap
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	flags = SCARG(uap, flags);

	/* sanity check flags */
	if ((flags & ~(MS_ASYNC | MS_SYNC | MS_INVALIDATE)) != 0 ||
	    (flags & (MS_ASYNC | MS_SYNC | MS_INVALIDATE)) == 0 ||
	    (flags & (MS_ASYNC | MS_SYNC)) == (MS_ASYNC | MS_SYNC))
		return EINVAL;
	if ((flags & (MS_ASYNC | MS_SYNC)) == 0)
		flags |= MS_SYNC;

	/*
	 * align the address to a page boundary and adjust the size accordingly.
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t)round_page(size);


	/*
	 * get map
	 */
	map = &p->p_vmspace->vm_map;

	error = range_test(map, addr, size, false);
	if (error)
		return ENOMEM;

	/*
	 * XXXCDC: do we really need this semantic?
	 *
	 * XXX Gak!  If size is zero we are supposed to sync "all modified
	 * pages with the region containing addr".  Unfortunately, we
	 * don't really keep track of individual mmaps so we approximate
	 * by flushing the range of the map entry containing addr.
	 * This can be incorrect if the region splits or is coalesced
	 * with a neighbor.
	 */

	if (size == 0) {
		struct vm_map_entry *entry;

		vm_map_lock_read(map);
		rv = uvm_map_lookup_entry(map, addr, &entry);
		if (rv == true) {
			addr = entry->start;
			size = entry->end - entry->start;
		}
		vm_map_unlock_read(map);
		if (rv == false)
			return EINVAL;
	}

	/*
	 * translate MS_ flags into PGO_ flags
	 */

	uvmflags = PGO_CLEANIT;
	if (flags & MS_INVALIDATE)
		uvmflags |= PGO_FREE;
	if (flags & MS_SYNC)
		uvmflags |= PGO_SYNCIO;

	error = uvm_map_clean(map, addr, addr+size, uvmflags);
	return error;
}
Exemplo n.º 12
0
/* ARGSUSED */
int
sys_mincore(struct lwp *l, const struct sys_mincore_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(char *) vec;
	} */
	struct proc *p = l->l_proc;
	struct vm_page *pg;
	char *vec, pgi;
	struct uvm_object *uobj;
	struct vm_amap *amap;
	struct vm_anon *anon;
	struct vm_map_entry *entry;
	vaddr_t start, end, lim;
	struct vm_map *map;
	vsize_t len;
	int error = 0, npgs;

	map = &p->p_vmspace->vm_map;

	start = (vaddr_t)SCARG(uap, addr);
	len = SCARG(uap, len);
	vec = SCARG(uap, vec);

	if (start & PAGE_MASK)
		return EINVAL;
	len = round_page(len);
	end = start + len;
	if (end <= start)
		return EINVAL;

	/*
	 * Lock down vec, so our returned status isn't outdated by
	 * storing the status byte for a page.
	 */

	npgs = len >> PAGE_SHIFT;
	error = uvm_vslock(p->p_vmspace, vec, npgs, VM_PROT_WRITE);
	if (error) {
		return error;
	}
	vm_map_lock_read(map);

	if (uvm_map_lookup_entry(map, start, &entry) == false) {
		error = ENOMEM;
		goto out;
	}

	for (/* nothing */;
	     entry != &map->header && entry->start < end;
	     entry = entry->next) {
		KASSERT(!UVM_ET_ISSUBMAP(entry));
		KASSERT(start >= entry->start);

		/* Make sure there are no holes. */
		if (entry->end < end &&
		     (entry->next == &map->header ||
		      entry->next->start > entry->end)) {
			error = ENOMEM;
			goto out;
		}

		lim = end < entry->end ? end : entry->end;

		/*
		 * Special case for objects with no "real" pages.  Those
		 * are always considered resident (mapped devices).
		 */

		if (UVM_ET_ISOBJ(entry)) {
			KASSERT(!UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj));
			if (UVM_OBJ_IS_DEVICE(entry->object.uvm_obj)) {
				for (/* nothing */; start < lim;
				     start += PAGE_SIZE, vec++)
					subyte(vec, 1);
				continue;
			}
		}

		amap = entry->aref.ar_amap;	/* upper layer */
		uobj = entry->object.uvm_obj;	/* lower layer */

		if (amap != NULL)
			amap_lock(amap);
		if (uobj != NULL)
			mutex_enter(uobj->vmobjlock);

		for (/* nothing */; start < lim; start += PAGE_SIZE, vec++) {
			pgi = 0;
			if (amap != NULL) {
				/* Check the upper layer first. */
				anon = amap_lookup(&entry->aref,
				    start - entry->start);
				/* Don't need to lock anon here. */
				if (anon != NULL && anon->an_page != NULL) {

					/*
					 * Anon has the page for this entry
					 * offset.
					 */

					pgi = 1;
				}
			}
			if (uobj != NULL && pgi == 0) {
				/* Check the lower layer. */
				pg = uvm_pagelookup(uobj,
				    entry->offset + (start - entry->start));
				if (pg != NULL) {

					/*
					 * Object has the page for this entry
					 * offset.
					 */

					pgi = 1;
				}
			}
			(void) subyte(vec, pgi);
		}
		if (uobj != NULL)
			mutex_exit(uobj->vmobjlock);
		if (amap != NULL)
			amap_unlock(amap);
	}

 out:
	vm_map_unlock_read(map);
	uvm_vsunlock(p->p_vmspace, SCARG(uap, vec), npgs);
	return error;
}
Exemplo n.º 13
0
/*
* Original vm_pageout_oom, will be called if LRU pageout_oom will fail
*/
static void
original_vm_pageout_oom(int shortage)
{
	struct proc *p, *bigproc;
	vm_offset_t size, bigsize;
	struct thread *td;
	struct vmspace *vm;

	/*
	 * We keep the process bigproc locked once we find it to keep anyone
	 * from messing with it; however, there is a possibility of
	 * deadlock if process B is bigproc and one of it's child processes
	 * attempts to propagate a signal to B while we are waiting for A's
	 * lock while walking this list.  To avoid this, we don't block on
	 * the process lock but just skip a process if it is already locked.
	 */
	bigproc = NULL;
	bigsize = 0;
	sx_slock(&allproc_lock);
	FOREACH_PROC_IN_SYSTEM(p) {
		int breakout;

		if (PROC_TRYLOCK(p) == 0)
			continue;
		/*
		 * If this is a system, protected or killed process, skip it.
		 */
		if (p->p_state != PRS_NORMAL ||
		    (p->p_flag & (P_INEXEC | P_PROTECTED | P_SYSTEM)) ||
		    (p->p_pid == 1) || P_KILLED(p) ||
		    ((p->p_pid < 48) && (swap_pager_avail != 0))) {
			PROC_UNLOCK(p);
			continue;
		}
		/*
		 * If the process is in a non-running type state,
		 * don't touch it.  Check all the threads individually.
		 */
		breakout = 0;
		FOREACH_THREAD_IN_PROC(p, td) {
			thread_lock(td);
			if (!TD_ON_RUNQ(td) &&
			    !TD_IS_RUNNING(td) &&
			    !TD_IS_SLEEPING(td)) {
				thread_unlock(td);
				breakout = 1;
				break;
			}
			thread_unlock(td);
		}
		if (breakout) {
			PROC_UNLOCK(p);
			continue;
		}
		/*
		 * get the process size
		 */
		vm = vmspace_acquire_ref(p);
		if (vm == NULL) {
			PROC_UNLOCK(p);
			continue;
		}
		if (!vm_map_trylock_read(&vm->vm_map)) {
			vmspace_free(vm);
			PROC_UNLOCK(p);
			continue;
		}
		size = vmspace_swap_count(vm);
		vm_map_unlock_read(&vm->vm_map);
		if (shortage == VM_OOM_MEM)
			size += vmspace_resident_count(vm);
		vmspace_free(vm);
		/*
		 * if the this process is bigger than the biggest one
		 * remember it.
		 */
		if (size > bigsize) {
			if (bigproc != NULL)
				PROC_UNLOCK(bigproc);
			bigproc = p;
			bigsize = size;
		} else
			PROC_UNLOCK(p);
	}
Exemplo n.º 14
0
/*
 * The map entries can *almost* be read with programs like cat.  However,
 * large maps need special programs to read.  It is not easy to implement
 * a program that can sense the required size of the buffer, and then
 * subsequently do a read with the appropriate size.  This operation cannot
 * be atomic.  The best that we can do is to allow the program to do a read
 * with an arbitrarily large buffer, and return as much as we can.  We can
 * return an error code if the buffer is too small (EFBIG), then the program
 * can try a bigger buffer.
 */
int
procfs_domap(struct lwp *curl, struct proc *p, struct pfsnode *pfs,
	     struct uio *uio, int linuxmode)
{
	int error;
	struct vmspace *vm;
	struct vm_map *map;
	struct vm_map_entry *entry;
	char *buffer = NULL;
	size_t bufsize = BUFFERSIZE;
	char *path;
	struct vnode *vp;
	struct vattr va;
	dev_t dev;
	long fileid;
	size_t pos;
	int width = (int)((curl->l_proc->p_flag & PK_32) ? sizeof(int32_t) : 
	    sizeof(void *)) * 2;

	if (uio->uio_rw != UIO_READ)
		return EOPNOTSUPP;

	if (uio->uio_offset != 0) {
		/*
		 * we return 0 here, so that the second read returns EOF
		 * we don't support reading from an offset because the
		 * map could have changed between the two reads.
		 */
		return 0;
	}

	error = 0;

	if (linuxmode != 0)
		path = malloc(MAXPATHLEN * 4, M_TEMP, M_WAITOK);
	else
		path = NULL;

	if ((error = proc_vmspace_getref(p, &vm)) != 0)
		goto out;

	map = &vm->vm_map;
	vm_map_lock_read(map);

again:
	buffer = malloc(bufsize, M_TEMP, M_WAITOK);
	pos = 0;
	for (entry = map->header.next; entry != &map->header;
	    entry = entry->next) {

		if (UVM_ET_ISSUBMAP(entry))
			continue;

		if (linuxmode != 0) {
			*path = 0;
			dev = (dev_t)0;
			fileid = 0;
			if (UVM_ET_ISOBJ(entry) &&
			    UVM_OBJ_IS_VNODE(entry->object.uvm_obj)) {
				vp = (struct vnode *)entry->object.uvm_obj;
				vn_lock(vp, LK_SHARED | LK_RETRY);
				error = VOP_GETATTR(vp, &va, curl->l_cred);
				VOP_UNLOCK(vp);
				if (error == 0 && vp != pfs->pfs_vnode) {
					fileid = va.va_fileid;
					dev = va.va_fsid;
					error = vnode_to_path(path,
					    MAXPATHLEN * 4, vp, curl, p);
				}
			}
			pos += snprintf(buffer + pos, bufsize - pos,
			    "%.*"PRIxVADDR"-%.*"PRIxVADDR" %c%c%c%c "
			    "%.*lx %.2llx:%.2llx %-8ld %25.s %s\n",
			    width, entry->start,
			    width, entry->end,
			    (entry->protection & VM_PROT_READ) ? 'r' : '-',
			    (entry->protection & VM_PROT_WRITE) ? 'w' : '-',
			    (entry->protection & VM_PROT_EXECUTE) ? 'x' : '-',
			    (entry->etype & UVM_ET_COPYONWRITE) ? 'p' : 's',
			    width, (unsigned long)entry->offset,
			    (unsigned long long)major(dev),
			    (unsigned long long)minor(dev), fileid, "", path);
		} else {
			pos += snprintf(buffer + pos, bufsize - pos,
			    "%#"PRIxVADDR" %#"PRIxVADDR" "
			    "%c%c%c %c%c%c %s %s %d %d %d\n",
			    entry->start, entry->end,
			    (entry->protection & VM_PROT_READ) ? 'r' : '-',
			    (entry->protection & VM_PROT_WRITE) ? 'w' : '-',
			    (entry->protection & VM_PROT_EXECUTE) ? 'x' : '-',
			    (entry->max_protection & VM_PROT_READ) ? 'r' : '-',
			    (entry->max_protection & VM_PROT_WRITE) ? 'w' : '-',
			    (entry->max_protection & VM_PROT_EXECUTE) ?
				'x' : '-',
			    (entry->etype & UVM_ET_COPYONWRITE) ?
				"COW" : "NCOW",
			    (entry->etype & UVM_ET_NEEDSCOPY) ? "NC" : "NNC",
			    entry->inheritance, entry->wired_count,
			    entry->advice);
		}
		if (pos >= bufsize) {
			bufsize <<= 1;
			if (bufsize > MAXBUFFERSIZE) {
				error = ENOMEM;
				vm_map_unlock_read(map);
				uvmspace_free(vm);
				goto out;
			}
			free(buffer, M_TEMP);
			goto again;
		}
	}

	vm_map_unlock_read(map);
	uvmspace_free(vm);

	error = uiomove(buffer, pos, uio);
out:
	if (path != NULL)
		free(path, M_TEMP);
	if (buffer != NULL)
		free(buffer, M_TEMP);

	return error;
}
Exemplo n.º 15
0
int
sys_msync(struct proc *p, void *v, register_t *retval)
{
	struct sys_msync_args /* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(int) flags;
	} */ *uap = v;
	vaddr_t addr;
	vsize_t size, pageoff;
	vm_map_t map;
	int rv, flags, uvmflags;

	/*
	 * extract syscall args from the uap
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	flags = SCARG(uap, flags);

	/* sanity check flags */
	if ((flags & ~(MS_ASYNC | MS_SYNC | MS_INVALIDATE)) != 0 ||
			(flags & (MS_ASYNC | MS_SYNC | MS_INVALIDATE)) == 0 ||
			(flags & (MS_ASYNC | MS_SYNC)) == (MS_ASYNC | MS_SYNC))
		return (EINVAL);
	if ((flags & (MS_ASYNC | MS_SYNC)) == 0)
		flags |= MS_SYNC;

	/*
	 * align the address to a page boundary, and adjust the size accordingly
	 */
	ALIGN_ADDR(addr, size, pageoff);
	if (addr > SIZE_MAX - size)
		return (EINVAL);		/* disallow wrap-around. */

	/*
	 * get map
	 */

	map = &p->p_vmspace->vm_map;

	/*
	 * XXXCDC: do we really need this semantic?
	 *
	 * XXX Gak!  If size is zero we are supposed to sync "all modified
	 * pages with the region containing addr".  Unfortunately, we
	 * don't really keep track of individual mmaps so we approximate
	 * by flushing the range of the map entry containing addr.
	 * This can be incorrect if the region splits or is coalesced
	 * with a neighbor.
	 */
	if (size == 0) {
		vm_map_entry_t entry;
		
		vm_map_lock_read(map);
		rv = uvm_map_lookup_entry(map, addr, &entry);
		if (rv == TRUE) {
			addr = entry->start;
			size = entry->end - entry->start;
		}
		vm_map_unlock_read(map);
		if (rv == FALSE)
			return (EINVAL);
	}

	/*
	 * translate MS_ flags into PGO_ flags
	 */
	uvmflags = PGO_CLEANIT;
	if (flags & MS_INVALIDATE)
		uvmflags |= PGO_FREE;
	if (flags & MS_SYNC)
		uvmflags |= PGO_SYNCIO;
	else
		uvmflags |= PGO_SYNCIO;	 /* XXXCDC: force sync for now! */

	return (uvm_map_clean(map, addr, addr+size, uvmflags));
}
Exemplo n.º 16
0
void debugger_getprocs_callback(struct allocation_t* ref)
{
	if (!ref)
		return;

	struct message_t* message = __get(ref);
	if (!message)
		return;

	// Only handle requests
	if (message->header.request != 1)
		goto cleanup;

	int(*_sx_slock)(struct sx *sx, int opts, const char *file, int line) = kdlsym(_sx_slock);
	void(*_sx_sunlock)(struct sx *sx, const char *file, int line) = kdlsym(_sx_sunlock);
	void(*_mtx_lock_flags)(struct mtx *m, int opts, const char *file, int line) = kdlsym(_mtx_lock_flags);
	void(*_mtx_unlock_flags)(struct mtx *m, int opts, const char *file, int line) = kdlsym(_mtx_unlock_flags);
	struct sx* allproclock = (struct sx*)kdlsym(allproc_lock);
	struct proclist* allproc = (struct proclist*)*(uint64_t*)kdlsym(allproc);

	void(*vmspace_free)(struct vmspace *) = kdlsym(vmspace_free);
	struct vmspace* (*vmspace_acquire_ref)(struct proc *) = kdlsym(vmspace_acquire_ref);
	void(*_vm_map_lock_read)(vm_map_t map, const char *file, int line) = kdlsym(_vm_map_lock_read);
	void(*_vm_map_unlock_read)(vm_map_t map, const char *file, int line) = kdlsym(_vm_map_unlock_read);

	uint64_t procCount = 0;
	struct proc* p = NULL;
	struct debugger_getprocs_t getproc = { 0 };

	sx_slock(allproclock);
	FOREACH_PROC_IN_SYSTEM(p)
	{
		PROC_LOCK(p);
		// Zero out our process information
		kmemset(&getproc, 0, sizeof(getproc));

		// Get the vm map
		struct vmspace* vm = vmspace_acquire_ref(p);
		vm_map_t map = &p->p_vmspace->vm_map;
		vm_map_lock_read(map);

		struct vm_map_entry* entry = map->header.next;

		// Copy over all of the address information
		getproc.process_id = p->p_pid;
		getproc.text_address = (uint64_t)entry->start;
		getproc.text_size = (uint64_t)entry->end - entry->start;
		getproc.data_address = (uint64_t)p->p_vmspace->vm_daddr;
		getproc.data_size = p->p_vmspace->vm_dsize;
		// Copy over the name and path
		kmemcpy(getproc.process_name, p->p_comm, sizeof(getproc.process_name));
		kmemcpy(getproc.path, p->p_elfpath, sizeof(getproc.path));
		// Write it back to the PC
		kwrite(message->socket, &getproc, sizeof(getproc));
		procCount++;

		// Free the vmmap
		vm_map_unlock_read(map);
		vmspace_free(vm);

		PROC_UNLOCK(p);
	}
	sx_sunlock(allproclock);
	// Send finalizer, because f**k this shit
	kmemset(&getproc, 0xDD, sizeof(getproc));
	kwrite(message->socket, &getproc, sizeof(getproc));

cleanup:
	__dec(ref);
}
Exemplo n.º 17
0
/*
 * The map entries can *almost* be read with programs like cat.  However,
 * large maps need special programs to read.  It is not easy to implement
 * a program that can sense the required size of the buffer, and then
 * subsequently do a read with the appropriate size.  This operation cannot
 * be atomic.  The best that we can do is to allow the program to do a read
 * with an arbitrarily large buffer, and return as much as we can.  We can
 * return an error code if the buffer is too small (EFBIG), then the program
 * can try a bigger buffer.
 */
int
procfs_doprocmap(PFS_FILL_ARGS)
{
	struct vmspace *vm;
	vm_map_t map;
	vm_map_entry_t entry, tmp_entry;
	struct vnode *vp;
	char *fullpath, *freepath;
	struct uidinfo *uip;
	int error, vfslocked;
	unsigned int last_timestamp;
#ifdef COMPAT_FREEBSD32
	int wrap32 = 0;
#endif

	PROC_LOCK(p);
	error = p_candebug(td, p);
	PROC_UNLOCK(p);
	if (error)
		return (error);

	if (uio->uio_rw != UIO_READ)
		return (EOPNOTSUPP);

#ifdef COMPAT_FREEBSD32
        if (curproc->p_sysent->sv_flags & SV_ILP32) {
                if (!(p->p_sysent->sv_flags & SV_ILP32))
                        return (EOPNOTSUPP);
                wrap32 = 1;
        }
#endif

	vm = vmspace_acquire_ref(p);
	if (vm == NULL)
		return (ESRCH);
	map = &vm->vm_map;
	vm_map_lock_read(map);
	for (entry = map->header.next; entry != &map->header;
	     entry = entry->next) {
		vm_object_t obj, tobj, lobj;
		int ref_count, shadow_count, flags;
		vm_offset_t e_start, e_end, addr;
		int resident, privateresident;
		char *type;
		vm_eflags_t e_eflags;
		vm_prot_t e_prot;

		if (entry->eflags & MAP_ENTRY_IS_SUB_MAP)
			continue;

		e_eflags = entry->eflags;
		e_prot = entry->protection;
		e_start = entry->start;
		e_end = entry->end;
		privateresident = 0;
		obj = entry->object.vm_object;
		if (obj != NULL) {
			VM_OBJECT_LOCK(obj);
			if (obj->shadow_count == 1)
				privateresident = obj->resident_page_count;
		}
		uip = (entry->uip) ? entry->uip : (obj ? obj->uip : NULL);

		resident = 0;
		addr = entry->start;
		while (addr < entry->end) {
			if (pmap_extract(map->pmap, addr))
				resident++;
			addr += PAGE_SIZE;
		}

		for (lobj = tobj = obj; tobj; tobj = tobj->backing_object) {
			if (tobj != obj)
				VM_OBJECT_LOCK(tobj);
			if (lobj != obj)
				VM_OBJECT_UNLOCK(lobj);
			lobj = tobj;
		}
		last_timestamp = map->timestamp;
		vm_map_unlock_read(map);

		freepath = NULL;
		fullpath = "-";
		if (lobj) {
			switch (lobj->type) {
			default:
			case OBJT_DEFAULT:
				type = "default";
				vp = NULL;
				break;
			case OBJT_VNODE:
				type = "vnode";
				vp = lobj->handle;
				vref(vp);
				break;
			case OBJT_SWAP:
				type = "swap";
				vp = NULL;
				break;
			case OBJT_SG:
			case OBJT_DEVICE:
				type = "device";
				vp = NULL;
				break;
			}
			if (lobj != obj)
				VM_OBJECT_UNLOCK(lobj);

			flags = obj->flags;
			ref_count = obj->ref_count;
			shadow_count = obj->shadow_count;
			VM_OBJECT_UNLOCK(obj);
			if (vp != NULL) {
				vn_fullpath(td, vp, &fullpath, &freepath);
				vfslocked = VFS_LOCK_GIANT(vp->v_mount);
				vrele(vp);
				VFS_UNLOCK_GIANT(vfslocked);
			}
		} else {
			type = "none";
			flags = 0;
			ref_count = 0;
			shadow_count = 0;
		}

		/*
		 * format:
		 *  start, end, resident, private resident, cow, access, type,
		 *         charged, charged uid.
		 */
		error = sbuf_printf(sb,
		    "0x%lx 0x%lx %d %d %p %s%s%s %d %d 0x%x %s %s %s %s %s %d\n",
			(u_long)e_start, (u_long)e_end,
			resident, privateresident,
#ifdef COMPAT_FREEBSD32
			wrap32 ? NULL : obj,	/* Hide 64 bit value */
#else
			obj,
#endif
			(e_prot & VM_PROT_READ)?"r":"-",
			(e_prot & VM_PROT_WRITE)?"w":"-",
			(e_prot & VM_PROT_EXECUTE)?"x":"-",
			ref_count, shadow_count, flags,
			(e_eflags & MAP_ENTRY_COW)?"COW":"NCOW",
			(e_eflags & MAP_ENTRY_NEEDS_COPY)?"NC":"NNC",
			type, fullpath,
			uip ? "CH":"NCH", uip ? uip->ui_uid : -1);

		if (freepath != NULL)
			free(freepath, M_TEMP);
		vm_map_lock_read(map);
		if (error == -1) {
			error = 0;
			break;
		}
		if (last_timestamp != map->timestamp) {
			/*
			 * Look again for the entry because the map was
			 * modified while it was unlocked.  Specifically,
			 * the entry may have been clipped, merged, or deleted.
			 */
			vm_map_lookup_entry(map, e_end - 1, &tmp_entry);
			entry = tmp_entry;
		}
	}
	vm_map_unlock_read(map);
	vmspace_free(vm);
	return (error);
}
Exemplo n.º 18
0
kern_return_t
vm32_region_info_64(
	__DEBUG_ONLY vm_map_t			map,
	__DEBUG_ONLY vm32_offset_t		address,
	__DEBUG_ONLY vm_info_region_64_t	*regionp,
	__DEBUG_ONLY vm_info_object_array_t	*objectsp,
	__DEBUG_ONLY mach_msg_type_number_t	*objectsCntp)
{
#if !MACH_VM_DEBUG
        return KERN_FAILURE;
#else
	vm_map_copy_t copy;
	vm_offset_t addr = 0;	/* memory for OOL data */
	vm_size_t size;		/* size of the memory */
	unsigned int room;	/* room for this many objects */
	unsigned int used;	/* actually this many objects */
	vm_info_region_64_t region;
	kern_return_t kr;

	if (map == VM_MAP_NULL)
		return KERN_INVALID_TASK;

	size = 0;		/* no memory allocated yet */

	for (;;) {
		vm_map_t cmap;	/* current map in traversal */
		vm_map_t nmap;	/* next map to look at */
		vm_map_entry_t entry;
		vm_object_t object, cobject, nobject;

		/* nothing is locked */

		vm_map_lock_read(map);
		for (cmap = map;; cmap = nmap) {
			/* cmap is read-locked */

			if (!vm_map_lookup_entry(cmap, address, &entry)) {
				entry = entry->vme_next;
				if (entry == vm_map_to_entry(cmap)) {
					vm_map_unlock_read(cmap);
					if (size != 0)
						kmem_free(ipc_kernel_map,
							  addr, size);
					return KERN_NO_SPACE;
				}
			}

			if (entry->is_sub_map)
				nmap = VME_SUBMAP(entry);
			else
				break;

			/* move down to the lower map */

			vm_map_lock_read(nmap);
			vm_map_unlock_read(cmap);
		}

		/* cmap is read-locked; we have a real entry */

		object = VME_OBJECT(entry);
		region.vir_start = (natural_t) entry->vme_start;
		region.vir_end = (natural_t) entry->vme_end;
		region.vir_object = (natural_t)(uintptr_t) object;
		region.vir_offset = VME_OFFSET(entry);
		region.vir_needs_copy = entry->needs_copy;
		region.vir_protection = entry->protection;
		region.vir_max_protection = entry->max_protection;
		region.vir_inheritance = entry->inheritance;
		region.vir_wired_count = entry->wired_count;
		region.vir_user_wired_count = entry->user_wired_count;

		used = 0;
		room = (unsigned int) (size / sizeof(vm_info_object_t));

		if (object == VM_OBJECT_NULL) {
			vm_map_unlock_read(cmap);
			/* no memory needed */
			break;
		}

		vm_object_lock(object);
		vm_map_unlock_read(cmap);

		for (cobject = object;; cobject = nobject) {
			/* cobject is locked */

			if (used < room) {
				vm_info_object_t *vio =
					&((vm_info_object_t *) addr)[used];

				vio->vio_object =
					(natural_t)(uintptr_t) cobject;
				vio->vio_size =
					(natural_t) cobject->vo_size;
				vio->vio_ref_count =
					cobject->ref_count;
				vio->vio_resident_page_count =
					cobject->resident_page_count;
				vio->vio_copy =
					(natural_t)(uintptr_t) cobject->copy;
				vio->vio_shadow =
					(natural_t)(uintptr_t) cobject->shadow;
				vio->vio_shadow_offset =
					(natural_t) cobject->vo_shadow_offset;
				vio->vio_paging_offset =
					(natural_t) cobject->paging_offset;
				vio->vio_copy_strategy =
					cobject->copy_strategy;
				vio->vio_last_alloc =
					(vm_offset_t) cobject->last_alloc;
				vio->vio_paging_in_progress =
					cobject->paging_in_progress +
					cobject->activity_in_progress;
				vio->vio_pager_created =
					cobject->pager_created;
				vio->vio_pager_initialized =
					cobject->pager_initialized;
				vio->vio_pager_ready =
					cobject->pager_ready;
				vio->vio_can_persist =
					cobject->can_persist;
				vio->vio_internal =
					cobject->internal;
				vio->vio_temporary =
					cobject->temporary;
				vio->vio_alive =
					cobject->alive;
				vio->vio_purgable =
					(cobject->purgable != VM_PURGABLE_DENY);
				vio->vio_purgable_volatile =
					(cobject->purgable == VM_PURGABLE_VOLATILE ||
					 cobject->purgable == VM_PURGABLE_EMPTY);
			}

			used++;
			nobject = cobject->shadow;
			if (nobject == VM_OBJECT_NULL) {
				vm_object_unlock(cobject);
				break;
			}

			vm_object_lock(nobject);
			vm_object_unlock(cobject);
		}

		/* nothing locked */

		if (used <= room)
			break;

		/* must allocate more memory */

		if (size != 0)
			kmem_free(ipc_kernel_map, addr, size);
		size = vm_map_round_page(2 * used * sizeof(vm_info_object_t),
					 VM_MAP_PAGE_MASK(ipc_kernel_map));

		kr = vm_allocate(ipc_kernel_map, &addr, size, VM_FLAGS_ANYWHERE | VM_MAKE_TAG(VM_KERN_MEMORY_IPC));
		if (kr != KERN_SUCCESS)
			return KERN_RESOURCE_SHORTAGE;

		kr = vm_map_wire(
			ipc_kernel_map,
			vm_map_trunc_page(addr,
					  VM_MAP_PAGE_MASK(ipc_kernel_map)),
			vm_map_round_page(addr + size,
					  VM_MAP_PAGE_MASK(ipc_kernel_map)),
			VM_PROT_READ|VM_PROT_WRITE,
			FALSE);
		assert(kr == KERN_SUCCESS);
	}

	/* free excess memory; make remaining memory pageable */

	if (used == 0) {
		copy = VM_MAP_COPY_NULL;

		if (size != 0)
			kmem_free(ipc_kernel_map, addr, size);
	} else {
		vm_size_t size_used = (used * sizeof(vm_info_object_t));
		vm_size_t vmsize_used = vm_map_round_page(size_used,
					  VM_MAP_PAGE_MASK(ipc_kernel_map));

		kr = vm_map_unwire(
			ipc_kernel_map,
			vm_map_trunc_page(addr,
					  VM_MAP_PAGE_MASK(ipc_kernel_map)),
			vm_map_round_page(addr + size_used,
					  VM_MAP_PAGE_MASK(ipc_kernel_map)),
			FALSE);
		assert(kr == KERN_SUCCESS);

		kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)addr,
				   (vm_map_size_t)size_used, TRUE, &copy);
		assert(kr == KERN_SUCCESS);

		if (size != vmsize_used)
			kmem_free(ipc_kernel_map,
				  addr + vmsize_used, size - vmsize_used);
	}

	*regionp = region;
	*objectsp = (vm_info_object_array_t) copy;
	*objectsCntp = used;
	return KERN_SUCCESS;
#endif /* MACH_VM_DEBUG */
}
Exemplo n.º 19
0
/*
 * The caller must hold proc_token.
 */
static int
do_vmtotal_callback(struct proc *p, void *data)
{
	struct vmtotal *totalp = data;
	struct lwp *lp;
	vm_map_entry_t entry;
	vm_map_t map;
	int paging;

	if (p->p_flag & P_SYSTEM)
		return(0);

	FOREACH_LWP_IN_PROC(lp, p) {
		switch (lp->lwp_stat) {
		case LSSTOP:
		case LSSLEEP:
			if ((p->p_flag & P_SWAPPEDOUT) == 0) {
				if ((lp->lwp_flag & LWP_SINTR) == 0)
					totalp->t_dw++;
				else if (lp->lwp_slptime < maxslp)
					totalp->t_sl++;
			} else if (lp->lwp_slptime < maxslp) {
				totalp->t_sw++;
			}
			if (lp->lwp_slptime >= maxslp)
				return(0);
			break;

		case LSRUN:
			if (p->p_flag & P_SWAPPEDOUT)
				totalp->t_sw++;
			else
				totalp->t_rq++;
			if (p->p_stat == SIDL)
				return(0);
			break;

		default:
			return (0);
		}
	}

	/*
	 * Note active objects.
	 */
	paging = 0;
	lwkt_gettoken(&vm_token);
	if (p->p_vmspace) {
		map = &p->p_vmspace->vm_map;
		vm_map_lock_read(map);
		for (entry = map->header.next;
		     entry != &map->header; entry = entry->next) {
			if (entry->maptype != VM_MAPTYPE_NORMAL &&
			    entry->maptype != VM_MAPTYPE_VPAGETABLE) {
				continue;
			}
			if (entry->object.vm_object == NULL)
				continue;
			vm_object_set_flag(entry->object.vm_object, OBJ_ACTIVE);
			paging |= entry->object.vm_object->paging_in_progress;
		}
		vm_map_unlock_read(map);
	}
	lwkt_reltoken(&vm_token);
	if (paging)
		totalp->t_pw++;
	return(0);
}