Example #1
0
/*
 * getpages handler for the physical pager.
 *
 * Walks the count pages passed in m[].  Each page is zero-filled unless it
 * is already flagged PG_ZERO, flagged PG_ZERO, unmanaged, and marked fully
 * valid and clean.  Every page except m[reqpage] then has its busy state
 * cleared; the requested page is left busy.
 *
 * The object argument is not referenced.  Always returns VM_PAGER_OK.
 */
static int
phys_pager_getpages(vm_object_t object, vm_page_t *m, int count, int reqpage)
{
	vm_page_t pg;
	int idx;
	int ipl;

	ipl = splvm();

	/*
	 * Fill as many pages as the caller has handed us.
	 */
	for (idx = 0; idx < count; idx++) {
		pg = m[idx];

		/* A page already flagged as zeroed is not cleared again. */
		if (!(pg->flags & PG_ZERO))
			pmap_zero_page(pg);
		vm_page_flag_set(pg, PG_ZERO);

		/* Switch off pv_entries */
		vm_page_lock_queues();
		vm_page_unmanage(pg);
		vm_page_unlock_queues();

		pg->valid = VM_PAGE_BITS_ALL;
		pg->dirty = 0;

		/* Only the requested page stays busy. */
		if (idx == reqpage)
			continue;
		vm_page_flag_clear(pg, PG_BUSY);
		pg->busy = 0;
	}

	splx(ipl);

	return (VM_PAGER_OK);
}
Example #2
0
/*
 * mincore system call handler
 *
 * mincore_args(const void *addr, size_t len, char *vec)
 *
 * Stores one status byte per page into the user vector vec for the range
 * starting at trunc_page(addr) and extending round_page(len) bytes.
 * vec[0] corresponds to the first page of that range.  The byte is the
 * value returned by pmap_mincore(), or, when that is 0 for a normal map
 * entry, MINCORE_INCORE plus the *_OTHER flags derived from the resident
 * page in the backing object.  Pages that fall in skipped map entries or
 * in holes between entries get a 0 byte.
 *
 * Returns 0 on success, EINVAL if the range wraps or extends beyond
 * VM_MAX_USER_ADDRESS, and EFAULT if a byte cannot be stored into vec.
 *
 * No requirements
 */
int
sys_mincore(struct mincore_args *uap)
{
	struct proc *p = curproc;
	vm_offset_t addr, first_addr;
	vm_offset_t end, cend;
	pmap_t pmap;
	vm_map_t map;
	char *vec;
	int error;
	int vecindex, lastvecindex;
	vm_map_entry_t current;
	vm_map_entry_t entry;
	int mincoreinfo;
	unsigned int timestamp;

	/*
	 * Make sure that the addresses presented are valid for user
	 * mode.
	 */
	first_addr = addr = trunc_page((vm_offset_t) uap->addr);
	end = addr + (vm_size_t)round_page(uap->len);
	if (end < addr)
		return (EINVAL);
	if (VM_MAX_USER_ADDRESS > 0 && end > VM_MAX_USER_ADDRESS)
		return (EINVAL);

	/*
	 * Address of byte vector
	 */
	vec = uap->vec;

	map = &p->p_vmspace->vm_map;
	pmap = vmspace_pmap(p->p_vmspace);

	lwkt_gettoken(&map->token);
	vm_map_lock_read(map);
RestartScan:
	timestamp = map->timestamp;

	/*
	 * A restart throws away everything reported so far (lastvecindex
	 * is reset to -1 below), so the scan position has to go back to
	 * the first page as well.  Otherwise the gap-filling loop would
	 * zero vector bytes that were already reported for earlier pages.
	 */
	addr = first_addr;

	if (!vm_map_lookup_entry(map, addr, &entry))
		entry = entry->next;

	/*
	 * Do this on a map entry basis so that if the pages are not
	 * in the current processes address space, we can easily look
	 * up the pages elsewhere.
	 */
	lastvecindex = -1;
	for (current = entry;
	     (current != &map->header) && (current->start < end);
	     current = current->next) {

		/*
		 * ignore submaps (for now) or null objects
		 */
		if (current->maptype != VM_MAPTYPE_NORMAL &&
		    current->maptype != VM_MAPTYPE_VPAGETABLE) {
			continue;
		}
		if (current->object.vm_object == NULL)
			continue;

		/*
		 * limit this scan to the current map entry and the
		 * limits for the mincore call
		 */
		if (addr < current->start)
			addr = current->start;
		cend = current->end;
		if (cend > end)
			cend = end;

		/*
		 * scan this entry one page at a time
		 */
		while (addr < cend) {
			/*
			 * Check pmap first, it is likely faster, also
			 * it can provide info as to whether we are the
			 * one referencing or modifying the page.
			 *
			 * If we have to check the VM object, only mess
			 * around with normal maps.  Do not mess around
			 * with virtual page tables (XXX).
			 */
			mincoreinfo = pmap_mincore(pmap, addr);
			if (mincoreinfo == 0 &&
			    current->maptype == VM_MAPTYPE_NORMAL) {
				vm_pindex_t pindex;
				vm_ooffset_t offset;
				vm_page_t m;

				/*
				 * calculate the page index into the object
				 */
				offset = current->offset + (addr - current->start);
				pindex = OFF_TO_IDX(offset);

				/*
				 * if the page is resident, then gather
				 * information about it.  spl protection is
				 * required to maintain the object
				 * association.  And XXX what if the page is
				 * busy?  What's the deal with that?
				 *
				 * XXX vm_token - legacy for pmap_ts_referenced
				 *     in i386 and vkernel pmap code.
				 */
				lwkt_gettoken(&vm_token);
				vm_object_hold(current->object.vm_object);
				m = vm_page_lookup(current->object.vm_object,
						    pindex);
				if (m && m->valid) {
					mincoreinfo = MINCORE_INCORE;
					if (m->dirty ||
						pmap_is_modified(m))
						mincoreinfo |= MINCORE_MODIFIED_OTHER;
					if ((m->flags & PG_REFERENCED) ||
						pmap_ts_referenced(m)) {
						vm_page_flag_set(m, PG_REFERENCED);
						mincoreinfo |= MINCORE_REFERENCED_OTHER;
					}
				}
				vm_object_drop(current->object.vm_object);
				lwkt_reltoken(&vm_token);
			}

			/*
			 * subyte may page fault.  In case it needs to modify
			 * the map, we release the lock.
			 */
			vm_map_unlock_read(map);

			/*
			 * calculate index into user supplied byte vector
			 */
			vecindex = OFF_TO_IDX(addr - first_addr);

			/*
			 * If we have skipped map entries, we need to make sure that
			 * the byte vector is zeroed for those skipped entries.
			 *
			 * lastvecindex is the last slot already written (-1 if
			 * none), so advance it before storing.  Storing first
			 * would write vec[-1] and leave the slot just before
			 * vecindex unzeroed.
			 */
			while ((lastvecindex + 1) < vecindex) {
				++lastvecindex;
				error = subyte(vec + lastvecindex, 0);
				if (error) {
					error = EFAULT;
					goto done;
				}
			}

			/*
			 * Pass the page information to the user
			 */
			error = subyte(vec + vecindex, mincoreinfo);
			if (error) {
				error = EFAULT;
				goto done;
			}

			/*
			 * If the map has changed, due to the subyte, the previous
			 * output may be invalid.
			 */
			vm_map_lock_read(map);
			if (timestamp != map->timestamp)
				goto RestartScan;

			lastvecindex = vecindex;
			addr += PAGE_SIZE;
		}
	}

	/*
	 * subyte may page fault.  In case it needs to modify
	 * the map, we release the lock.
	 */
	vm_map_unlock_read(map);

	/*
	 * Zero the last entries in the byte vector, i.e. every slot after
	 * the last one written up to the end of the requested range.
	 */
	vecindex = OFF_TO_IDX(end - first_addr);
	while ((lastvecindex + 1) < vecindex) {
		++lastvecindex;
		error = subyte(vec + lastvecindex, 0);
		if (error) {
			error = EFAULT;
			goto done;
		}
	}

	/*
	 * If the map has changed, due to the subyte, the previous
	 * output may be invalid.
	 */
	vm_map_lock_read(map);
	if (timestamp != map->timestamp)
		goto RestartScan;
	vm_map_unlock_read(map);

	error = 0;
done:
	/* Every path reaching here has already dropped the map read lock. */
	lwkt_reltoken(&map->token);
	return (error);
}
Example #3
0
/*
 * A VFS can call this function to try to dispose of a read request
 * directly from the VM system, pretty much bypassing almost all VFS
 * overhead except for atime updates.
 *
 * If 0 is returned some or all of the uio was handled.  The caller must
 * check the uio and handle the remainder.
 *
 * The caller must fail on a non-zero error.
 */
int
vop_helper_read_shortcut(struct vop_read_args *ap)
{
	struct vnode *vp;
	struct uio *uio;
	struct lwbuf *lwb;
	struct lwbuf lwb_cache;
	vm_object_t obj;
	vm_page_t m;
	int offset;
	int n;
	int error;

	vp = ap->a_vp;
	uio = ap->a_uio;

	/*
	 * We can't short-cut if there is no VM object or this is a special
	 * UIO_NOCOPY read (typically from VOP_STRATEGY()).  We also can't
	 * do this if we cannot extract the filesize from the vnode.
	 */
	if (vm_read_shortcut_enable == 0)
		return(0);
	if (vp->v_object == NULL || uio->uio_segflg == UIO_NOCOPY)
		return(0);
	if (vp->v_filesize == NOOFFSET)
		return(0);
	if (uio->uio_resid == 0)
		return(0);

	/*
	 * Iterate the uio on a page-by-page basis
	 *
	 * XXX can we leave the object held shared during the uiomove()?
	 */
	++vm_read_shortcut_count;
	obj = vp->v_object;
	vm_object_hold_shared(obj);

	error = 0;
	while (uio->uio_resid && error == 0) {
		offset = (int)uio->uio_offset & PAGE_MASK;
		n = PAGE_SIZE - offset;
		if (n > uio->uio_resid)
			n = uio->uio_resid;
		if (vp->v_filesize < uio->uio_offset)
			break;
		if (uio->uio_offset + n > vp->v_filesize)
			n = vp->v_filesize - uio->uio_offset;
		if (n == 0)
			break;	/* hit EOF */

		m = vm_page_lookup_busy_try(obj, OFF_TO_IDX(uio->uio_offset),
					    FALSE, &error);
		if (error || m == NULL) {
			++vm_read_shortcut_failed;
			error = 0;
			break;
		}
		if ((m->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL) {
			++vm_read_shortcut_failed;
			vm_page_wakeup(m);
			break;
		}
		lwb = lwbuf_alloc(m, &lwb_cache);

		/*
		 * Use a no-fault uiomove() to avoid deadlocking against
		 * our VM object (which could livelock on the same object
		 * due to shared-vs-exclusive), or deadlocking against
		 * our busied page.  Returns EFAULT on any fault which
		 * winds up diving a vnode.
		 */
		error = uiomove_nofault((char *)lwbuf_kva(lwb) + offset,
					n, uio);

		vm_page_flag_set(m, PG_REFERENCED);
		lwbuf_free(lwb);
		vm_page_wakeup(m);
	}
	vm_object_drop(obj);

	/*
	 * Ignore EFAULT since we used uiomove_nofault(), causes caller
	 * to fall-back to normal code for this case.
	 */
	if (error == EFAULT)
		error = 0;

	return (error);
}
Example #4
0
/*
 * MPSAFE thread
 *
 * Page pre-zeroing thread.  arg carries the cpu number the thread moves
 * itself to.  The function never returns; it cycles through the states
 * IDLE -> GET_PAGE -> ZERO_PAGE -> RELEASE_PAGE, executing one state per
 * pass of the outer loop and yielding after each pass.
 */
static void
vm_pagezero(void *arg)
{
	enum zeroidle_state state = STATE_IDLE;
	struct lwbuf lwb_cache;
	struct lwbuf *lwb = NULL;
	vm_page_t m = NULL;
	char *kva = NULL;
	int cpu = (int)(intptr_t)arg;
	int budget = 0;
	int off = 0;
	int zero_state = 0;
	int timo;

	/*
	 * Adjust thread parameters before entering our loop.  The thread
	 * is started with the MP lock held and with normal kernel thread
	 * priority.
	 *
	 * Also put us on the last cpu for now.
	 *
	 * For now leave the MP lock held, the VM routines cannot be called
	 * with it released until tokenization is finished.
	 */
	lwkt_setpri_self(TDPRI_IDLE_WORK);
	lwkt_setcpu_self(globaldata_find(cpu));
	timo = DEFAULT_SLEEP_TIME;

	/*
	 * Loop forever
	 */
	for (;;) {
		int zero_count;

		switch(state) {
		case STATE_IDLE:
			/*
			 * Sleep, then ask whether there is work.  A positive
			 * check refills the page budget; the next sleep
			 * interval is recomputed on every pass.
			 */
			tsleep(&zero_state, 0, "pgzero", timo);
			if (vm_page_zero_check(&zero_count, &zero_state))
				budget = idlezero_rate / 10;
			timo = vm_page_zero_time(zero_count);

			/* The new state is acted on in the next pass. */
			if (budget != 0)
				state = STATE_GET_PAGE;
			break;
		case STATE_GET_PAGE:
			/*
			 * Take a page to zero, going back to idle once the
			 * budget runs out or no page is available.
			 */
			if (--budget == 0) {
				state = STATE_IDLE;
				break;
			}
			m = vm_page_free_fromq_fast();
			if (m == NULL) {
				state = STATE_IDLE;
				break;
			}
			lwb = lwbuf_alloc(m, &lwb_cache);
			kva = (char *)lwbuf_kva(lwb);
			off = 0;
			state = STATE_ZERO_PAGE;
			break;
		case STATE_ZERO_PAGE:
			/*
			 * Clear the page in IDLEZERO_RUN sized pieces,
			 * yielding after each piece.
			 */
			for (; off < PAGE_SIZE; off += IDLEZERO_RUN) {
				if (idlezero_nocache == 1)
					bzeront(&kva[off], IDLEZERO_RUN);
				else
					bzero(&kva[off], IDLEZERO_RUN);
				lwkt_yield();
			}
			state = STATE_RELEASE_PAGE;
			break;
		case STATE_RELEASE_PAGE:
			/*
			 * Drop the mapping, flag the page as zeroed and
			 * return it to the free queue.
			 */
			lwbuf_free(lwb);
			vm_page_flag_set(m, PG_ZERO);
			vm_page_free_toq(m);
			++idlezero_count;	/* non-locked, SMP race ok */
			state = STATE_GET_PAGE;
			break;
		}
		lwkt_yield();
	}
}