Exemplo n.º 1
0
/*
 * Same as above, but forces the page to be detached from the object
 * and go into free pool.
 *
 * arg1 is the sf_buf whose page is being released.  arg2 is an optional
 * struct sendfile_sync; when non-NULL its count is decremented under its
 * mutex and the condition variable is signalled once the count hits zero.
 */
void
sf_ext_free_nocache(void *arg1, void *arg2)
{
	struct sendfile_sync *sync;
	struct sf_buf *sfbuf;
	vm_object_t owner;
	vm_page_t page;

	sfbuf = arg1;
	sync = arg2;

	/* Look the page up before the sf_buf itself is given back. */
	page = sf_buf_page(sfbuf);
	sf_buf_free(sfbuf);

	vm_page_lock(page);
	if (vm_page_unwire(page, PQ_NONE)) {
		/* Try to free the page, but only if it is cheap to. */
		owner = page->object;
		if (owner == NULL) {
			vm_page_free(page);
		} else if (vm_page_xbusied(page) ||
		    !VM_OBJECT_TRYWLOCK(owner)) {
			/*
			 * Either the page is exclusively busied or the
			 * object lock could not be taken without waiting:
			 * settle for deactivating the page.
			 */
			vm_page_deactivate(page);
		} else {
			vm_page_free(page);
			VM_OBJECT_WUNLOCK(owner);
		}
	}
	vm_page_unlock(page);

	if (sync == NULL)
		return;

	mtx_lock(&sync->mtx);
	KASSERT(sync->count > 0, ("Sendfile sync botchup count == 0"));
	sync->count--;
	if (sync->count == 0)
		cv_signal(&sync->cv);
	mtx_unlock(&sync->mtx);
}
Exemplo n.º 2
0
/*
 * Let go of the page recorded in the fault state: call vm_page_wakeup()
 * on it, deactivate it while holding the page lock, and finally clear
 * fs->m.
 */
static inline void
release_page(struct faultstate *fs)
{
	struct vm_page *page;

	page = fs->m;

	vm_page_wakeup(page);

	vm_page_lock(page);
	vm_page_deactivate(page);
	vm_page_unlock(page);

	/* The fault state no longer refers to the page. */
	fs->m = NULL;
}
Exemplo n.º 3
0
/*
 * Speed up the reclamation of up to "distance" pages that precede the
 * faulting pindex within the first object of the shadow chain.
 *
 * The caller holds the write lock on fs->object.  When the first object is
 * a different object it is write-locked for the duration of the scan; if
 * that lock cannot be obtained immediately, the lock on fs->object is
 * dropped and re-taken after the first object's lock.
 *
 * The scan walks backwards from the busy fault page and ends at the first
 * page that is missing, lies below the lower bound, or is not fully valid.
 * Busy pages are skipped.  Pages that are neither held nor wired are
 * unmapped; dirty ones are deactivated and clean ones are cached.
 */
static void
vm_fault_cache_behind(const struct faultstate *fs, int distance)
{
	vm_object_t cur_obj, head_obj;
	vm_page_t pg, prev_pg;
	vm_pindex_t floor_idx;

	cur_obj = fs->object;
	VM_OBJECT_ASSERT_WLOCKED(cur_obj);
	head_obj = fs->first_object;
	if (head_obj != cur_obj && !VM_OBJECT_TRYWLOCK(head_obj)) {
		/* Contended: re-acquire with the first object locked first. */
		VM_OBJECT_WUNLOCK(cur_obj);
		VM_OBJECT_WLOCK(head_obj);
		VM_OBJECT_WLOCK(cur_obj);
	}

	/* Neither fictitious nor unmanaged pages can be cached. */
	if ((head_obj->flags & (OBJ_FICTITIOUS | OBJ_UNMANAGED)) != 0)
		goto done;

	/* Lower bound of the scan, clamped to the entry's starting index. */
	floor_idx = 0;
	if (fs->first_pindex >= distance)
		floor_idx = fs->first_pindex - distance;
	if (floor_idx < OFF_TO_IDX(fs->entry->offset))
		floor_idx = OFF_TO_IDX(fs->entry->offset);

	pg = (head_obj == cur_obj) ? fs->m : fs->first_m;
	KASSERT((pg->oflags & VPO_BUSY) != 0,
	    ("vm_fault_cache_behind: page %p is not busy", pg));

	for (pg = vm_page_prev(pg);
	    pg != NULL && pg->pindex >= floor_idx &&
	    pg->valid == VM_PAGE_BITS_ALL;
	    pg = prev_pg) {
		/* Fetch the predecessor before this page changes queues. */
		prev_pg = vm_page_prev(pg);
		if (pg->busy != 0 || (pg->oflags & VPO_BUSY) != 0)
			continue;
		vm_page_lock(pg);
		if (pg->hold_count == 0 && pg->wire_count == 0) {
			pmap_remove_all(pg);
			vm_page_aflag_clear(pg, PGA_REFERENCED);
			if (pg->dirty == 0)
				vm_page_cache(pg);
			else
				vm_page_deactivate(pg);
		}
		vm_page_unlock(pg);
	}

done:
	if (head_obj != cur_obj)
		VM_OBJECT_WUNLOCK(head_obj);
}
Exemplo n.º 4
0
/*
 * Speed up the reclamation of up to "distance" pages that precede the
 * faulting pindex within the first object of the shadow chain.
 *
 * Entered with fs->object locked.  If the first object is distinct it is
 * locked as well; should the try-lock fail, fs->object is unlocked and both
 * objects are locked again, first object first.  Objects of type
 * OBJT_DEVICE, OBJT_PHYS and OBJT_SG are left untouched.
 *
 * Walking backwards from the busy fault page, every non-busy page that is
 * neither held nor wired is unmapped, then deactivated if dirty or cached
 * if clean.  The walk stops at the first page that is absent, below the
 * lower bound, or not fully valid.
 */
static void
vm_fault_cache_behind(const struct faultstate *fs, int distance)
{
	vm_object_t cur_obj, head;
	vm_page_t cur, before;
	vm_pindex_t lowest;

	cur_obj = fs->object;
	VM_OBJECT_LOCK_ASSERT(cur_obj, MA_OWNED);
	head = fs->first_object;
	if (head != cur_obj && !VM_OBJECT_TRYLOCK(head)) {
		VM_OBJECT_UNLOCK(cur_obj);
		VM_OBJECT_LOCK(head);
		VM_OBJECT_LOCK(cur_obj);
	}

	if (head->type == OBJT_DEVICE || head->type == OBJT_PHYS ||
	    head->type == OBJT_SG)
		goto unlock;

	/* Compute the lowest index to visit, without underflowing. */
	if (fs->first_pindex >= distance)
		lowest = fs->first_pindex - distance;
	else
		lowest = 0;
	if (lowest < OFF_TO_IDX(fs->entry->offset))
		lowest = OFF_TO_IDX(fs->entry->offset);

	if (head != cur_obj)
		cur = fs->first_m;
	else
		cur = fs->m;
	KASSERT((cur->oflags & VPO_BUSY) != 0,
	    ("vm_fault_cache_behind: page %p is not busy", cur));

	for (cur = vm_page_prev(cur);
	    cur != NULL && cur->pindex >= lowest &&
	    cur->valid == VM_PAGE_BITS_ALL;
	    cur = before) {
		/* Remember the predecessor before touching this page. */
		before = vm_page_prev(cur);
		if (cur->busy != 0 || (cur->oflags & VPO_BUSY) != 0)
			continue;
		vm_page_lock(cur);
		if (cur->hold_count == 0 && cur->wire_count == 0) {
			pmap_remove_all(cur);
			vm_page_aflag_clear(cur, PGA_REFERENCED);
			if (cur->dirty == 0)
				vm_page_cache(cur);
			else
				vm_page_deactivate(cur);
		}
		vm_page_unlock(cur);
	}

unlock:
	if (head != cur_obj)
		VM_OBJECT_UNLOCK(head);
}
Exemplo n.º 5
0
/*
 * Move up to io_requested bytes between "uio" and the pages already
 * resident in the VM object behind "control", starting at object offset
 * "offset" and at byte "start_offset" within the first page.
 *
 * Pages are collected in runs of at most MAX_RUN while the object is
 * locked; each collected page is marked busy, the object lock is dropped
 * for the uiomove64() calls, and the lock is re-taken before the pages are
 * woken up again.  The loop ends at the first page that is not resident,
 * at a page this routine declines to handle, or when uiomove64() fails.
 *
 *	mark_dirty	non-zero: set the dirty bit on every page gathered
 *			(and reset cs_validated on validated, untainted
 *			pages).  If the object has a copy object, nothing
 *			is transferred and 0 is returned.
 *	take_reference	non-zero: pass the pages of a run to vm_page_lru()
 *			when the run holds more than one page or
 *			orig_offset is 0.
 *
 * Returns 0 when no VM object is found for "control", otherwise the last
 * uiomove64() result (0 if it was never called or never failed).  A return
 * of 0 does not by itself show that all of io_requested was moved.
 */
int
memory_object_control_uiomove(
	memory_object_control_t	control,
	memory_object_offset_t	offset,
	void		*	uio,
	int			start_offset,
	int			io_requested,
	int			mark_dirty,
	int			take_reference)
{
	vm_object_t		object;
	vm_page_t		dst_page;
	int			xsize;
	int			retval = 0;
	int			cur_run;
	int			cur_needed;
	int			i;
	int			orig_offset;
	vm_page_t		page_run[MAX_RUN];

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL) {
		return (0);
	}
	assert(!object->internal);

	vm_object_lock(object);

	if (mark_dirty && object->copy != VM_OBJECT_NULL) {
		/*
		 * We can't modify the pages without honoring
		 * copy-on-write obligations first, so fall off
		 * this optimized path and fall back to the regular
		 * path.
		 */
		vm_object_unlock(object);
		return 0;
	}
	orig_offset = start_offset;
	    
	while (io_requested && retval == 0) {

		/* number of pages the remaining request spans, capped at MAX_RUN */
		cur_needed = (start_offset + io_requested + (PAGE_SIZE - 1)) / PAGE_SIZE;

		if (cur_needed > MAX_RUN)
		        cur_needed = MAX_RUN;

		for (cur_run = 0; cur_run < cur_needed; ) {

		        if ((dst_page = vm_page_lookup(object, offset)) == VM_PAGE_NULL)
			        break;

			/*
			 * if we're in this routine, we are inside a filesystem's
			 * locking model, so we don't ever want to wait for pages that have
			 * list_req_pending == TRUE since it means that the
			 * page is a candidate for some type of I/O operation,
			 * but that it has not yet been gathered into a UPL...
			 * this implies that it is still outside the domain
			 * of the filesystem and that whoever is responsible for
			 * grabbing it into a UPL may be stuck behind the filesystem
			 * lock this thread owns, or trying to take a lock exclusively
			 * and waiting for the readers to drain from a rw lock...
			 * if we block in those cases, we will deadlock
			 */
			if (dst_page->list_req_pending) {

				if (dst_page->absent) {
					/*
					 * this is the list_req_pending | absent | busy case
					 * which originates from vm_fault_page... we want
					 * to fall out of the fast path and go back
					 * to the caller which will gather this page
					 * into a UPL and issue the I/O if no one
					 * else beats us to it
					 */
					break;
				}
				if (dst_page->pageout || dst_page->cleaning) {
					/*
					 * this is the list_req_pending | pageout | busy case
					 * or the list_req_pending | cleaning case...
					 * which originate from the pageout_scan and
					 * msync worlds for the pageout case and the hibernate
					 * pre-cleaning world for the cleaning case...
					 * we need to reset the state of this page to indicate
					 * it should stay in the cache marked dirty... nothing else we
					 * can do at this point... we can't block on it, we can't busy
					 * it and we can't clean it from this routine.
					 */
					vm_page_lockspin_queues();

					vm_pageout_queue_steal(dst_page, TRUE); 
					vm_page_deactivate(dst_page);

					vm_page_unlock_queues();
				}
				/*
				 * this is the list_req_pending | cleaning case...
				 * we can go ahead and deal with this page since
				 * its ok for us to mark this page busy... if a UPL
				 * tries to gather this page, it will block until the
				 * busy is cleared, thus allowing us safe use of the page
				 * when we're done with it, we will clear busy and wake
				 * up anyone waiting on it, thus allowing the UPL creation
				 * to finish
				 */

			} else if (dst_page->busy || dst_page->cleaning) {
				/*
				 * someone else is playing with the page... if we've
				 * already collected pages into this run, go ahead
				 * and process now, we can't block on this
				 * page while holding other pages in the BUSY state
				 * otherwise we will wait
				 */
				if (cur_run)
					break;
				PAGE_SLEEP(object, dst_page, THREAD_UNINT);
				/* retry the lookup of the same offset */
				continue;
			}

			/*
			 * this routine is only called when copying
			 * to/from real files... no need to consider
			 * encrypted swap pages
			 */
			assert(!dst_page->encrypted);

		        if (mark_dirty) {
			        dst_page->dirty = TRUE;
				if (dst_page->cs_validated && 
				    !dst_page->cs_tainted) {
					/*
					 * CODE SIGNING:
					 * We're modifying a code-signed
					 * page: force revalidate
					 */
					dst_page->cs_validated = FALSE;
#if DEVELOPMENT || DEBUG
                                        vm_cs_validated_resets++;
#endif
					pmap_disconnect(dst_page->phys_page);
				}
			}
			dst_page->busy = TRUE;

			page_run[cur_run++] = dst_page;

			offset += PAGE_SIZE_64;
		}
		if (cur_run == 0)
		        /*
			 * we hit a 'hole' in the cache or
			 * a page we don't want to try to handle,
			 * so bail at this point
			 * we'll unlock the object below
			 */
		        break;
		/* the pages in page_run are busy; copy without the object lock */
		vm_object_unlock(object);

		for (i = 0; i < cur_run; i++) {
		  
		        dst_page = page_run[i];

			if ((xsize = PAGE_SIZE - start_offset) > io_requested)
			        xsize = io_requested;

			/*
			 * NOTE(review): the shift by 12 hard-codes a 4KB page when
			 * turning phys_page into an address; presumably this matches
			 * PAGE_SIZE on the supported targets -- confirm.
			 */
			if ( (retval = uiomove64((addr64_t)(((addr64_t)(dst_page->phys_page) << 12) + start_offset), xsize, uio)) )
			        break;

			io_requested -= xsize;
			/* only the first page of the request starts mid-page */
			start_offset = 0;
		}
		vm_object_lock(object);

		/*
		 * if we have more than 1 page to work on
		 * in the current run, or the original request
		 * started at offset 0 of the page, or we're
		 * processing multiple batches, we will move
		 * the pages to the tail of the inactive queue
		 * to implement an LRU for read/write accesses
		 *
		 * the check for orig_offset == 0 is there to 
		 * mitigate the cost of small (< page_size) requests
		 * to the same page (this way we only move it once)
		 */
		if (take_reference && (cur_run > 1 || orig_offset == 0)) {

			vm_page_lockspin_queues();

			for (i = 0; i < cur_run; i++)
				vm_page_lru(page_run[i]);

			vm_page_unlock_queues();
		}
		/* every page of the run is woken, even if the copy stopped early */
		for (i = 0; i < cur_run; i++) {
		        dst_page = page_run[i];

			/*
			 * someone is explicitly referencing this page...
			 * update clustered and speculative state
			 * 
			 */
			VM_PAGE_CONSUME_CLUSTERED(dst_page);

			PAGE_WAKEUP_DONE(dst_page);
		}
		/* later batches count as starting at offset 0 for the LRU test */
		orig_offset = 0;
	}
	vm_object_unlock(object);

	return (retval);
}
Exemplo n.º 6
0
/*
 * Change the size of the shared memory object behind "shmfd" to "length"
 * bytes.
 *
 * Shrinking zeroes the part of the last retained page that lies beyond
 * the new length, removes the resident and swap pages past the new end,
 * and releases the swap charge for them.  Growing reserves additional
 * swap.  In both cases shm_size, the ctime/mtime stamps and object->size
 * are updated.  The object lock is held throughout, except while waiting
 * for free memory (VM_WAIT).
 *
 * Returns 0 on success (including when the length is unchanged), EBUSY if
 * a shrink is requested while shm_kmappings is positive, EIO if the last
 * page could not be read from the pager, and ENOMEM if the swap
 * reservation for growth fails.
 */
static int
shm_dotruncate(struct shmfd *shmfd, off_t length)
{
	vm_object_t object;
	vm_page_t m, ma[1];
	vm_pindex_t idx, nobjsize;
	vm_ooffset_t delta;
	int base, rv;

	object = shmfd->shm_object;
	VM_OBJECT_LOCK(object);
	if (length == shmfd->shm_size) {
		VM_OBJECT_UNLOCK(object);
		return (0);
	}
	/* New object size in pages, rounded up. */
	nobjsize = OFF_TO_IDX(length + PAGE_MASK);

	/* Are we shrinking?  If so, trim the end. */
	if (length < shmfd->shm_size) {
		/*
		 * Disallow any requests to shrink the size if this
		 * object is mapped into the kernel.
		 */
		if (shmfd->shm_kmappings > 0) {
			VM_OBJECT_UNLOCK(object);
			return (EBUSY);
		}

		/*
		 * Zero the truncated part of the last page.
		 */
		base = length & PAGE_MASK;
		if (base != 0) {
			idx = OFF_TO_IDX(length);
retry:
			m = vm_page_lookup(object, idx);
			if (m != NULL) {
				/* Resident but busy: sleep, then look it up again. */
				if ((m->oflags & VPO_BUSY) != 0 ||
				    m->busy != 0) {
					vm_page_sleep(m, "shmtrc");
					goto retry;
				}
			} else if (vm_pager_has_page(object, idx, NULL, NULL)) {
				/* Not resident, but the pager has it: bring it in. */
				m = vm_page_alloc(object, idx, VM_ALLOC_NORMAL);
				if (m == NULL) {
					/* Out of pages: wait unlocked, then start over. */
					VM_OBJECT_UNLOCK(object);
					VM_WAIT;
					VM_OBJECT_LOCK(object);
					goto retry;
				} else if (m->valid != VM_PAGE_BITS_ALL) {
					ma[0] = m;
					rv = vm_pager_get_pages(object, ma, 1,
					    0);
					/*
					 * NOTE(review): the result of this lookup
					 * is used below without a NULL check --
					 * confirm the pager cannot leave the slot
					 * empty.
					 */
					m = vm_page_lookup(object, idx);
				} else
					/* A cached page was reactivated. */
					rv = VM_PAGER_OK;
				vm_page_lock(m);
				if (rv == VM_PAGER_OK) {
					vm_page_deactivate(m);
					vm_page_unlock(m);
					vm_page_wakeup(m);
				} else {
					vm_page_free(m);
					vm_page_unlock(m);
					VM_OBJECT_UNLOCK(object);
					return (EIO);
				}
			}
			/*
			 * m is NULL here only when the page is neither
			 * resident nor known to the pager; nothing needs
			 * zeroing in that case.
			 */
			if (m != NULL) {
				pmap_zero_page_area(m, base, PAGE_SIZE - base);
				KASSERT(m->valid == VM_PAGE_BITS_ALL,
				    ("shm_dotruncate: page %p is invalid", m));
				vm_page_dirty(m);
				vm_pager_page_unswapped(m);
			}
		}
		/* Bytes covered by the whole pages being dropped. */
		delta = ptoa(object->size - nobjsize);

		/* Toss in memory pages. */
		if (nobjsize < object->size)
			vm_object_page_remove(object, nobjsize, object->size,
			    0);

		/* Toss pages from swap. */
		if (object->type == OBJT_SWAP)
			swap_pager_freespace(object, nobjsize, delta);

		/* Free the swap accounted for shm */
		swap_release_by_cred(delta, object->cred);
		object->charge -= delta;
	} else {
		/* Attempt to reserve the swap */
		delta = ptoa(nobjsize - object->size);
		if (!swap_reserve_by_cred(delta, object->cred)) {
			VM_OBJECT_UNLOCK(object);
			return (ENOMEM);
		}
		object->charge += delta;
	}
	shmfd->shm_size = length;
	mtx_lock(&shm_timestamp_lock);
	vfs_timestamp(&shmfd->shm_ctime);
	shmfd->shm_mtime = shmfd->shm_ctime;
	mtx_unlock(&shm_timestamp_lock);
	object->size = nobjsize;
	VM_OBJECT_UNLOCK(object);
	return (0);
}
Exemplo n.º 7
0
/*
 * Vnode op for VM getpages.
 * Wish wish .... get rid from multiple IO routines
 *
 * nwfs_getpages(struct vnode *a_vp, vm_page_t *a_m, int a_count,
 *		 int a_reqpage, vm_ooffset_t a_offset)
 *
 * Without NWFS_RWCACHE the request is simply handed to
 * vnode_pager_generic_getpages().  With it, the pages are mapped into the
 * KVA of a pbuf, filled by one ncp_read() starting at the file offset of
 * the first page, and their valid bits are set from the number of bytes
 * that actually arrived.
 *
 * Returns VM_PAGER_ERROR if the vnode has no VM object, or if the read
 * failed without transferring a single byte (every page except the
 * requested one is freed in the latter case).  Returns 0 otherwise; when a
 * partial read also reported an error, the non-requested pages are freed
 * rather than queued.
 */
int
nwfs_getpages(struct vop_getpages_args *ap)
{
#ifndef NWFS_RWCACHE
	return vnode_pager_generic_getpages(ap->a_vp, ap->a_m, ap->a_count,
					    ap->a_reqpage, ap->a_seqaccess);
#else
	int i, error, npages;
	size_t nextoff, toff;
	size_t count;
	size_t size;
	struct uio uio;
	struct iovec iov;
	vm_offset_t kva;
	struct buf *bp;
	struct vnode *vp;
	struct thread *td = curthread;	/* XXX */
	struct ucred *cred;
	struct nwmount *nmp;
	struct nwnode *np;
	vm_page_t *pages;

	KKASSERT(td->td_proc);
	cred = td->td_proc->p_ucred;

	vp = ap->a_vp;
	np = VTONW(vp);
	nmp = VFSTONWFS(vp->v_mount);
	pages = ap->a_m;
	count = (size_t)ap->a_count;

	if (vp->v_object == NULL) {
		kprintf("nwfs_getpages: called with non-merged cache vnode??\n");
		return VM_PAGER_ERROR;
	}

	/* Borrow a pbuf purely for its KVA and map the pages there. */
	bp = getpbuf_kva(&nwfs_pbuf_freecnt);
	npages = btoc(count);
	kva = (vm_offset_t) bp->b_data;
	pmap_qenter(kva, pages, npages);

	iov.iov_base = (caddr_t) kva;
	iov.iov_len = count;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_offset = IDX_TO_OFF(pages[0]->pindex);
	uio.uio_resid = count;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_rw = UIO_READ;
	uio.uio_td = td;

	error = ncp_read(NWFSTOCONN(nmp), &np->n_fh, &uio,cred);
	pmap_qremove(kva, npages);

	relpbuf(bp, &nwfs_pbuf_freecnt);

	/* Total failure: nothing was read at all. */
	if (error && (uio.uio_resid == count)) {
		kprintf("nwfs_getpages: error %d\n",error);
		for (i = 0; i < npages; i++) {
			if (ap->a_reqpage != i)
				vnode_pager_freepage(pages[i]);
		}
		return VM_PAGER_ERROR;
	}

	/* Number of bytes actually read. */
	size = count - uio.uio_resid;

	for (i = 0, toff = 0; i < npages; i++, toff = nextoff) {
		vm_page_t m;
		nextoff = toff + PAGE_SIZE;
		m = pages[i];

		m->flags &= ~PG_ZERO;

		/*
		 * NOTE: pmap dirty bit should have already been cleared.
		 *	 We do not clear it here.
		 */
		if (nextoff <= size) {
			/* The read filled the whole page. */
			m->valid = VM_PAGE_BITS_ALL;
			m->dirty = 0;
		} else if (size > toff) {
			/*
			 * The read ended inside this page: validate the
			 * part that was read, rounded up to DEV_BSIZE.
			 */
			int nvalid = ((size + DEV_BSIZE - 1) - toff) &
				      ~(DEV_BSIZE - 1);
			vm_page_set_validclean(m, 0, nvalid);
		} else {
			/*
			 * The read ended before this page, so there is
			 * nothing to validate.  This case must not reach
			 * the computation above: with toff > size the
			 * subtraction wraps and a negative length would be
			 * passed to vm_page_set_validclean().
			 */
			;
		}
		
		if (i != ap->a_reqpage) {
			/*
			 * Whether or not to leave the page activated is up in
			 * the air, but we should put the page on a page queue
			 * somewhere (it already is in the object).  Result:
			 * It appears that empirical results show that
			 * deactivating pages is best.
			 */

			/*
			 * Just in case someone was asking for this page we
			 * now tell them that it is ok to use.
			 */
			if (!error) {
				if (m->flags & PG_REFERENCED)
					vm_page_activate(m);
				else
					vm_page_deactivate(m);
				vm_page_wakeup(m);
			} else {
				vnode_pager_freepage(m);
			}
		}
	}
	return 0;
#endif /* NWFS_RWCACHE */
}
Exemplo n.º 8
0
/*
 *	vm_pageout_scan does the dirty work for the pageout daemon.
 *
 *	Pass one walks the inactive queue until the free count reaches
 *	cnt.v_free_target: referenced pages are reactivated, clean pages
 *	are freed, and dirty pages still in the laundry are handed to the
 *	pager.  Pass two then moves pages from the head of the active
 *	queue to the inactive queue to make up the inactive shortage.
 */
void
vm_pageout_scan()
{
	vm_page_t	pg, next_pg;
	vm_object_t	obj;
	int		shortage;
	int		nfreed;
	int		nfree;
	int		ipl;

	/*
	 *	Only continue when we want more pages to be "free"
	 */

	cnt.v_rev++;

	ipl = splimp();
	simple_lock(&vm_page_queue_free_lock);
	nfree = cnt.v_free_count;
	simple_unlock(&vm_page_queue_free_lock);
	splx(ipl);

	if (nfree < cnt.v_free_target) {
		swapout_threads();

		/*
		 *	Be sure the pmap system is updated so
		 *	we can scan the inactive queue.
		 */

		pmap_update();
	}

	/*
	 *	Acquire the resident page system lock,
	 *	as we may be changing what's resident quite a bit.
	 */
	vm_page_lock_queues();

	/*
	 *	Start scanning the inactive queue for pages we can free.
	 *	We keep scanning until we have enough free pages or
	 *	we have scanned through the entire queue.  If we
	 *	encounter dirty pages, we start cleaning them.
	 */

	nfreed = 0;
	for (pg = vm_page_queue_inactive.tqh_first; pg != NULL; pg = next_pg) {
		/* Re-sample the free count on every iteration. */
		ipl = splimp();
		simple_lock(&vm_page_queue_free_lock);
		nfree = cnt.v_free_count;
		simple_unlock(&vm_page_queue_free_lock);
		splx(ipl);
		if (nfree >= cnt.v_free_target)
			break;

		cnt.v_scan++;
		next_pg = pg->pageq.tqe_next;

		if (pmap_is_referenced(VM_PAGE_TO_PHYS(pg))) {
			/*
			 * The page has been referenced: move it back to
			 * the active queue.
			 */
			vm_page_activate(pg);
			cnt.v_reactivated++;
		} else if (pg->flags & PG_CLEAN) {
			/*
			 * The page is clean: free it up, provided its
			 * object can be locked without waiting.
			 */
			obj = pg->object;
			if (vm_object_lock_try(obj)) {
				pmap_page_protect(VM_PAGE_TO_PHYS(pg),
						  VM_PROT_NONE);
				vm_page_free(pg);
				nfreed++;
				cnt.v_dfree++;
				vm_object_unlock(obj);
			}
		} else if ((pg->flags & PG_LAUNDRY) != 0) {
			/*
			 * The page is dirty and still in the laundry (a
			 * dirty page already being washed is skipped), so
			 * start the cleaning operation and remove it from
			 * the laundry.
			 */
			obj = pg->object;
			if (vm_object_lock_try(obj)) {
				cnt.v_pageouts++;
#ifdef CLUSTERED_PAGEOUT
				if (obj->pager &&
				    vm_pager_cancluster(obj->pager,
				    PG_CLUSTERPUT))
					vm_pageout_cluster(pg, obj);
				else
#endif
				vm_pageout_page(pg, obj);
				thread_wakeup((int) obj);
				vm_object_unlock(obj);
				/*
				 * Former next page may no longer even be on
				 * the inactive queue (due to potential
				 * blocking in the pager with the queues
				 * unlocked).  If it isn't, we just start
				 * over.
				 */
				if (next_pg &&
				    (next_pg->flags & PG_INACTIVE) == 0)
					next_pg =
					    vm_page_queue_inactive.tqh_first;
			}
		}
	}
	
	/*
	 *	Compute the page shortage.  If we are still very low on memory
	 *	be sure that we will move a minimal amount of pages from active
	 *	to inactive.
	 */

	shortage = cnt.v_inactive_target - cnt.v_inactive_count;
	if (shortage <= 0 && nfreed == 0)
		shortage = 1;

	/*
	 *	Move some more pages from active to inactive.
	 */
	for (; shortage > 0; shortage--) {
		pg = vm_page_queue_active.tqh_first;
		if (pg == NULL)
			break;
		vm_page_deactivate(pg);
	}

	vm_page_unlock_queues();
}
Exemplo n.º 9
0
/*
 * Transfer at most "len" bytes, and never past the end of one page,
 * between "uio" and the page of "obj" that contains uio->uio_offset.
 *
 * The page is grabbed with the object write-locked and, if not fully
 * valid, is either read from the pager or has its invalid parts zeroed.
 * It is then held across the unlocked copy, dirtied after a successful
 * write, and finally deactivated or requeued.
 *
 * Returns the uiomove_fromphys() result, or EIO when the pager read fails
 * or the page cannot be found after it.
 */
static int
uiomove_object_page(vm_object_t obj, size_t len, struct uio *uio)
{
	vm_page_t page;
	vm_pindex_t pidx;
	size_t chunk;
	int error, pager_rv, pgoff;

	pidx = OFF_TO_IDX(uio->uio_offset);
	pgoff = uio->uio_offset & PAGE_MASK;
	chunk = MIN(PAGE_SIZE - pgoff, len);

	VM_OBJECT_WLOCK(obj);

	/*
	 * Parallel reads of the page content from disk are prevented
	 * by exclusive busy.
	 *
	 * Although the tmpfs vnode lock is held here, it is
	 * nonetheless safe to sleep waiting for a free page.  The
	 * pageout daemon does not need to acquire the tmpfs vnode
	 * lock to page out tobj's pages because tobj is a OBJT_SWAP
	 * type object.
	 */
	page = vm_page_grab(obj, pidx, VM_ALLOC_NORMAL);
	if (page->valid != VM_PAGE_BITS_ALL) {
		if (!vm_pager_has_page(obj, pidx, NULL, NULL)) {
			/* Nothing in the pager: zero what is invalid. */
			vm_page_zero_invalid(page, TRUE);
		} else {
			pager_rv = vm_pager_get_pages(obj, &page, 1, 0);
			page = vm_page_lookup(obj, pidx);
			if (page == NULL) {
				printf(
		    "uiomove_object: vm_obj %p idx %jd null lookup rv %d\n",
				    obj, pidx, pager_rv);
				goto fail;
			}
			if (pager_rv != VM_PAGER_OK) {
				printf(
	    "uiomove_object: vm_obj %p idx %jd valid %x pager error %d\n",
				    obj, pidx, page->valid, pager_rv);
				vm_page_lock(page);
				vm_page_free(page);
				vm_page_unlock(page);
				goto fail;
			}
		}
	}

	/* Swap the exclusive busy for a hold before unlocking the object. */
	vm_page_xunbusy(page);
	vm_page_lock(page);
	vm_page_hold(page);
	vm_page_unlock(page);
	VM_OBJECT_WUNLOCK(obj);

	error = uiomove_fromphys(&page, pgoff, chunk, uio);
	if (error == 0 && uio->uio_rw == UIO_WRITE) {
		VM_OBJECT_WLOCK(obj);
		vm_page_dirty(page);
		VM_OBJECT_WUNLOCK(obj);
	}

	vm_page_lock(page);
	vm_page_unhold(page);
	if (page->queue != PQ_NONE) {
		/* Requeue to maintain LRU ordering. */
		vm_page_requeue(page);
	} else {
		vm_page_deactivate(page);
	}
	vm_page_unlock(page);

	return (error);

fail:
	VM_OBJECT_WUNLOCK(obj);
	return (EIO);
}
Exemplo n.º 10
0
/*
 * Vnode op for VM getpages.
 * Wish wish .... get rid from multiple IO routines
 *
 * smbfs_getpages(struct vnode *a_vp, vm_page_t *a_m, int a_count,
 *		  int a_reqpage, vm_ooffset_t a_offset)
 *
 * With SMBFS_RWGENERIC the request is handed to vop_stdgetpages().
 * Otherwise the pages are mapped into the KVA of a pbuf, filled by one
 * smb_read() starting at the file offset of the first page (opening and
 * closing the file around the read when it is not currently open), and
 * their valid bits are set from the number of bytes that actually arrived.
 *
 * Returns VM_PAGER_ERROR if the vnode has no VM object, or if the open or
 * read failed without transferring a single byte (every page except the
 * requested one is freed in the latter case).  Returns 0 otherwise; when a
 * partial read also reported an error, the non-requested pages are freed
 * rather than queued.
 */
int
smbfs_getpages(struct vop_getpages_args *ap)
{
#ifdef SMBFS_RWGENERIC
	return vop_stdgetpages(ap);
#else
	int i, error, npages;
	int doclose;
	size_t size, toff, nextoff, count;
	struct uio uio;
	struct iovec iov;
	vm_offset_t kva;
	struct buf *bp;
	struct vnode *vp;
	struct thread *td = curthread;	/* XXX */
	struct ucred *cred;
	struct smbmount *smp;
	struct smbnode *np;
	struct smb_cred scred;
	vm_page_t *pages;

	KKASSERT(td->td_proc);

	vp = ap->a_vp;
	cred = td->td_proc->p_ucred;
	np = VTOSMB(vp);
	smp = VFSTOSMBFS(vp->v_mount);
	pages = ap->a_m;
	count = (size_t)ap->a_count;

	if (vp->v_object == NULL) {
		kprintf("smbfs_getpages: called with non-merged cache vnode??\n");
		return VM_PAGER_ERROR;
	}
	smb_makescred(&scred, td, cred);

	/* Borrow a pbuf purely for its KVA and map the pages there. */
	bp = getpbuf_kva(&smbfs_pbuf_freecnt);
	npages = btoc(count);
	kva = (vm_offset_t) bp->b_data;
	pmap_qenter(kva, pages, npages);

	iov.iov_base = (caddr_t) kva;
	iov.iov_len = count;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_offset = IDX_TO_OFF(pages[0]->pindex);
	uio.uio_resid = count;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_rw = UIO_READ;
	uio.uio_td = td;

	/*
	 * This is kinda nasty.  Since smbfs is physically closing the
	 * fid on close(), we have to reopen it if necessary.  There are
	 * other races here too, such as if another process opens the same
	 * file while we are blocked in read. XXX
	 */
	error = 0;
	doclose = 0;
	if (np->n_opencount == 0) {
		error = smbfs_smb_open(np, SMB_AM_OPENREAD, &scred);
		if (error == 0)
			doclose = 1;
	}
	if (error == 0)
		error = smb_read(smp->sm_share, np->n_fid, &uio, &scred);
	if (doclose)
		smbfs_smb_close(smp->sm_share, np->n_fid, NULL, &scred);
	pmap_qremove(kva, npages);

	relpbuf(bp, &smbfs_pbuf_freecnt);

	/* Total failure: nothing was read at all. */
	if (error && (uio.uio_resid == count)) {
		kprintf("smbfs_getpages: error %d\n",error);
		for (i = 0; i < npages; i++) {
			if (ap->a_reqpage != i)
				vnode_pager_freepage(pages[i]);
		}
		return VM_PAGER_ERROR;
	}

	/* Number of bytes actually read. */
	size = count - uio.uio_resid;

	for (i = 0, toff = 0; i < npages; i++, toff = nextoff) {
		vm_page_t m;
		nextoff = toff + PAGE_SIZE;
		m = pages[i];

		m->flags &= ~PG_ZERO;

		/*
		 * NOTE: pmap dirty bit should have already been cleared.
		 *	 We do not clear it here.
		 */
		if (nextoff <= size) {
			/* The read filled the whole page. */
			m->valid = VM_PAGE_BITS_ALL;
			m->dirty = 0;
		} else if (size > toff) {
			/*
			 * The read ended inside this page: validate the
			 * part that was read, rounded up to DEV_BSIZE.
			 */
			int nvalid = ((size + DEV_BSIZE - 1) - toff) &
				      ~(DEV_BSIZE - 1);
			vm_page_set_validclean(m, 0, nvalid);
		} else {
			/*
			 * The read ended before this page, so there is
			 * nothing to validate.  This case must not reach
			 * the computation above: with toff > size the
			 * subtraction wraps and a negative length would be
			 * passed to vm_page_set_validclean().
			 */
			;
		}
		
		if (i != ap->a_reqpage) {
			/*
			 * Whether or not to leave the page activated is up in
			 * the air, but we should put the page on a page queue
			 * somewhere (it already is in the object).  Result:
			 * It appears that empirical results show that
			 * deactivating pages is best.
			 */

			/*
			 * Just in case someone was asking for this page we
			 * now tell them that it is ok to use.
			 */
			if (!error) {
				if (m->flags & PG_REFERENCED)
					vm_page_activate(m);
				else
					vm_page_deactivate(m);
				vm_page_wakeup(m);
			} else {
				vnode_pager_freepage(m);
			}
		}
	}
	return 0;
#endif /* SMBFS_RWGENERIC */
}
Exemplo n.º 11
0
/*
    struct vnop_getpages_args {
        struct vnode *a_vp;
        vm_page_t *a_m;
        int a_count;
        int a_reqpage;
        vm_ooffset_t a_offset;
    };
*/
static int
fuse_vnop_getpages(struct vop_getpages_args *ap)
{
	int i, error, nextoff, size, toff, count, npages;
	struct uio uio;
	struct iovec iov;
	vm_offset_t kva;
	struct buf *bp;
	struct vnode *vp;
	struct thread *td;
	struct ucred *cred;
	vm_page_t *pages;

	FS_DEBUG2G("heh\n");

	vp = ap->a_vp;
	KASSERT(vp->v_object, ("objectless vp passed to getpages"));
	td = curthread;			/* XXX */
	cred = curthread->td_ucred;	/* XXX */
	pages = ap->a_m;
	count = ap->a_count;

	if (!fsess_opt_mmap(vnode_mount(vp))) {
		FS_DEBUG("called on non-cacheable vnode??\n");
		return (VM_PAGER_ERROR);
	}
	npages = btoc(count);

	/*
	 * If the requested page is partially valid, just return it and
	 * allow the pager to zero-out the blanks.  Partially valid pages
	 * can only occur at the file EOF.
	 */

	VM_OBJECT_LOCK(vp->v_object);
	fuse_vm_page_lock_queues();
	if (pages[ap->a_reqpage]->valid != 0) {
		for (i = 0; i < npages; ++i) {
			if (i != ap->a_reqpage) {
				fuse_vm_page_lock(pages[i]);
				vm_page_free(pages[i]);
				fuse_vm_page_unlock(pages[i]);
			}
		}
		fuse_vm_page_unlock_queues();
		VM_OBJECT_UNLOCK(vp->v_object);
		return 0;
	}
	fuse_vm_page_unlock_queues();
	VM_OBJECT_UNLOCK(vp->v_object);

	/*
	 * We use only the kva address for the buffer, but this is extremely
	 * convienient and fast.
	 */
	bp = getpbuf(&fuse_pbuf_freecnt);

	kva = (vm_offset_t)bp->b_data;
	pmap_qenter(kva, pages, npages);
	PCPU_INC(cnt.v_vnodein);
	PCPU_ADD(cnt.v_vnodepgsin, npages);

	iov.iov_base = (caddr_t)kva;
	iov.iov_len = count;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_offset = IDX_TO_OFF(pages[0]->pindex);
	uio.uio_resid = count;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_rw = UIO_READ;
	uio.uio_td = td;

	error = fuse_io_dispatch(vp, &uio, IO_DIRECT, cred);
	pmap_qremove(kva, npages);

	relpbuf(bp, &fuse_pbuf_freecnt);

	if (error && (uio.uio_resid == count)) {
		FS_DEBUG("error %d\n", error);
		VM_OBJECT_LOCK(vp->v_object);
		fuse_vm_page_lock_queues();
		for (i = 0; i < npages; ++i) {
			if (i != ap->a_reqpage) {
				fuse_vm_page_lock(pages[i]);
				vm_page_free(pages[i]);
				fuse_vm_page_unlock(pages[i]);
			}
		}
		fuse_vm_page_unlock_queues();
		VM_OBJECT_UNLOCK(vp->v_object);
		return VM_PAGER_ERROR;
	}
	/*
	 * Calculate the number of bytes read and validate only that number
	 * of bytes.  Note that due to pending writes, size may be 0.  This
	 * does not mean that the remaining data is invalid!
	 */

	size = count - uio.uio_resid;
	VM_OBJECT_LOCK(vp->v_object);
	fuse_vm_page_lock_queues();
	for (i = 0, toff = 0; i < npages; i++, toff = nextoff) {
		vm_page_t m;

		nextoff = toff + PAGE_SIZE;
		m = pages[i];

		if (nextoff <= size) {
			/*
			 * Read operation filled an entire page
			 */
			m->valid = VM_PAGE_BITS_ALL;
			KASSERT(m->dirty == 0,
			    ("fuse_getpages: page %p is dirty", m));
		} else if (size > toff) {
			/*
			 * Read operation filled a partial page.
			 */
			m->valid = 0;
			vm_page_set_valid_range(m, 0, size - toff);
			KASSERT(m->dirty == 0,
			    ("fuse_getpages: page %p is dirty", m));
		} else {
			/*
			 * Read operation was short.  If no error occured
			 * we may have hit a zero-fill section.   We simply
			 * leave valid set to 0.
			 */
			;
		}
		if (i != ap->a_reqpage) {
			/*
			 * Whether or not to leave the page activated is up in
			 * the air, but we should put the page on a page queue
			 * somewhere (it already is in the object).  Result:
			 * It appears that emperical results show that
			 * deactivating pages is best.
			 */

			/*
			 * Just in case someone was asking for this page we
			 * now tell them that it is ok to use.
			 */
			if (!error) {
				if (m->oflags & VPO_WANTED) {
					fuse_vm_page_lock(m);
					vm_page_activate(m);
					fuse_vm_page_unlock(m);
				} else {
					fuse_vm_page_lock(m);
					vm_page_deactivate(m);
					fuse_vm_page_unlock(m);
				}
				vm_page_wakeup(m);
			} else {
				fuse_vm_page_lock(m);
				vm_page_free(m);
				fuse_vm_page_unlock(m);
			}
		}
	}
	fuse_vm_page_unlock_queues();
	VM_OBJECT_UNLOCK(vp->v_object);
	return 0;
}
/*
 * This is now called from local media FS's to operate against their
 * own vnodes if they fail to implement VOP_GETPAGES.
 *
 * With all the caching local media devices do these days there is really
 * very little point to attempting to restrict the I/O size to contiguous
 * blocks on-disk, especially if our caller thinks we need all the specified
 * pages.  Just construct and issue a READ.
 *
 * Arguments:
 *	vp		vnode to read from
 *	mpp		array of pages to fill; mpp[0] gives the file offset
 *	bytecount	size of the request in bytes (rounded up to pages)
 *	reqpage		index in mpp of the page the caller keeps ownership
 *			of; it is never freed or queued here
 *	seqaccess	non-zero adds IO_SEQMAX << IO_SEQSHIFT to the
 *			VOP_READ flags
 *
 * Returns VM_PAGER_BAD if the vnode has no mount, VM_PAGER_ERROR if the
 * first page lies at or beyond v_filesize or if the read failed (including
 * the case where the requested page ended up with no valid bits), and
 * VM_PAGER_OK otherwise.
 */
int
vnode_pager_generic_getpages(struct vnode *vp, vm_page_t *mpp, int bytecount,
			     int reqpage, int seqaccess)
{
	struct iovec aiov;
	struct uio auio;
	off_t foff;
	int error;
	int count;
	int i;
	int ioflags;

	/*
	 * Do not do anything if the vnode is bad.
	 */
	if (vp->v_mount == NULL)
		return VM_PAGER_BAD;

	/*
	 * Calculate the number of pages.  Since we are paging in whole
	 * pages, adjust bytecount to be an integral multiple of the page
	 * size.  It will be clipped to the file EOF later on.
	 */
	bytecount = round_page(bytecount);
	count = bytecount / PAGE_SIZE;

	/*
	 * We could check m[reqpage]->valid here and shortcut the operation,
	 * but doing so breaks read-ahead.  Instead assume that the VM
	 * system has already done at least the check, don't worry about
	 * any races, and issue the VOP_READ to allow read-ahead to function.
	 *
	 * This keeps the pipeline full for I/O bound sequentially scanned
	 * mmap()'s
	 */
	/* don't shortcut */

	/*
	 * Discard pages past the file EOF.  If the requested page is past
	 * the file EOF we just leave its valid bits set to 0, the caller
	 * expects to maintain ownership of the requested page.  If the
	 * entire range is past file EOF discard everything and generate
	 * a pagein error.
	 */
	foff = IDX_TO_OFF(mpp[0]->pindex);
	if (foff >= vp->v_filesize) {
		for (i = 0; i < count; i++) {
			if (i != reqpage)
				vnode_pager_freepage(mpp[i]);
		}
		return VM_PAGER_ERROR;
	}

	if (foff + bytecount > vp->v_filesize) {
		bytecount = vp->v_filesize - foff;
		/* i = number of pages still needed after clipping to EOF */
		i = round_page(bytecount) / PAGE_SIZE;
		while (count > i) {
			--count;
			if (count != reqpage)
				vnode_pager_freepage(mpp[count]);
		}
	}

	/*
	 * The size of the transfer is bytecount.  bytecount will be an
	 * integral multiple of the page size unless it has been clipped
	 * to the file EOF.  The transfer cannot exceed the file EOF.
	 *
	 * When dealing with real devices we must round-up to the device
	 * sector size.
	 */
	if (vp->v_type == VBLK || vp->v_type == VCHR) {
		int secmask = vp->v_rdev->si_bsize_phys - 1;
		KASSERT(secmask < PAGE_SIZE, ("vnode_pager_generic_getpages: sector size %d too large", secmask + 1));
		bytecount = (bytecount + secmask) & ~secmask;
	}

	/*
	 * Severe hack to avoid deadlocks with the buffer cache
	 *
	 * Each page is switched from its busy state to an I/O-in-progress
	 * state for the duration of the VOP_READ; the loop after the read
	 * reverses this.
	 */
	for (i = 0; i < count; ++i) {
		vm_page_t mt = mpp[i];

		vm_page_io_start(mt);
		vm_page_wakeup(mt);
	}

	/*
	 * Issue the I/O with some read-ahead if bytecount > PAGE_SIZE
	 */
	ioflags = IO_VMIO;
	if (seqaccess)
		ioflags |= IO_SEQMAX << IO_SEQSHIFT;

	/*
	 * UIO_NOCOPY with a NULL base: no data is copied out through the
	 * uio by this request.
	 */
	aiov.iov_base = NULL;
	aiov.iov_len = bytecount;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = foff;
	auio.uio_segflg = UIO_NOCOPY;
	auio.uio_rw = UIO_READ;
	auio.uio_resid = bytecount;
	auio.uio_td = NULL;
	mycpu->gd_cnt.v_vnodein++;
	mycpu->gd_cnt.v_vnodepgsin += count;

	error = VOP_READ(vp, &auio, ioflags, proc0.p_ucred);

	/*
	 * Severe hack to avoid deadlocks with the buffer cache
	 */
	for (i = 0; i < count; ++i) {
		vm_page_busy_wait(mpp[i], FALSE, "getpgs");
		vm_page_io_finish(mpp[i]);
	}

	/*
	 * Calculate the actual number of bytes read and clean up the
	 * page list.  
	 *
	 * NOTE(review): bytecount is not read again after this adjustment;
	 * the cleanup below relies on each page's valid bits instead.
	 */
	bytecount -= auio.uio_resid;

	for (i = 0; i < count; ++i) {
		vm_page_t mt = mpp[i];

		if (i != reqpage) {
			/*
			 * Non-requested pages are queued and released when
			 * they received data, and freed otherwise.
			 */
			if (error == 0 && mt->valid) {
				if (mt->flags & PG_REFERENCED)
					vm_page_activate(mt);
				else
					vm_page_deactivate(mt);
				vm_page_wakeup(mt);
			} else {
				vnode_pager_freepage(mt);
			}
		} else if (mt->valid == 0) {
			if (error == 0) {
				kprintf("page failed but no I/O error page "
					"%p object %p pindex %d\n",
					mt, mt->object, (int) mt->pindex);
				/* whoops, something happened */
				error = EINVAL;
			}
		} else if (mt->valid != VM_PAGE_BITS_ALL) {
			/*
			 * Zero-extend the requested page if necessary (if
			 * the filesystem is using a small block size).
			 */
			vm_page_zero_invalid(mt, TRUE);
		}
	}
	if (error) {
		kprintf("vnode_pager_getpage: I/O read error\n");
	}
	return (error ? VM_PAGER_ERROR : VM_PAGER_OK);
}
Exemplo n.º 13
0
/*
 * spec_getpages() - get pages associated with device vnode.
 *
 * Note that spec_read and spec_write do not use the buffer cache, so we
 * must fully implement getpages here.
 */
static int
devfs_spec_getpages(struct vop_getpages_args *ap)
{
	vm_offset_t kva;
	int error;
	int i, pcount, size;
	struct buf *bp;
	vm_page_t m;
	vm_ooffset_t offset;
	int toff, nextoff, nread;
	struct vnode *vp = ap->a_vp;
	int blksiz;
	int gotreqpage;

	error = 0;
	pcount = round_page(ap->a_count) / PAGE_SIZE;

	/*
	 * Calculate the offset of the transfer and do sanity check.
	 */
	offset = IDX_TO_OFF(ap->a_m[0]->pindex) + ap->a_offset;

	/*
	 * Round up physical size for real devices.  We cannot round using
	 * v_mount's block size data because v_mount has nothing to do with
	 * the device.  i.e. it's usually '/dev'.  We need the physical block
	 * size for the device itself.
	 *
	 * We can't use v_rdev->si_mountpoint because it only exists when the
	 * block device is mounted.  However, we can use v_rdev.
	 */
	if (vn_isdisk(vp, NULL))
		blksiz = vp->v_rdev->si_bsize_phys;
	else
		blksiz = DEV_BSIZE;

	size = (ap->a_count + blksiz - 1) & ~(blksiz - 1);

	bp = getpbuf_kva(NULL);
	kva = (vm_offset_t)bp->b_data;

	/*
	 * Map the pages to be read into the kva.
	 */
	pmap_qenter(kva, ap->a_m, pcount);

	/* Build a minimal buffer header. */
	bp->b_cmd = BUF_CMD_READ;
	bp->b_bcount = size;
	bp->b_resid = 0;
	bsetrunningbufspace(bp, size);

	bp->b_bio1.bio_offset = offset;
	bp->b_bio1.bio_done = devfs_spec_getpages_iodone;

	mycpu->gd_cnt.v_vnodein++;
	mycpu->gd_cnt.v_vnodepgsin += pcount;

	/* Do the input. */
	vn_strategy(ap->a_vp, &bp->b_bio1);

	crit_enter();

	/* We definitely need to be at splbio here. */
	while (bp->b_cmd != BUF_CMD_DONE)
		tsleep(bp, 0, "spread", 0);

	crit_exit();

	if (bp->b_flags & B_ERROR) {
		if (bp->b_error)
			error = bp->b_error;
		else
			error = EIO;
	}

	/*
	 * If EOF is encountered we must zero-extend the result in order
	 * to ensure that the page does not contain garabge.  When no
	 * error occurs, an early EOF is indicated if b_bcount got truncated.
	 * b_resid is relative to b_bcount and should be 0, but some devices
	 * might indicate an EOF with b_resid instead of truncating b_bcount.
	 */
	nread = bp->b_bcount - bp->b_resid;
	if (nread < ap->a_count)
		bzero((caddr_t)kva + nread, ap->a_count - nread);
	pmap_qremove(kva, pcount);

	gotreqpage = 0;
	for (i = 0, toff = 0; i < pcount; i++, toff = nextoff) {
		nextoff = toff + PAGE_SIZE;
		m = ap->a_m[i];

		m->flags &= ~PG_ZERO;

		/*
		 * NOTE: vm_page_undirty/clear_dirty etc do not clear the
		 *	 pmap modified bit.  pmap modified bit should have
		 *	 already been cleared.
		 */
		if (nextoff <= nread) {
			m->valid = VM_PAGE_BITS_ALL;
			vm_page_undirty(m);
		} else if (toff < nread) {
			/*
			 * Since this is a VM request, we have to supply the
			 * unaligned offset to allow vm_page_set_valid()
			 * to zero sub-DEV_BSIZE'd portions of the page.
			 */
			vm_page_set_valid(m, 0, nread - toff);
			vm_page_clear_dirty_end_nonincl(m, 0, nread - toff);
		} else {
			m->valid = 0;
			vm_page_undirty(m);
		}

		if (i != ap->a_reqpage) {
			/*
			 * Just in case someone was asking for this page we
			 * now tell them that it is ok to use.
			 */
			if (!error || (m->valid == VM_PAGE_BITS_ALL)) {
				if (m->valid) {
					if (m->flags & PG_REFERENCED) {
						vm_page_activate(m);
					} else {
						vm_page_deactivate(m);
					}
					vm_page_wakeup(m);
				} else {
					vm_page_free(m);
				}
			} else {
				vm_page_free(m);
			}
		} else if (m->valid) {
			gotreqpage = 1;
			/*
			 * Since this is a VM request, we need to make the
			 * entire page presentable by zeroing invalid sections.
			 */
			if (m->valid != VM_PAGE_BITS_ALL)
			    vm_page_zero_invalid(m, FALSE);
		}
	}
	if (!gotreqpage) {
		m = ap->a_m[ap->a_reqpage];
		devfs_debug(DEVFS_DEBUG_WARNING,
	    "spec_getpages:(%s) I/O read failure: (error=%d) bp %p vp %p\n",
			devtoname(vp->v_rdev), error, bp, bp->b_vp);
		devfs_debug(DEVFS_DEBUG_WARNING,
	    "               size: %d, resid: %d, a_count: %d, valid: 0x%x\n",
		    size, bp->b_resid, ap->a_count, m->valid);
		devfs_debug(DEVFS_DEBUG_WARNING,
	    "               nread: %d, reqpage: %d, pindex: %lu, pcount: %d\n",
		    nread, ap->a_reqpage, (u_long)m->pindex, pcount);
		/*
		 * Free the buffer header back to the swap buffer pool.
		 */
		relpbuf(bp, NULL);
		return VM_PAGER_ERROR;
	}
	/*
	 * Free the buffer header back to the swap buffer pool.
	 */
	relpbuf(bp, NULL);
	if (DEVFS_NODE(ap->a_vp))
		nanotime(&DEVFS_NODE(ap->a_vp)->mtime);
	return VM_PAGER_OK;
}