/*
 * Same as above, but forces the page to be detached from the object
 * and go into free pool.
 */
void
sf_ext_free_nocache(void *arg1, void *arg2)
{
	struct sf_buf *sf = arg1;
	struct sendfile_sync *sfs = arg2;
	vm_page_t pg = sf_buf_page(sf);

	sf_buf_free(sf);

	vm_page_lock(pg);
	if (vm_page_unwire(pg, PQ_NONE)) {
		vm_object_t obj;

		/* Try to free the page, but only if it is cheap to. */
		if ((obj = pg->object) == NULL)
			vm_page_free(pg);
		else if (!vm_page_xbusied(pg) && VM_OBJECT_TRYWLOCK(obj)) {
			vm_page_free(pg);
			VM_OBJECT_WUNLOCK(obj);
		} else
			vm_page_deactivate(pg);
	}
	vm_page_unlock(pg);

	if (sfs != NULL) {
		mtx_lock(&sfs->mtx);
		KASSERT(sfs->count > 0, ("Sendfile sync botchup count == 0"));
		if (--sfs->count == 0)
			cv_signal(&sfs->cv);
		mtx_unlock(&sfs->mtx);
	}
}
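/*
 * Illustrative sketch only, not taken from the source above: a caller that
 * queued pages against a sendfile_sync like the one signalled in
 * sf_ext_free_nocache() would typically sleep on the condition variable
 * until every completion callback has decremented the count.  The helper
 * name sf_sync_wait() is hypothetical; only the mtx/cv/count fields already
 * used above are assumed.
 */
static void
sf_sync_wait(struct sendfile_sync *sfs)
{
	mtx_lock(&sfs->mtx);
	while (sfs->count > 0)
		cv_wait(&sfs->cv, &sfs->mtx);
	mtx_unlock(&sfs->mtx);
}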
static inline void
release_page(struct faultstate *fs)
{
	vm_page_wakeup(fs->m);
	vm_page_lock(fs->m);
	vm_page_deactivate(fs->m);
	vm_page_unlock(fs->m);
	fs->m = NULL;
}
/*
 * Speed up the reclamation of up to "distance" pages that precede the
 * faulting pindex within the first object of the shadow chain.
 */
static void
vm_fault_cache_behind(const struct faultstate *fs, int distance)
{
	vm_object_t first_object, object;
	vm_page_t m, m_prev;
	vm_pindex_t pindex;

	object = fs->object;
	VM_OBJECT_ASSERT_WLOCKED(object);
	first_object = fs->first_object;
	if (first_object != object) {
		if (!VM_OBJECT_TRYWLOCK(first_object)) {
			VM_OBJECT_WUNLOCK(object);
			VM_OBJECT_WLOCK(first_object);
			VM_OBJECT_WLOCK(object);
		}
	}
	/* Neither fictitious nor unmanaged pages can be cached. */
	if ((first_object->flags & (OBJ_FICTITIOUS | OBJ_UNMANAGED)) == 0) {
		if (fs->first_pindex < distance)
			pindex = 0;
		else
			pindex = fs->first_pindex - distance;
		if (pindex < OFF_TO_IDX(fs->entry->offset))
			pindex = OFF_TO_IDX(fs->entry->offset);
		m = first_object != object ? fs->first_m : fs->m;
		KASSERT((m->oflags & VPO_BUSY) != 0,
		    ("vm_fault_cache_behind: page %p is not busy", m));
		m_prev = vm_page_prev(m);
		while ((m = m_prev) != NULL && m->pindex >= pindex &&
		    m->valid == VM_PAGE_BITS_ALL) {
			m_prev = vm_page_prev(m);
			if (m->busy != 0 || (m->oflags & VPO_BUSY) != 0)
				continue;
			vm_page_lock(m);
			if (m->hold_count == 0 && m->wire_count == 0) {
				pmap_remove_all(m);
				vm_page_aflag_clear(m, PGA_REFERENCED);
				if (m->dirty != 0)
					vm_page_deactivate(m);
				else
					vm_page_cache(m);
			}
			vm_page_unlock(m);
		}
	}
	if (first_object != object)
		VM_OBJECT_WUNLOCK(first_object);
}
/*
 * Speed up the reclamation of up to "distance" pages that precede the
 * faulting pindex within the first object of the shadow chain.
 */
static void
vm_fault_cache_behind(const struct faultstate *fs, int distance)
{
	vm_object_t first_object, object;
	vm_page_t m, m_prev;
	vm_pindex_t pindex;

	object = fs->object;
	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
	first_object = fs->first_object;
	if (first_object != object) {
		if (!VM_OBJECT_TRYLOCK(first_object)) {
			VM_OBJECT_UNLOCK(object);
			VM_OBJECT_LOCK(first_object);
			VM_OBJECT_LOCK(object);
		}
	}
	if (first_object->type != OBJT_DEVICE &&
	    first_object->type != OBJT_PHYS &&
	    first_object->type != OBJT_SG) {
		if (fs->first_pindex < distance)
			pindex = 0;
		else
			pindex = fs->first_pindex - distance;
		if (pindex < OFF_TO_IDX(fs->entry->offset))
			pindex = OFF_TO_IDX(fs->entry->offset);
		m = first_object != object ? fs->first_m : fs->m;
		KASSERT((m->oflags & VPO_BUSY) != 0,
		    ("vm_fault_cache_behind: page %p is not busy", m));
		m_prev = vm_page_prev(m);
		while ((m = m_prev) != NULL && m->pindex >= pindex &&
		    m->valid == VM_PAGE_BITS_ALL) {
			m_prev = vm_page_prev(m);
			if (m->busy != 0 || (m->oflags & VPO_BUSY) != 0)
				continue;
			vm_page_lock(m);
			if (m->hold_count == 0 && m->wire_count == 0) {
				pmap_remove_all(m);
				vm_page_aflag_clear(m, PGA_REFERENCED);
				if (m->dirty != 0)
					vm_page_deactivate(m);
				else
					vm_page_cache(m);
			}
			vm_page_unlock(m);
		}
	}
	if (first_object != object)
		VM_OBJECT_UNLOCK(first_object);
}
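/*
 * Hypothetical call-site sketch, not taken from the fault handler itself:
 * vm_fault_cache_behind() is entered with the current object's lock held,
 * so a caller that has just serviced a run of sequential faults might look
 * like the helper below.  The helper name and the distance constant are
 * assumptions for illustration only.
 */
#define	EXAMPLE_CACHE_BEHIND_DISTANCE	8

static void
example_reclaim_behind(const struct faultstate *fs)
{
	VM_OBJECT_LOCK_ASSERT(fs->object, MA_OWNED);
	vm_fault_cache_behind(fs, EXAMPLE_CACHE_BEHIND_DISTANCE);
}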
int
memory_object_control_uiomove(
	memory_object_control_t	control,
	memory_object_offset_t	offset,
	void			*uio,
	int			start_offset,
	int			io_requested,
	int			mark_dirty,
	int			take_reference)
{
	vm_object_t	object;
	vm_page_t	dst_page;
	int		xsize;
	int		retval = 0;
	int		cur_run;
	int		cur_needed;
	int		i;
	int		orig_offset;
	vm_page_t	page_run[MAX_RUN];

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL) {
		return (0);
	}
	assert(!object->internal);

	vm_object_lock(object);

	if (mark_dirty && object->copy != VM_OBJECT_NULL) {
		/*
		 * We can't modify the pages without honoring
		 * copy-on-write obligations first, so fall off
		 * this optimized path and fall back to the regular
		 * path.
		 */
		vm_object_unlock(object);
		return 0;
	}
	orig_offset = start_offset;

	while (io_requested && retval == 0) {

		cur_needed = (start_offset + io_requested + (PAGE_SIZE - 1)) / PAGE_SIZE;

		if (cur_needed > MAX_RUN)
			cur_needed = MAX_RUN;

		for (cur_run = 0; cur_run < cur_needed; ) {

			if ((dst_page = vm_page_lookup(object, offset)) == VM_PAGE_NULL)
				break;

			/*
			 * if we're in this routine, we are inside a filesystem's
			 * locking model, so we don't ever want to wait for pages that have
			 * list_req_pending == TRUE since it means that the
			 * page is a candidate for some type of I/O operation,
			 * but that it has not yet been gathered into a UPL...
			 * this implies that it is still outside the domain
			 * of the filesystem and that whoever is responsible for
			 * grabbing it into a UPL may be stuck behind the filesystem
			 * lock this thread owns, or trying to take a lock exclusively
			 * and waiting for the readers to drain from a rw lock...
			 * if we block in those cases, we will deadlock
			 */
			if (dst_page->list_req_pending) {

				if (dst_page->absent) {
					/*
					 * this is the list_req_pending | absent | busy case
					 * which originates from vm_fault_page... we want
					 * to fall out of the fast path and go back
					 * to the caller which will gather this page
					 * into a UPL and issue the I/O if no one
					 * else beats us to it
					 */
					break;
				}
				if (dst_page->pageout || dst_page->cleaning) {
					/*
					 * this is the list_req_pending | pageout | busy case
					 * or the list_req_pending | cleaning case...
					 * which originate from the pageout_scan and
					 * msync worlds for the pageout case and the hibernate
					 * pre-cleaning world for the cleaning case...
					 * we need to reset the state of this page to indicate
					 * it should stay in the cache marked dirty... nothing else we
					 * can do at this point... we can't block on it, we can't busy
					 * it and we can't clean it from this routine.
					 */
					vm_page_lockspin_queues();
					vm_pageout_queue_steal(dst_page, TRUE);
					vm_page_deactivate(dst_page);
					vm_page_unlock_queues();
				}
				/*
				 * this is the list_req_pending | cleaning case...
				 * we can go ahead and deal with this page since
				 * it's ok for us to mark this page busy... if a UPL
				 * tries to gather this page, it will block until the
				 * busy is cleared, thus allowing us safe use of the page
				 * when we're done with it, we will clear busy and wake
				 * up anyone waiting on it, thus allowing the UPL creation
				 * to finish
				 */
			} else if (dst_page->busy || dst_page->cleaning) {
				/*
				 * someone else is playing with the page... if we've
				 * already collected pages into this run, go ahead
				 * and process now, we can't block on this
				 * page while holding other pages in the BUSY state
				 * otherwise we will wait
				 */
				if (cur_run)
					break;
				PAGE_SLEEP(object, dst_page, THREAD_UNINT);
				continue;
			}

			/*
			 * this routine is only called when copying
			 * to/from real files... no need to consider
			 * encrypted swap pages
			 */
			assert(!dst_page->encrypted);

			if (mark_dirty) {
				dst_page->dirty = TRUE;
				if (dst_page->cs_validated &&
				    !dst_page->cs_tainted) {
					/*
					 * CODE SIGNING:
					 * We're modifying a code-signed
					 * page: force revalidate
					 */
					dst_page->cs_validated = FALSE;
#if DEVELOPMENT || DEBUG
					vm_cs_validated_resets++;
#endif
					pmap_disconnect(dst_page->phys_page);
				}
			}
			dst_page->busy = TRUE;

			page_run[cur_run++] = dst_page;

			offset += PAGE_SIZE_64;
		}
		if (cur_run == 0)
			/*
			 * we hit a 'hole' in the cache or
			 * a page we don't want to try to handle,
			 * so bail at this point
			 * we'll unlock the object below
			 */
			break;
		vm_object_unlock(object);

		for (i = 0; i < cur_run; i++) {

			dst_page = page_run[i];

			if ((xsize = PAGE_SIZE - start_offset) > io_requested)
				xsize = io_requested;

			if ((retval = uiomove64((addr64_t)(((addr64_t)(dst_page->phys_page) << 12) +
			    start_offset), xsize, uio)))
				break;

			io_requested -= xsize;
			start_offset = 0;
		}
		vm_object_lock(object);

		/*
		 * if we have more than 1 page to work on
		 * in the current run, or the original request
		 * started at offset 0 of the page, or we're
		 * processing multiple batches, we will move
		 * the pages to the tail of the inactive queue
		 * to implement an LRU for read/write accesses
		 *
		 * the check for orig_offset == 0 is there to
		 * mitigate the cost of small (< page_size) requests
		 * to the same page (this way we only move it once)
		 */
		if (take_reference && (cur_run > 1 || orig_offset == 0)) {

			vm_page_lockspin_queues();

			for (i = 0; i < cur_run; i++)
				vm_page_lru(page_run[i]);

			vm_page_unlock_queues();
		}
		for (i = 0; i < cur_run; i++) {

			dst_page = page_run[i];

			/*
			 * someone is explicitly referencing this page...
			 * update clustered and speculative state
			 */
			VM_PAGE_CONSUME_CLUSTERED(dst_page);

			PAGE_WAKEUP_DONE(dst_page);
		}
		orig_offset = 0;
	}
	vm_object_unlock(object);

	return (retval);
}
static int
shm_dotruncate(struct shmfd *shmfd, off_t length)
{
	vm_object_t object;
	vm_page_t m, ma[1];
	vm_pindex_t idx, nobjsize;
	vm_ooffset_t delta;
	int base, rv;

	object = shmfd->shm_object;
	VM_OBJECT_LOCK(object);
	if (length == shmfd->shm_size) {
		VM_OBJECT_UNLOCK(object);
		return (0);
	}
	nobjsize = OFF_TO_IDX(length + PAGE_MASK);

	/* Are we shrinking?  If so, trim the end. */
	if (length < shmfd->shm_size) {
		/*
		 * Disallow any requests to shrink the size if this
		 * object is mapped into the kernel.
		 */
		if (shmfd->shm_kmappings > 0) {
			VM_OBJECT_UNLOCK(object);
			return (EBUSY);
		}

		/*
		 * Zero the truncated part of the last page.
		 */
		base = length & PAGE_MASK;
		if (base != 0) {
			idx = OFF_TO_IDX(length);
retry:
			m = vm_page_lookup(object, idx);
			if (m != NULL) {
				if ((m->oflags & VPO_BUSY) != 0 ||
				    m->busy != 0) {
					vm_page_sleep(m, "shmtrc");
					goto retry;
				}
			} else if (vm_pager_has_page(object, idx, NULL, NULL)) {
				m = vm_page_alloc(object, idx, VM_ALLOC_NORMAL);
				if (m == NULL) {
					VM_OBJECT_UNLOCK(object);
					VM_WAIT;
					VM_OBJECT_LOCK(object);
					goto retry;
				} else if (m->valid != VM_PAGE_BITS_ALL) {
					ma[0] = m;
					rv = vm_pager_get_pages(object, ma, 1,
					    0);
					m = vm_page_lookup(object, idx);
				} else
					/* A cached page was reactivated. */
					rv = VM_PAGER_OK;
				vm_page_lock(m);
				if (rv == VM_PAGER_OK) {
					vm_page_deactivate(m);
					vm_page_unlock(m);
					vm_page_wakeup(m);
				} else {
					vm_page_free(m);
					vm_page_unlock(m);
					VM_OBJECT_UNLOCK(object);
					return (EIO);
				}
			}
			if (m != NULL) {
				pmap_zero_page_area(m, base, PAGE_SIZE - base);
				KASSERT(m->valid == VM_PAGE_BITS_ALL,
				    ("shm_dotruncate: page %p is invalid", m));
				vm_page_dirty(m);
				vm_pager_page_unswapped(m);
			}
		}
		delta = ptoa(object->size - nobjsize);

		/* Toss in memory pages. */
		if (nobjsize < object->size)
			vm_object_page_remove(object, nobjsize, object->size,
			    0);

		/* Toss pages from swap. */
		if (object->type == OBJT_SWAP)
			swap_pager_freespace(object, nobjsize, delta);

		/* Free the swap accounted for shm */
		swap_release_by_cred(delta, object->cred);
		object->charge -= delta;
	} else {
		/* Attempt to reserve the swap */
		delta = ptoa(nobjsize - object->size);
		if (!swap_reserve_by_cred(delta, object->cred)) {
			VM_OBJECT_UNLOCK(object);
			return (ENOMEM);
		}
		object->charge += delta;
	}
	shmfd->shm_size = length;
	mtx_lock(&shm_timestamp_lock);
	vfs_timestamp(&shmfd->shm_ctime);
	shmfd->shm_mtime = shmfd->shm_ctime;
	mtx_unlock(&shm_timestamp_lock);
	object->size = nobjsize;
	VM_OBJECT_UNLOCK(object);
	return (0);
}
/*
 * Vnode op for VM getpages.
 * Wish we could get rid of the multiple I/O routines.
 *
 * nwfs_getpages(struct vnode *a_vp, vm_page_t *a_m, int a_count,
 *		 int a_reqpage, vm_ooffset_t a_offset)
 */
int
nwfs_getpages(struct vop_getpages_args *ap)
{
#ifndef NWFS_RWCACHE
	return vnode_pager_generic_getpages(ap->a_vp, ap->a_m, ap->a_count,
		ap->a_reqpage, ap->a_seqaccess);
#else
	int i, error, npages;
	size_t nextoff, toff;
	size_t count;
	size_t size;
	struct uio uio;
	struct iovec iov;
	vm_offset_t kva;
	struct buf *bp;
	struct vnode *vp;
	struct thread *td = curthread;	/* XXX */
	struct ucred *cred;
	struct nwmount *nmp;
	struct nwnode *np;
	vm_page_t *pages;

	KKASSERT(td->td_proc);
	cred = td->td_proc->p_ucred;

	vp = ap->a_vp;
	np = VTONW(vp);
	nmp = VFSTONWFS(vp->v_mount);
	pages = ap->a_m;
	count = (size_t)ap->a_count;

	if (vp->v_object == NULL) {
		kprintf("nwfs_getpages: called with non-merged cache vnode??\n");
		return VM_PAGER_ERROR;
	}

	bp = getpbuf_kva(&nwfs_pbuf_freecnt);
	npages = btoc(count);
	kva = (vm_offset_t)bp->b_data;
	pmap_qenter(kva, pages, npages);

	iov.iov_base = (caddr_t)kva;
	iov.iov_len = count;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_offset = IDX_TO_OFF(pages[0]->pindex);
	uio.uio_resid = count;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_rw = UIO_READ;
	uio.uio_td = td;

	error = ncp_read(NWFSTOCONN(nmp), &np->n_fh, &uio, cred);
	pmap_qremove(kva, npages);

	relpbuf(bp, &nwfs_pbuf_freecnt);

	if (error && (uio.uio_resid == count)) {
		kprintf("nwfs_getpages: error %d\n", error);
		for (i = 0; i < npages; i++) {
			if (ap->a_reqpage != i)
				vnode_pager_freepage(pages[i]);
		}
		return VM_PAGER_ERROR;
	}

	size = count - uio.uio_resid;

	for (i = 0, toff = 0; i < npages; i++, toff = nextoff) {
		vm_page_t m;

		nextoff = toff + PAGE_SIZE;
		m = pages[i];

		m->flags &= ~PG_ZERO;

		/*
		 * NOTE: pmap dirty bit should have already been cleared.
		 *	 We do not clear it here.
		 */
		if (nextoff <= size) {
			m->valid = VM_PAGE_BITS_ALL;
			m->dirty = 0;
		} else {
			int nvalid = ((size + DEV_BSIZE - 1) - toff) &
				      ~(DEV_BSIZE - 1);
			vm_page_set_validclean(m, 0, nvalid);
		}

		if (i != ap->a_reqpage) {
			/*
			 * Whether or not to leave the page activated is up in
			 * the air, but we should put the page on a page queue
			 * somewhere (it already is in the object).  Result:
			 * It appears that empirical results show that
			 * deactivating pages is best.
			 */
			/*
			 * Just in case someone was asking for this page we
			 * now tell them that it is ok to use.
			 */
			if (!error) {
				if (m->flags & PG_REFERENCED)
					vm_page_activate(m);
				else
					vm_page_deactivate(m);
				vm_page_wakeup(m);
			} else {
				vnode_pager_freepage(m);
			}
		}
	}
	return 0;
#endif /* NWFS_RWCACHE */
}
/*
 * vm_pageout_scan does the dirty work for the pageout daemon.
 */
void
vm_pageout_scan()
{
	register vm_page_t	m, next;
	register int		page_shortage;
	register int		s;
	register int		pages_freed;
	int			free;
	vm_object_t		object;

	/*
	 * Only continue when we want more pages to be "free"
	 */
	cnt.v_rev++;

	s = splimp();
	simple_lock(&vm_page_queue_free_lock);
	free = cnt.v_free_count;
	simple_unlock(&vm_page_queue_free_lock);
	splx(s);

	if (free < cnt.v_free_target) {
		swapout_threads();

		/*
		 * Be sure the pmap system is updated so
		 * we can scan the inactive queue.
		 */
		pmap_update();
	}

	/*
	 * Acquire the resident page system lock,
	 * as we may be changing what's resident quite a bit.
	 */
	vm_page_lock_queues();

	/*
	 * Start scanning the inactive queue for pages we can free.
	 * We keep scanning until we have enough free pages or
	 * we have scanned through the entire queue.  If we
	 * encounter dirty pages, we start cleaning them.
	 */
	pages_freed = 0;
	for (m = vm_page_queue_inactive.tqh_first; m != NULL; m = next) {
		s = splimp();
		simple_lock(&vm_page_queue_free_lock);
		free = cnt.v_free_count;
		simple_unlock(&vm_page_queue_free_lock);
		splx(s);
		if (free >= cnt.v_free_target)
			break;

		cnt.v_scan++;
		next = m->pageq.tqe_next;

		/*
		 * If the page has been referenced, move it back to the
		 * active queue.
		 */
		if (pmap_is_referenced(VM_PAGE_TO_PHYS(m))) {
			vm_page_activate(m);
			cnt.v_reactivated++;
			continue;
		}

		/*
		 * If the page is clean, free it up.
		 */
		if (m->flags & PG_CLEAN) {
			object = m->object;
			if (vm_object_lock_try(object)) {
				pmap_page_protect(VM_PAGE_TO_PHYS(m),
						  VM_PROT_NONE);
				vm_page_free(m);
				pages_freed++;
				cnt.v_dfree++;
				vm_object_unlock(object);
			}
			continue;
		}

		/*
		 * If the page is dirty but already being washed, skip it.
		 */
		if ((m->flags & PG_LAUNDRY) == 0)
			continue;

		/*
		 * Otherwise the page is dirty and still in the laundry,
		 * so we start the cleaning operation and remove it from
		 * the laundry.
		 */
		object = m->object;
		if (!vm_object_lock_try(object))
			continue;
		cnt.v_pageouts++;
#ifdef CLUSTERED_PAGEOUT
		if (object->pager &&
		    vm_pager_cancluster(object->pager, PG_CLUSTERPUT))
			vm_pageout_cluster(m, object);
		else
#endif
		vm_pageout_page(m, object);
		thread_wakeup((int)object);
		vm_object_unlock(object);
		/*
		 * Former next page may no longer even be on the inactive
		 * queue (due to potential blocking in the pager with the
		 * queues unlocked).  If it isn't, we just start over.
		 */
		if (next && (next->flags & PG_INACTIVE) == 0)
			next = vm_page_queue_inactive.tqh_first;
	}

	/*
	 * Compute the page shortage.  If we are still very low on memory
	 * be sure that we will move a minimal amount of pages from active
	 * to inactive.
	 */
	page_shortage = cnt.v_inactive_target - cnt.v_inactive_count;
	if (page_shortage <= 0 && pages_freed == 0)
		page_shortage = 1;

	while (page_shortage > 0) {
		/*
		 * Move some more pages from active to inactive.
		 */
		if ((m = vm_page_queue_active.tqh_first) == NULL)
			break;
		vm_page_deactivate(m);
		page_shortage--;
	}

	vm_page_unlock_queues();
}
static int
uiomove_object_page(vm_object_t obj, size_t len, struct uio *uio)
{
	vm_page_t m;
	vm_pindex_t idx;
	size_t tlen;
	int error, offset, rv;

	idx = OFF_TO_IDX(uio->uio_offset);
	offset = uio->uio_offset & PAGE_MASK;
	tlen = MIN(PAGE_SIZE - offset, len);

	VM_OBJECT_WLOCK(obj);

	/*
	 * Parallel reads of the page content from disk are prevented
	 * by exclusive busy.
	 *
	 * Although the tmpfs vnode lock is held here, it is
	 * nonetheless safe to sleep waiting for a free page.  The
	 * pageout daemon does not need to acquire the tmpfs vnode
	 * lock to page out tobj's pages because tobj is a OBJT_SWAP
	 * type object.
	 */
	m = vm_page_grab(obj, idx, VM_ALLOC_NORMAL);
	if (m->valid != VM_PAGE_BITS_ALL) {
		if (vm_pager_has_page(obj, idx, NULL, NULL)) {
			rv = vm_pager_get_pages(obj, &m, 1, 0);
			m = vm_page_lookup(obj, idx);
			if (m == NULL) {
				printf(
		    "uiomove_object: vm_obj %p idx %jd null lookup rv %d\n",
				    obj, idx, rv);
				VM_OBJECT_WUNLOCK(obj);
				return (EIO);
			}
			if (rv != VM_PAGER_OK) {
				printf(
	    "uiomove_object: vm_obj %p idx %jd valid %x pager error %d\n",
				    obj, idx, m->valid, rv);
				vm_page_lock(m);
				vm_page_free(m);
				vm_page_unlock(m);
				VM_OBJECT_WUNLOCK(obj);
				return (EIO);
			}
		} else
			vm_page_zero_invalid(m, TRUE);
	}
	vm_page_xunbusy(m);
	vm_page_lock(m);
	vm_page_hold(m);
	vm_page_unlock(m);
	VM_OBJECT_WUNLOCK(obj);
	error = uiomove_fromphys(&m, offset, tlen, uio);
	if (uio->uio_rw == UIO_WRITE && error == 0) {
		VM_OBJECT_WLOCK(obj);
		vm_page_dirty(m);
		VM_OBJECT_WUNLOCK(obj);
	}
	vm_page_lock(m);
	vm_page_unhold(m);
	if (m->queue == PQ_NONE) {
		vm_page_deactivate(m);
	} else {
		/* Requeue to maintain LRU ordering. */
		vm_page_requeue(m);
	}
	vm_page_unlock(m);

	return (error);
}
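/*
 * Sketch of a typical caller loop, assuming a uiomove_object()-style wrapper
 * that feeds uiomove_object_page() at most one page per iteration; the exact
 * bounds checks and the wrapper name are assumptions, not taken from the
 * source above.
 */
int
uiomove_object(vm_object_t obj, off_t obj_size, struct uio *uio)
{
	ssize_t resid;
	size_t len;
	int error;

	error = 0;
	while ((resid = uio->uio_resid) > 0) {
		if (obj_size <= uio->uio_offset)
			break;
		len = MIN(obj_size - uio->uio_offset, resid);
		if (len == 0)
			break;
		/* Copy at most one page, then recheck progress. */
		error = uiomove_object_page(obj, len, uio);
		if (error != 0 || resid == uio->uio_resid)
			break;
	}
	return (error);
}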
/*
 * Vnode op for VM getpages.
 * Wish we could get rid of the multiple I/O routines.
 *
 * smbfs_getpages(struct vnode *a_vp, vm_page_t *a_m, int a_count,
 *		  int a_reqpage, vm_ooffset_t a_offset)
 */
int
smbfs_getpages(struct vop_getpages_args *ap)
{
#ifdef SMBFS_RWGENERIC
	return vop_stdgetpages(ap);
#else
	int i, error, npages;
	int doclose;
	size_t size, toff, nextoff, count;
	struct uio uio;
	struct iovec iov;
	vm_offset_t kva;
	struct buf *bp;
	struct vnode *vp;
	struct thread *td = curthread;	/* XXX */
	struct ucred *cred;
	struct smbmount *smp;
	struct smbnode *np;
	struct smb_cred scred;
	vm_page_t *pages;

	KKASSERT(td->td_proc);

	vp = ap->a_vp;
	cred = td->td_proc->p_ucred;
	np = VTOSMB(vp);
	smp = VFSTOSMBFS(vp->v_mount);
	pages = ap->a_m;
	count = (size_t)ap->a_count;

	if (vp->v_object == NULL) {
		kprintf("smbfs_getpages: called with non-merged cache vnode??\n");
		return VM_PAGER_ERROR;
	}
	smb_makescred(&scred, td, cred);

	bp = getpbuf_kva(&smbfs_pbuf_freecnt);
	npages = btoc(count);
	kva = (vm_offset_t)bp->b_data;
	pmap_qenter(kva, pages, npages);

	iov.iov_base = (caddr_t)kva;
	iov.iov_len = count;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_offset = IDX_TO_OFF(pages[0]->pindex);
	uio.uio_resid = count;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_rw = UIO_READ;
	uio.uio_td = td;

	/*
	 * This is kinda nasty.  Since smbfs is physically closing the
	 * fid on close(), we have to reopen it if necessary.  There are
	 * other races here too, such as if another process opens the same
	 * file while we are blocked in read. XXX
	 */
	error = 0;
	doclose = 0;
	if (np->n_opencount == 0) {
		error = smbfs_smb_open(np, SMB_AM_OPENREAD, &scred);
		if (error == 0)
			doclose = 1;
	}
	if (error == 0)
		error = smb_read(smp->sm_share, np->n_fid, &uio, &scred);
	if (doclose)
		smbfs_smb_close(smp->sm_share, np->n_fid, NULL, &scred);
	pmap_qremove(kva, npages);

	relpbuf(bp, &smbfs_pbuf_freecnt);

	if (error && (uio.uio_resid == count)) {
		kprintf("smbfs_getpages: error %d\n", error);
		for (i = 0; i < npages; i++) {
			if (ap->a_reqpage != i)
				vnode_pager_freepage(pages[i]);
		}
		return VM_PAGER_ERROR;
	}

	size = count - uio.uio_resid;

	for (i = 0, toff = 0; i < npages; i++, toff = nextoff) {
		vm_page_t m;

		nextoff = toff + PAGE_SIZE;
		m = pages[i];

		m->flags &= ~PG_ZERO;

		/*
		 * NOTE: pmap dirty bit should have already been cleared.
		 *	 We do not clear it here.
		 */
		if (nextoff <= size) {
			m->valid = VM_PAGE_BITS_ALL;
			m->dirty = 0;
		} else {
			int nvalid = ((size + DEV_BSIZE - 1) - toff) &
				      ~(DEV_BSIZE - 1);
			vm_page_set_validclean(m, 0, nvalid);
		}

		if (i != ap->a_reqpage) {
			/*
			 * Whether or not to leave the page activated is up in
			 * the air, but we should put the page on a page queue
			 * somewhere (it already is in the object).  Result:
			 * It appears that empirical results show that
			 * deactivating pages is best.
			 */
			/*
			 * Just in case someone was asking for this page we
			 * now tell them that it is ok to use.
			 */
			if (!error) {
				if (m->flags & PG_REFERENCED)
					vm_page_activate(m);
				else
					vm_page_deactivate(m);
				vm_page_wakeup(m);
			} else {
				vnode_pager_freepage(m);
			}
		}
	}
	return 0;
#endif /* SMBFS_RWGENERIC */
}
/*
    struct vnop_getpages_args {
	struct vnode *a_vp;
	vm_page_t *a_m;
	int a_count;
	int a_reqpage;
	vm_ooffset_t a_offset;
    };
*/
static int
fuse_vnop_getpages(struct vop_getpages_args *ap)
{
	int i, error, nextoff, size, toff, count, npages;
	struct uio uio;
	struct iovec iov;
	vm_offset_t kva;
	struct buf *bp;
	struct vnode *vp;
	struct thread *td;
	struct ucred *cred;
	vm_page_t *pages;

	FS_DEBUG2G("heh\n");

	vp = ap->a_vp;
	KASSERT(vp->v_object, ("objectless vp passed to getpages"));
	td = curthread;			/* XXX */
	cred = curthread->td_ucred;	/* XXX */
	pages = ap->a_m;
	count = ap->a_count;

	if (!fsess_opt_mmap(vnode_mount(vp))) {
		FS_DEBUG("called on non-cacheable vnode??\n");
		return (VM_PAGER_ERROR);
	}
	npages = btoc(count);

	/*
	 * If the requested page is partially valid, just return it and
	 * allow the pager to zero-out the blanks.  Partially valid pages
	 * can only occur at the file EOF.
	 */
	VM_OBJECT_LOCK(vp->v_object);
	fuse_vm_page_lock_queues();
	if (pages[ap->a_reqpage]->valid != 0) {
		for (i = 0; i < npages; ++i) {
			if (i != ap->a_reqpage) {
				fuse_vm_page_lock(pages[i]);
				vm_page_free(pages[i]);
				fuse_vm_page_unlock(pages[i]);
			}
		}
		fuse_vm_page_unlock_queues();
		VM_OBJECT_UNLOCK(vp->v_object);
		return 0;
	}
	fuse_vm_page_unlock_queues();
	VM_OBJECT_UNLOCK(vp->v_object);

	/*
	 * We use only the kva address for the buffer, but this is extremely
	 * convenient and fast.
	 */
	bp = getpbuf(&fuse_pbuf_freecnt);

	kva = (vm_offset_t)bp->b_data;
	pmap_qenter(kva, pages, npages);
	PCPU_INC(cnt.v_vnodein);
	PCPU_ADD(cnt.v_vnodepgsin, npages);

	iov.iov_base = (caddr_t)kva;
	iov.iov_len = count;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_offset = IDX_TO_OFF(pages[0]->pindex);
	uio.uio_resid = count;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_rw = UIO_READ;
	uio.uio_td = td;

	error = fuse_io_dispatch(vp, &uio, IO_DIRECT, cred);
	pmap_qremove(kva, npages);

	relpbuf(bp, &fuse_pbuf_freecnt);

	if (error && (uio.uio_resid == count)) {
		FS_DEBUG("error %d\n", error);
		VM_OBJECT_LOCK(vp->v_object);
		fuse_vm_page_lock_queues();
		for (i = 0; i < npages; ++i) {
			if (i != ap->a_reqpage) {
				fuse_vm_page_lock(pages[i]);
				vm_page_free(pages[i]);
				fuse_vm_page_unlock(pages[i]);
			}
		}
		fuse_vm_page_unlock_queues();
		VM_OBJECT_UNLOCK(vp->v_object);
		return VM_PAGER_ERROR;
	}
	/*
	 * Calculate the number of bytes read and validate only that number
	 * of bytes.  Note that due to pending writes, size may be 0.  This
	 * does not mean that the remaining data is invalid!
	 */
	size = count - uio.uio_resid;
	VM_OBJECT_LOCK(vp->v_object);
	fuse_vm_page_lock_queues();
	for (i = 0, toff = 0; i < npages; i++, toff = nextoff) {
		vm_page_t m;

		nextoff = toff + PAGE_SIZE;
		m = pages[i];

		if (nextoff <= size) {
			/*
			 * Read operation filled an entire page
			 */
			m->valid = VM_PAGE_BITS_ALL;
			KASSERT(m->dirty == 0,
			    ("fuse_getpages: page %p is dirty", m));
		} else if (size > toff) {
			/*
			 * Read operation filled a partial page.
			 */
			m->valid = 0;
			vm_page_set_valid_range(m, 0, size - toff);
			KASSERT(m->dirty == 0,
			    ("fuse_getpages: page %p is dirty", m));
		} else {
			/*
			 * Read operation was short.  If no error occurred
			 * we may have hit a zero-fill section.  We simply
			 * leave valid set to 0.
			 */
			;
		}
		if (i != ap->a_reqpage) {
			/*
			 * Whether or not to leave the page activated is up in
			 * the air, but we should put the page on a page queue
			 * somewhere (it already is in the object).  Result:
			 * It appears that empirical results show that
			 * deactivating pages is best.
			 */
			/*
			 * Just in case someone was asking for this page we
			 * now tell them that it is ok to use.
			 */
			if (!error) {
				if (m->oflags & VPO_WANTED) {
					fuse_vm_page_lock(m);
					vm_page_activate(m);
					fuse_vm_page_unlock(m);
				} else {
					fuse_vm_page_lock(m);
					vm_page_deactivate(m);
					fuse_vm_page_unlock(m);
				}
				vm_page_wakeup(m);
			} else {
				fuse_vm_page_lock(m);
				vm_page_free(m);
				fuse_vm_page_unlock(m);
			}
		}
	}
	fuse_vm_page_unlock_queues();
	VM_OBJECT_UNLOCK(vp->v_object);
	return 0;
}
/*
 * This is now called from local media FS's to operate against their
 * own vnodes if they fail to implement VOP_GETPAGES.
 *
 * With all the caching local media devices do these days there is really
 * very little point to attempting to restrict the I/O size to contiguous
 * blocks on-disk, especially if our caller thinks we need all the specified
 * pages.  Just construct and issue a READ.
 */
int
vnode_pager_generic_getpages(struct vnode *vp, vm_page_t *mpp, int bytecount,
			     int reqpage, int seqaccess)
{
	struct iovec aiov;
	struct uio auio;
	off_t foff;
	int error;
	int count;
	int i;
	int ioflags;

	/*
	 * Do not do anything if the vnode is bad.
	 */
	if (vp->v_mount == NULL)
		return VM_PAGER_BAD;

	/*
	 * Calculate the number of pages.  Since we are paging in whole
	 * pages, adjust bytecount to be an integral multiple of the page
	 * size.  It will be clipped to the file EOF later on.
	 */
	bytecount = round_page(bytecount);
	count = bytecount / PAGE_SIZE;

	/*
	 * We could check m[reqpage]->valid here and shortcut the operation,
	 * but doing so breaks read-ahead.  Instead assume that the VM
	 * system has already done at least the check, don't worry about
	 * any races, and issue the VOP_READ to allow read-ahead to function.
	 *
	 * This keeps the pipeline full for I/O bound sequentially scanned
	 * mmap()'s
	 */
	/* don't shortcut */

	/*
	 * Discard pages past the file EOF.  If the requested page is past
	 * the file EOF we just leave its valid bits set to 0, the caller
	 * expects to maintain ownership of the requested page.  If the
	 * entire range is past file EOF discard everything and generate
	 * a pagein error.
	 */
	foff = IDX_TO_OFF(mpp[0]->pindex);
	if (foff >= vp->v_filesize) {
		for (i = 0; i < count; i++) {
			if (i != reqpage)
				vnode_pager_freepage(mpp[i]);
		}
		return VM_PAGER_ERROR;
	}

	if (foff + bytecount > vp->v_filesize) {
		bytecount = vp->v_filesize - foff;
		i = round_page(bytecount) / PAGE_SIZE;
		while (count > i) {
			--count;
			if (count != reqpage)
				vnode_pager_freepage(mpp[count]);
		}
	}

	/*
	 * The size of the transfer is bytecount.  bytecount will be an
	 * integral multiple of the page size unless it has been clipped
	 * to the file EOF.  The transfer cannot exceed the file EOF.
	 *
	 * When dealing with real devices we must round-up to the device
	 * sector size.
	 */
	if (vp->v_type == VBLK || vp->v_type == VCHR) {
		int secmask = vp->v_rdev->si_bsize_phys - 1;
		KASSERT(secmask < PAGE_SIZE,
		    ("vnode_pager_generic_getpages: sector size %d too large",
		    secmask + 1));
		bytecount = (bytecount + secmask) & ~secmask;
	}

	/*
	 * Severe hack to avoid deadlocks with the buffer cache
	 */
	for (i = 0; i < count; ++i) {
		vm_page_t mt = mpp[i];

		vm_page_io_start(mt);
		vm_page_wakeup(mt);
	}

	/*
	 * Issue the I/O with some read-ahead if bytecount > PAGE_SIZE
	 */
	ioflags = IO_VMIO;
	if (seqaccess)
		ioflags |= IO_SEQMAX << IO_SEQSHIFT;

	aiov.iov_base = NULL;
	aiov.iov_len = bytecount;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = foff;
	auio.uio_segflg = UIO_NOCOPY;
	auio.uio_rw = UIO_READ;
	auio.uio_resid = bytecount;
	auio.uio_td = NULL;
	mycpu->gd_cnt.v_vnodein++;
	mycpu->gd_cnt.v_vnodepgsin += count;

	error = VOP_READ(vp, &auio, ioflags, proc0.p_ucred);

	/*
	 * Severe hack to avoid deadlocks with the buffer cache
	 */
	for (i = 0; i < count; ++i) {
		vm_page_busy_wait(mpp[i], FALSE, "getpgs");
		vm_page_io_finish(mpp[i]);
	}

	/*
	 * Calculate the actual number of bytes read and clean up the
	 * page list.
	 */
	bytecount -= auio.uio_resid;

	for (i = 0; i < count; ++i) {
		vm_page_t mt = mpp[i];

		if (i != reqpage) {
			if (error == 0 && mt->valid) {
				if (mt->flags & PG_REFERENCED)
					vm_page_activate(mt);
				else
					vm_page_deactivate(mt);
				vm_page_wakeup(mt);
			} else {
				vnode_pager_freepage(mt);
			}
		} else if (mt->valid == 0) {
			if (error == 0) {
				kprintf("page failed but no I/O error page "
					"%p object %p pindex %d\n",
					mt, mt->object, (int)mt->pindex);
				/* whoops, something happened */
				error = EINVAL;
			}
		} else if (mt->valid != VM_PAGE_BITS_ALL) {
			/*
			 * Zero-extend the requested page if necessary (if
			 * the filesystem is using a small block size).
			 */
			vm_page_zero_invalid(mt, TRUE);
		}
	}
	if (error) {
		kprintf("vnode_pager_getpage: I/O read error\n");
	}
	return (error ? VM_PAGER_ERROR : VM_PAGER_OK);
}
/*
 * spec_getpages() - get pages associated with device vnode.
 *
 * Note that spec_read and spec_write do not use the buffer cache, so we
 * must fully implement getpages here.
 */
static int
devfs_spec_getpages(struct vop_getpages_args *ap)
{
	vm_offset_t kva;
	int error;
	int i, pcount, size;
	struct buf *bp;
	vm_page_t m;
	vm_ooffset_t offset;
	int toff, nextoff, nread;
	struct vnode *vp = ap->a_vp;
	int blksiz;
	int gotreqpage;

	error = 0;
	pcount = round_page(ap->a_count) / PAGE_SIZE;

	/*
	 * Calculate the offset of the transfer and do sanity check.
	 */
	offset = IDX_TO_OFF(ap->a_m[0]->pindex) + ap->a_offset;

	/*
	 * Round up physical size for real devices.  We cannot round using
	 * v_mount's block size data because v_mount has nothing to do with
	 * the device.  i.e. it's usually '/dev'.  We need the physical block
	 * size for the device itself.
	 *
	 * We can't use v_rdev->si_mountpoint because it only exists when the
	 * block device is mounted.  However, we can use v_rdev.
	 */
	if (vn_isdisk(vp, NULL))
		blksiz = vp->v_rdev->si_bsize_phys;
	else
		blksiz = DEV_BSIZE;

	size = (ap->a_count + blksiz - 1) & ~(blksiz - 1);

	bp = getpbuf_kva(NULL);
	kva = (vm_offset_t)bp->b_data;

	/*
	 * Map the pages to be read into the kva.
	 */
	pmap_qenter(kva, ap->a_m, pcount);

	/* Build a minimal buffer header. */
	bp->b_cmd = BUF_CMD_READ;
	bp->b_bcount = size;
	bp->b_resid = 0;
	bsetrunningbufspace(bp, size);

	bp->b_bio1.bio_offset = offset;
	bp->b_bio1.bio_done = devfs_spec_getpages_iodone;

	mycpu->gd_cnt.v_vnodein++;
	mycpu->gd_cnt.v_vnodepgsin += pcount;

	/* Do the input. */
	vn_strategy(ap->a_vp, &bp->b_bio1);

	crit_enter();

	/* We definitely need to be at splbio here. */
	while (bp->b_cmd != BUF_CMD_DONE)
		tsleep(bp, 0, "spread", 0);

	crit_exit();

	if (bp->b_flags & B_ERROR) {
		if (bp->b_error)
			error = bp->b_error;
		else
			error = EIO;
	}

	/*
	 * If EOF is encountered we must zero-extend the result in order
	 * to ensure that the page does not contain garbage.  When no
	 * error occurs, an early EOF is indicated if b_bcount got truncated.
	 * b_resid is relative to b_bcount and should be 0, but some devices
	 * might indicate an EOF with b_resid instead of truncating b_bcount.
	 */
	nread = bp->b_bcount - bp->b_resid;
	if (nread < ap->a_count)
		bzero((caddr_t)kva + nread, ap->a_count - nread);
	pmap_qremove(kva, pcount);

	gotreqpage = 0;
	for (i = 0, toff = 0; i < pcount; i++, toff = nextoff) {
		nextoff = toff + PAGE_SIZE;
		m = ap->a_m[i];

		m->flags &= ~PG_ZERO;

		/*
		 * NOTE: vm_page_undirty/clear_dirty etc do not clear the
		 *	 pmap modified bit.  pmap modified bit should have
		 *	 already been cleared.
		 */
		if (nextoff <= nread) {
			m->valid = VM_PAGE_BITS_ALL;
			vm_page_undirty(m);
		} else if (toff < nread) {
			/*
			 * Since this is a VM request, we have to supply the
			 * unaligned offset to allow vm_page_set_valid()
			 * to zero sub-DEV_BSIZE'd portions of the page.
			 */
			vm_page_set_valid(m, 0, nread - toff);
			vm_page_clear_dirty_end_nonincl(m, 0, nread - toff);
		} else {
			m->valid = 0;
			vm_page_undirty(m);
		}

		if (i != ap->a_reqpage) {
			/*
			 * Just in case someone was asking for this page we
			 * now tell them that it is ok to use.
			 */
			if (!error || (m->valid == VM_PAGE_BITS_ALL)) {
				if (m->valid) {
					if (m->flags & PG_REFERENCED) {
						vm_page_activate(m);
					} else {
						vm_page_deactivate(m);
					}
					vm_page_wakeup(m);
				} else {
					vm_page_free(m);
				}
			} else {
				vm_page_free(m);
			}
		} else if (m->valid) {
			gotreqpage = 1;
			/*
			 * Since this is a VM request, we need to make the
			 * entire page presentable by zeroing invalid sections.
			 */
			if (m->valid != VM_PAGE_BITS_ALL)
				vm_page_zero_invalid(m, FALSE);
		}
	}
	if (!gotreqpage) {
		m = ap->a_m[ap->a_reqpage];
		devfs_debug(DEVFS_DEBUG_WARNING,
	    "spec_getpages:(%s) I/O read failure: (error=%d) bp %p vp %p\n",
			    devtoname(vp->v_rdev), error, bp, bp->b_vp);
		devfs_debug(DEVFS_DEBUG_WARNING,
		    "  size: %d, resid: %d, a_count: %d, valid: 0x%x\n",
			    size, bp->b_resid, ap->a_count, m->valid);
		devfs_debug(DEVFS_DEBUG_WARNING,
		    "  nread: %d, reqpage: %d, pindex: %lu, pcount: %d\n",
			    nread, ap->a_reqpage, (u_long)m->pindex, pcount);
		/*
		 * Free the buffer header back to the swap buffer pool.
		 */
		relpbuf(bp, NULL);
		return VM_PAGER_ERROR;
	}
	/*
	 * Free the buffer header back to the swap buffer pool.
	 */
	relpbuf(bp, NULL);
	if (DEVFS_NODE(ap->a_vp))
		nanotime(&DEVFS_NODE(ap->a_vp)->mtime);
	return VM_PAGER_OK;
}