/* --------------------------------------------------------------------- */ static int tmpfs_nocacheread(vm_object_t tobj, vm_pindex_t idx, vm_offset_t offset, size_t tlen, struct uio *uio) { vm_page_t m; int error, rv; VM_OBJECT_LOCK(tobj); m = vm_page_grab(tobj, idx, VM_ALLOC_WIRED | VM_ALLOC_NORMAL | VM_ALLOC_RETRY); if (m->valid != VM_PAGE_BITS_ALL) { if (vm_pager_has_page(tobj, idx, NULL, NULL)) { rv = vm_pager_get_pages(tobj, &m, 1, 0); if (rv != VM_PAGER_OK) { vm_page_lock(m); vm_page_free(m); vm_page_unlock(m); VM_OBJECT_UNLOCK(tobj); return (EIO); } } else vm_page_zero_invalid(m, TRUE); } VM_OBJECT_UNLOCK(tobj); error = uiomove_fromphys(&m, offset, tlen, uio); VM_OBJECT_LOCK(tobj); vm_page_lock(m); vm_page_unwire(m, TRUE); vm_page_unlock(m); vm_page_wakeup(m); VM_OBJECT_UNLOCK(tobj); return (error); }
void vnode_destroy_vobject(struct vnode *vp) { struct vm_object *obj; obj = vp->v_object; if (obj == NULL) return; ASSERT_VOP_ELOCKED(vp, "vnode_destroy_vobject"); VM_OBJECT_LOCK(obj); if (obj->ref_count == 0) { /* * vclean() may be called twice. The first time * removes the primary reference to the object, * the second time goes one further and is a * special-case to terminate the object. * * don't double-terminate the object */ if ((obj->flags & OBJ_DEAD) == 0) vm_object_terminate(obj); else VM_OBJECT_UNLOCK(obj); } else { /* * Woe to the process that tries to page now :-). */ vm_pager_deallocate(obj); VM_OBJECT_UNLOCK(obj); } vp->v_object = NULL; }
/* * vm_fault_prefault provides a quick way of clustering * pagefaults into a processes address space. It is a "cousin" * of vm_map_pmap_enter, except it runs at page fault time instead * of mmap time. */ static void vm_fault_prefault(pmap_t pmap, vm_offset_t addra, vm_map_entry_t entry) { int i; vm_offset_t addr, starta; vm_pindex_t pindex; vm_page_t m; vm_object_t object; if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) return; object = entry->object.vm_object; starta = addra - PFBAK * PAGE_SIZE; if (starta < entry->start) { starta = entry->start; } else if (starta > addra) { starta = 0; } for (i = 0; i < PAGEORDER_SIZE; i++) { vm_object_t backing_object, lobject; addr = addra + prefault_pageorder[i]; if (addr > addra + (PFFOR * PAGE_SIZE)) addr = 0; if (addr < starta || addr >= entry->end) continue; if (!pmap_is_prefaultable(pmap, addr)) continue; pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT; lobject = object; VM_OBJECT_LOCK(lobject); while ((m = vm_page_lookup(lobject, pindex)) == NULL && lobject->type == OBJT_DEFAULT && (backing_object = lobject->backing_object) != NULL) { KASSERT((lobject->backing_object_offset & PAGE_MASK) == 0, ("vm_fault_prefault: unaligned object offset")); pindex += lobject->backing_object_offset >> PAGE_SHIFT; VM_OBJECT_LOCK(backing_object); VM_OBJECT_UNLOCK(lobject); lobject = backing_object; } /* * give-up when a page is not in memory */ if (m == NULL) { VM_OBJECT_UNLOCK(lobject); break; } if (m->valid == VM_PAGE_BITS_ALL && (m->flags & PG_FICTITIOUS) == 0) pmap_enter_quick(pmap, addr, m, entry->protection); VM_OBJECT_UNLOCK(lobject); } }
/* --------------------------------------------------------------------- */ static int tmpfs_nocacheread(vm_object_t tobj, vm_pindex_t idx, vm_offset_t offset, size_t tlen, struct uio *uio) { vm_page_t m; int error; VM_OBJECT_LOCK(tobj); vm_object_pip_add(tobj, 1); m = vm_page_grab(tobj, idx, VM_ALLOC_WIRED | VM_ALLOC_ZERO | VM_ALLOC_NORMAL | VM_ALLOC_RETRY); if (m->valid != VM_PAGE_BITS_ALL) { if (vm_pager_has_page(tobj, idx, NULL, NULL)) { error = vm_pager_get_pages(tobj, &m, 1, 0); if (error != 0) { printf("tmpfs get pages from pager error [read]\n"); goto out; } } else vm_page_zero_invalid(m, TRUE); } VM_OBJECT_UNLOCK(tobj); error = uiomove_fromphys(&m, offset, tlen, uio); VM_OBJECT_LOCK(tobj); out: vm_page_lock(m); vm_page_unwire(m, TRUE); vm_page_unlock(m); vm_page_wakeup(m); vm_object_pip_subtract(tobj, 1); VM_OBJECT_UNLOCK(tobj); return (error); }
/* * Helper routines to allow the backing object of a shared memory file * descriptor to be mapped in the kernel. */ int shm_map(struct file *fp, size_t size, off_t offset, void **memp) { struct shmfd *shmfd; vm_offset_t kva, ofs; vm_object_t obj; int rv; if (fp->f_type != DTYPE_SHM) return (EINVAL); shmfd = fp->f_data; obj = shmfd->shm_object; VM_OBJECT_LOCK(obj); /* * XXXRW: This validation is probably insufficient, and subject to * sign errors. It should be fixed. */ if (offset >= shmfd->shm_size || offset + size > round_page(shmfd->shm_size)) { VM_OBJECT_UNLOCK(obj); return (EINVAL); } shmfd->shm_kmappings++; vm_object_reference_locked(obj); VM_OBJECT_UNLOCK(obj); /* Map the object into the kernel_map and wire it. */ kva = vm_map_min(kernel_map); ofs = offset & PAGE_MASK; offset = trunc_page(offset); size = round_page(size + ofs); rv = vm_map_find(kernel_map, obj, offset, &kva, size, VMFS_ALIGNED_SPACE, VM_PROT_READ | VM_PROT_WRITE, VM_PROT_READ | VM_PROT_WRITE, 0); if (rv == KERN_SUCCESS) { rv = vm_map_wire(kernel_map, kva, kva + size, VM_MAP_WIRE_SYSTEM | VM_MAP_WIRE_NOHOLES); if (rv == KERN_SUCCESS) { *memp = (void *)(kva + ofs); return (0); } vm_map_remove(kernel_map, kva, kva + size); } else vm_object_deallocate(obj); /* On failure, drop our mapping reference. */ VM_OBJECT_LOCK(obj); shmfd->shm_kmappings--; VM_OBJECT_UNLOCK(obj); return (vm_mmap_to_errno(rv)); }
static vm_page_t rtR0MemObjFreeBSDContigPhysAllocHelper(vm_object_t pObject, vm_pindex_t iPIndex, u_long cPages, vm_paddr_t VmPhysAddrHigh, u_long uAlignment, bool fWire) { vm_page_t pPages; int cTries = 0; #if __FreeBSD_version > 1000000 int fFlags = VM_ALLOC_INTERRUPT | VM_ALLOC_NOBUSY; if (fWire) fFlags |= VM_ALLOC_WIRED; while (cTries <= 1) { VM_OBJECT_LOCK(pObject); pPages = vm_page_alloc_contig(pObject, iPIndex, fFlags, cPages, 0, VmPhysAddrHigh, uAlignment, 0, VM_MEMATTR_DEFAULT); VM_OBJECT_UNLOCK(pObject); if (pPages) break; vm_pageout_grow_cache(cTries, 0, VmPhysAddrHigh); cTries++; } return pPages; #else while (cTries <= 1) { pPages = vm_phys_alloc_contig(cPages, 0, VmPhysAddrHigh, uAlignment, 0); if (pPages) break; vm_contig_grow_cache(cTries, 0, VmPhysAddrHigh); cTries++; } if (!pPages) return pPages; VM_OBJECT_LOCK(pObject); for (vm_pindex_t iPage = 0; iPage < cPages; iPage++) { vm_page_t pPage = pPages + iPage; vm_page_insert(pPage, pObject, iPIndex + iPage); pPage->valid = VM_PAGE_BITS_ALL; if (fWire) { pPage->wire_count = 1; atomic_add_int(&cnt.v_wire_count, 1); } } VM_OBJECT_UNLOCK(pObject); return pPages; #endif }
/* * Speed up the reclamation of up to "distance" pages that precede the * faulting pindex within the first object of the shadow chain. */ static void vm_fault_cache_behind(const struct faultstate *fs, int distance) { vm_object_t first_object, object; vm_page_t m, m_prev; vm_pindex_t pindex; object = fs->object; VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); first_object = fs->first_object; if (first_object != object) { if (!VM_OBJECT_TRYLOCK(first_object)) { VM_OBJECT_UNLOCK(object); VM_OBJECT_LOCK(first_object); VM_OBJECT_LOCK(object); } } if (first_object->type != OBJT_DEVICE && first_object->type != OBJT_PHYS && first_object->type != OBJT_SG) { if (fs->first_pindex < distance) pindex = 0; else pindex = fs->first_pindex - distance; if (pindex < OFF_TO_IDX(fs->entry->offset)) pindex = OFF_TO_IDX(fs->entry->offset); m = first_object != object ? fs->first_m : fs->m; KASSERT((m->oflags & VPO_BUSY) != 0, ("vm_fault_cache_behind: page %p is not busy", m)); m_prev = vm_page_prev(m); while ((m = m_prev) != NULL && m->pindex >= pindex && m->valid == VM_PAGE_BITS_ALL) { m_prev = vm_page_prev(m); if (m->busy != 0 || (m->oflags & VPO_BUSY) != 0) continue; vm_page_lock(m); if (m->hold_count == 0 && m->wire_count == 0) { pmap_remove_all(m); vm_page_aflag_clear(m, PGA_REFERENCED); if (m->dirty != 0) vm_page_deactivate(m); else vm_page_cache(m); } vm_page_unlock(m); } } if (first_object != object) VM_OBJECT_UNLOCK(first_object); }
int ttm_tt_swapin(struct ttm_tt *ttm) { vm_object_t obj; vm_page_t from_page, to_page; int i, ret, rv; obj = ttm->swap_storage; VM_OBJECT_LOCK(obj); vm_object_pip_add(obj, 1); for (i = 0; i < ttm->num_pages; ++i) { from_page = vm_page_grab(obj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY); if (from_page->valid != VM_PAGE_BITS_ALL) { if (vm_pager_has_page(obj, i)) { rv = vm_pager_get_page(obj, &from_page, 1); if (rv != VM_PAGER_OK) { vm_page_free(from_page); ret = -EIO; goto err_ret; } } else { vm_page_zero_invalid(from_page, TRUE); } } to_page = ttm->pages[i]; if (unlikely(to_page == NULL)) { ret = -ENOMEM; vm_page_wakeup(from_page); goto err_ret; } pmap_copy_page(VM_PAGE_TO_PHYS(from_page), VM_PAGE_TO_PHYS(to_page)); vm_page_wakeup(from_page); } vm_object_pip_wakeup(obj); VM_OBJECT_UNLOCK(obj); if (!(ttm->page_flags & TTM_PAGE_FLAG_PERSISTENT_SWAP)) vm_object_deallocate(obj); ttm->swap_storage = NULL; ttm->page_flags &= ~TTM_PAGE_FLAG_SWAPPED; return (0); err_ret: vm_object_pip_wakeup(obj); VM_OBJECT_UNLOCK(obj); return (ret); }
static int rtR0MemObjFreeBSDPhysAllocHelper(vm_object_t pObject, u_long cPages, vm_paddr_t VmPhysAddrHigh, u_long uAlignment, bool fContiguous, bool fWire, int rcNoMem) { if (fContiguous) { if (rtR0MemObjFreeBSDContigPhysAllocHelper(pObject, 0, cPages, VmPhysAddrHigh, uAlignment, fWire) != NULL) return VINF_SUCCESS; return rcNoMem; } for (vm_pindex_t iPage = 0; iPage < cPages; iPage++) { vm_page_t pPage = rtR0MemObjFreeBSDContigPhysAllocHelper(pObject, iPage, 1, VmPhysAddrHigh, uAlignment, fWire); if (!pPage) { /* Free all allocated pages */ VM_OBJECT_LOCK(pObject); while (iPage-- > 0) { pPage = vm_page_lookup(pObject, iPage); vm_page_lock_queues(); if (fWire) vm_page_unwire(pPage, 0); vm_page_free(pPage); vm_page_unlock_queues(); } VM_OBJECT_UNLOCK(pObject); return rcNoMem; } } return VINF_SUCCESS; }
/* * shmfd object management including creation and reference counting * routines. */ static struct shmfd * shm_alloc(struct ucred *ucred, mode_t mode) { struct shmfd *shmfd; shmfd = malloc(sizeof(*shmfd), M_SHMFD, M_WAITOK | M_ZERO); shmfd->shm_size = 0; shmfd->shm_uid = ucred->cr_uid; shmfd->shm_gid = ucred->cr_gid; shmfd->shm_mode = mode; shmfd->shm_object = vm_pager_allocate(OBJT_DEFAULT, NULL, shmfd->shm_size, VM_PROT_DEFAULT, 0, ucred); KASSERT(shmfd->shm_object != NULL, ("shm_create: vm_pager_allocate")); VM_OBJECT_LOCK(shmfd->shm_object); vm_object_clear_flag(shmfd->shm_object, OBJ_ONEMAPPING); vm_object_set_flag(shmfd->shm_object, OBJ_NOSPLIT); VM_OBJECT_UNLOCK(shmfd->shm_object); vfs_timestamp(&shmfd->shm_birthtime); shmfd->shm_atime = shmfd->shm_mtime = shmfd->shm_ctime = shmfd->shm_birthtime; refcount_init(&shmfd->shm_refs, 1); #ifdef MAC mac_posixshm_init(shmfd); mac_posixshm_create(ucred, shmfd); #endif return (shmfd); }
/* * Allocate (or lookup) pager for a vnode. * Handle is a vnode pointer. * * MPSAFE */ vm_object_t vnode_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot, vm_ooffset_t offset, struct ucred *cred) { vm_object_t object; struct vnode *vp; /* * Pageout to vnode, no can do yet. */ if (handle == NULL) return (NULL); vp = (struct vnode *) handle; /* * If the object is being terminated, wait for it to * go away. */ retry: while ((object = vp->v_object) != NULL) { VM_OBJECT_LOCK(object); if ((object->flags & OBJ_DEAD) == 0) break; vm_object_set_flag(object, OBJ_DISCONNECTWNT); msleep(object, VM_OBJECT_MTX(object), PDROP | PVM, "vadead", 0); } if (vp->v_usecount == 0) panic("vnode_pager_alloc: no vnode reference"); if (object == NULL) { /* * Add an object of the appropriate size */ object = vm_object_allocate(OBJT_VNODE, OFF_TO_IDX(round_page(size))); object->un_pager.vnp.vnp_size = size; object->un_pager.vnp.writemappings = 0; object->handle = handle; VI_LOCK(vp); if (vp->v_object != NULL) { /* * Object has been created while we were sleeping */ VI_UNLOCK(vp); vm_object_destroy(object); goto retry; } vp->v_object = object; VI_UNLOCK(vp); } else { object->ref_count++; VM_OBJECT_UNLOCK(object); } vref(vp); return (object); }
/* Create the VM system backing object for this vnode */ int vnode_create_vobject(struct vnode *vp, off_t isize, struct thread *td) { vm_object_t object; vm_ooffset_t size = isize; struct vattr va; if (!vn_isdisk(vp, NULL) && vn_canvmio(vp) == FALSE) return (0); while ((object = vp->v_object) != NULL) { VM_OBJECT_LOCK(object); if (!(object->flags & OBJ_DEAD)) { VM_OBJECT_UNLOCK(object); return (0); } VOP_UNLOCK(vp, 0); vm_object_set_flag(object, OBJ_DISCONNECTWNT); msleep(object, VM_OBJECT_MTX(object), PDROP | PVM, "vodead", 0); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); } if (size == 0) { if (vn_isdisk(vp, NULL)) { size = IDX_TO_OFF(INT_MAX); } else { if (VOP_GETATTR(vp, &va, td->td_ucred)) return (0); size = va.va_size; } } object = vnode_pager_alloc(vp, size, 0, 0, td->td_ucred); /* * Dereference the reference we just created. This assumes * that the object is associated with the vp. */ VM_OBJECT_LOCK(object); object->ref_count--; VM_OBJECT_UNLOCK(object); vrele(vp); KASSERT(vp->v_object != NULL, ("vnode_create_vobject: NULL object")); return (0); }
/* Try to invalidate pages, for "fs flush" or "fs flushv"; or * try to free pages, when deleting a file. * * Locking: the vcache entry's lock is held. It may be dropped and * re-obtained. * * Since we drop and re-obtain the lock, we can't guarantee that there won't * be some pages around when we return, newly created by concurrent activity. */ void osi_VM_TryToSmush(struct vcache *avc, afs_ucred_t *acred, int sync) { struct vnode *vp; int tries, code; int islocked; vp = AFSTOV(avc); VI_LOCK(vp); if (vp->v_iflag & VI_DOOMED) { VI_UNLOCK(vp); return; } VI_UNLOCK(vp); islocked = islocked_vnode(vp); if (islocked == LK_EXCLOTHER) panic("Trying to Smush over someone else's lock"); else if (islocked == LK_SHARED) { afs_warn("Trying to Smush with a shared lock"); lock_vnode(vp, LK_UPGRADE); } else if (!islocked) lock_vnode(vp, LK_EXCLUSIVE); if (vp->v_bufobj.bo_object != NULL) { VM_OBJECT_LOCK(vp->v_bufobj.bo_object); /* * Do we really want OBJPC_SYNC? OBJPC_INVAL would be * faster, if invalidation is really what we are being * asked to do. (It would make more sense, too, since * otherwise this function is practically identical to * osi_VM_StoreAllSegments().) -GAW */ /* * Dunno. We no longer resemble osi_VM_StoreAllSegments, * though maybe that's wrong, now. And OBJPC_SYNC is the * common thing in 70 file systems, it seems. Matt. */ vm_object_page_clean(vp->v_bufobj.bo_object, 0, 0, OBJPC_SYNC); VM_OBJECT_UNLOCK(vp->v_bufobj.bo_object); } tries = 5; code = osi_vinvalbuf(vp, V_SAVE, PCATCH, 0); while (code && (tries > 0)) { afs_warn("TryToSmush retrying vinvalbuf"); code = osi_vinvalbuf(vp, V_SAVE, PCATCH, 0); --tries; } if (islocked == LK_SHARED) lock_vnode(vp, LK_DOWNGRADE); else if (!islocked) unlock_vnode(vp); }
/* * Flush and invalidate all dirty buffers. If another process is already * doing the flush, just wait for completion. */ int fuse_io_invalbuf(struct vnode *vp, struct thread *td) { struct fuse_vnode_data *fvdat = VTOFUD(vp); int error = 0; if (vp->v_iflag & VI_DOOMED) return 0; ASSERT_VOP_ELOCKED(vp, "fuse_io_invalbuf"); while (fvdat->flag & FN_FLUSHINPROG) { struct proc *p = td->td_proc; if (vp->v_mount->mnt_kern_flag & MNTK_UNMOUNTF) return EIO; fvdat->flag |= FN_FLUSHWANT; tsleep(&fvdat->flag, PRIBIO + 2, "fusevinv", 2 * hz); error = 0; if (p != NULL) { PROC_LOCK(p); if (SIGNOTEMPTY(p->p_siglist) || SIGNOTEMPTY(td->td_siglist)) error = EINTR; PROC_UNLOCK(p); } if (error == EINTR) return EINTR; } fvdat->flag |= FN_FLUSHINPROG; if (vp->v_bufobj.bo_object != NULL) { VM_OBJECT_LOCK(vp->v_bufobj.bo_object); vm_object_page_clean(vp->v_bufobj.bo_object, 0, 0, OBJPC_SYNC); VM_OBJECT_UNLOCK(vp->v_bufobj.bo_object); } error = vinvalbuf(vp, V_SAVE, PCATCH, 0); while (error) { if (error == ERESTART || error == EINTR) { fvdat->flag &= ~FN_FLUSHINPROG; if (fvdat->flag & FN_FLUSHWANT) { fvdat->flag &= ~FN_FLUSHWANT; wakeup(&fvdat->flag); } return EINTR; } error = vinvalbuf(vp, V_SAVE, PCATCH, 0); } fvdat->flag &= ~FN_FLUSHINPROG; if (fvdat->flag & FN_FLUSHWANT) { fvdat->flag &= ~FN_FLUSHWANT; wakeup(&fvdat->flag); } return (error); }
/* * MPSAFE */ static void phys_pager_dealloc(vm_object_t object) { if (object->handle != NULL) { VM_OBJECT_UNLOCK(object); mtx_lock(&phys_pager_mtx); TAILQ_REMOVE(&phys_pager_object_list, object, pager_object_list); mtx_unlock(&phys_pager_mtx); VM_OBJECT_LOCK(object); } }
/* * Identify the physical page mapped at the given kernel virtual * address. Insert this physical page into the given address space at * the given virtual address, replacing the physical page, if any, * that already exists there. */ static int vm_pgmoveco(vm_map_t mapa, vm_offset_t kaddr, vm_offset_t uaddr) { vm_map_t map = mapa; vm_page_t kern_pg, user_pg; vm_object_t uobject; vm_map_entry_t entry; vm_pindex_t upindex; vm_prot_t prot; boolean_t wired; KASSERT((uaddr & PAGE_MASK) == 0, ("vm_pgmoveco: uaddr is not page aligned")); /* * Herein the physical page is validated and dirtied. It is * unwired in sf_buf_mext(). */ kern_pg = PHYS_TO_VM_PAGE(vtophys(kaddr)); kern_pg->valid = VM_PAGE_BITS_ALL; KASSERT(kern_pg->queue == PQ_NONE && kern_pg->wire_count == 1, ("vm_pgmoveco: kern_pg is not correctly wired")); if ((vm_map_lookup(&map, uaddr, VM_PROT_WRITE, &entry, &uobject, &upindex, &prot, &wired)) != KERN_SUCCESS) { return(EFAULT); } VM_OBJECT_LOCK(uobject); retry: if ((user_pg = vm_page_lookup(uobject, upindex)) != NULL) { if (vm_page_sleep_if_busy(user_pg, TRUE, "vm_pgmoveco")) goto retry; vm_page_lock_queues(); pmap_remove_all(user_pg); vm_page_free(user_pg); } else { /* * Even if a physical page does not exist in the * object chain's first object, a physical page from a * backing object may be mapped read only. */ if (uobject->backing_object != NULL) pmap_remove(map->pmap, uaddr, uaddr + PAGE_SIZE); vm_page_lock_queues(); } vm_page_insert(kern_pg, uobject, upindex); vm_page_dirty(kern_pg); vm_page_unlock_queues(); VM_OBJECT_UNLOCK(uobject); vm_map_lookup_done(map, entry); return(KERN_SUCCESS); }
int ncl_inactive(struct vop_inactive_args *ap) { struct nfsnode *np; struct sillyrename *sp; struct vnode *vp = ap->a_vp; boolean_t retv; np = VTONFS(vp); if (NFS_ISV4(vp) && vp->v_type == VREG) { /* * Since mmap()'d files do I/O after VOP_CLOSE(), the NFSv4 * Close operations are delayed until now. Any dirty * buffers/pages must be flushed before the close, so that the * stateid is available for the writes. */ if (vp->v_object != NULL) { VM_OBJECT_LOCK(vp->v_object); retv = vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC); VM_OBJECT_UNLOCK(vp->v_object); } else retv = TRUE; if (retv == TRUE) { (void)ncl_flush(vp, MNT_WAIT, NULL, ap->a_td, 1, 0); (void)nfsrpc_close(vp, 1, ap->a_td); } } mtx_lock(&np->n_mtx); if (vp->v_type != VDIR) { sp = np->n_sillyrename; np->n_sillyrename = NULL; } else sp = NULL; if (sp) { mtx_unlock(&np->n_mtx); (void) ncl_vinvalbuf(vp, 0, ap->a_td, 1); /* * Remove the silly file that was rename'd earlier */ ncl_removeit(sp, vp); crfree(sp->s_cred); TASK_INIT(&sp->s_task, 0, nfs_freesillyrename, sp); taskqueue_enqueue(taskqueue_thread, &sp->s_task); mtx_lock(&np->n_mtx); } np->n_flag &= NMODIFIED; mtx_unlock(&np->n_mtx); return (0); }
static int rtR0MemObjFreeBSDAllocPhysPages(PPRTR0MEMOBJINTERNAL ppMem, RTR0MEMOBJTYPE enmType, size_t cb, RTHCPHYS PhysHighest, size_t uAlignment, bool fContiguous, int rcNoMem) { uint32_t cPages = atop(cb); vm_paddr_t VmPhysAddrHigh; /* create the object. */ PRTR0MEMOBJFREEBSD pMemFreeBSD = (PRTR0MEMOBJFREEBSD)rtR0MemObjNew(sizeof(*pMemFreeBSD), enmType, NULL, cb); if (!pMemFreeBSD) return VERR_NO_MEMORY; pMemFreeBSD->pObject = vm_object_allocate(OBJT_PHYS, atop(cb)); if (PhysHighest != NIL_RTHCPHYS) VmPhysAddrHigh = PhysHighest; else VmPhysAddrHigh = ~(vm_paddr_t)0; int rc = rtR0MemObjFreeBSDPhysAllocHelper(pMemFreeBSD->pObject, cPages, VmPhysAddrHigh, uAlignment, fContiguous, true, rcNoMem); if (RT_SUCCESS(rc)) { if (fContiguous) { Assert(enmType == RTR0MEMOBJTYPE_PHYS); #if __FreeBSD_version >= 1000030 VM_OBJECT_WLOCK(pMemFreeBSD->pObject); #else VM_OBJECT_LOCK(pMemFreeBSD->pObject); #endif pMemFreeBSD->Core.u.Phys.PhysBase = VM_PAGE_TO_PHYS(vm_page_find_least(pMemFreeBSD->pObject, 0)); #if __FreeBSD_version >= 1000030 VM_OBJECT_WUNLOCK(pMemFreeBSD->pObject); #else VM_OBJECT_UNLOCK(pMemFreeBSD->pObject); #endif pMemFreeBSD->Core.u.Phys.fAllocated = true; } *ppMem = &pMemFreeBSD->Core; } else { vm_object_deallocate(pMemFreeBSD->pObject); rtR0MemObjDelete(&pMemFreeBSD->Core); } return rc; }
static void unlock_and_deallocate(struct faultstate *fs) { vm_object_pip_wakeup(fs->object); VM_OBJECT_UNLOCK(fs->object); if (fs->object != fs->first_object) { VM_OBJECT_LOCK(fs->first_object); vm_page_lock(fs->first_m); vm_page_free(fs->first_m); vm_page_unlock(fs->first_m); vm_object_pip_wakeup(fs->first_object); VM_OBJECT_UNLOCK(fs->first_object); fs->first_m = NULL; } vm_object_deallocate(fs->first_object); unlock_map(fs); if (fs->vp != NULL) { vput(fs->vp); fs->vp = NULL; } VFS_UNLOCK_GIANT(fs->vfslocked); fs->vfslocked = 0; }
/* Purge VM for a file when its callback is revoked. * * Locking: No lock is held, not even the global lock. */ void osi_VM_FlushPages(struct vcache *avc, afs_ucred_t *credp) { struct vnode *vp; struct vm_object *obj; vp = AFSTOV(avc); ASSERT_VOP_LOCKED(vp, __func__); if (VOP_GETVOBJECT(vp, &obj) == 0) { VM_OBJECT_LOCK(obj); vm_object_page_remove(obj, 0, 0, FALSE); VM_OBJECT_UNLOCK(obj); } osi_vinvalbuf(vp, 0, 0, 0); }
int ttm_tt_swapout(struct ttm_tt *ttm, vm_object_t persistent_swap_storage) { vm_object_t obj; vm_page_t from_page, to_page; int i; BUG_ON(ttm->state != tt_unbound && ttm->state != tt_unpopulated); BUG_ON(ttm->caching_state != tt_cached); if (!persistent_swap_storage) { obj = swap_pager_alloc(NULL, IDX_TO_OFF(ttm->num_pages), VM_PROT_DEFAULT, 0); if (obj == NULL) { pr_err("Failed allocating swap storage\n"); return (-ENOMEM); } } else obj = persistent_swap_storage; VM_OBJECT_LOCK(obj); vm_object_pip_add(obj, 1); for (i = 0; i < ttm->num_pages; ++i) { from_page = ttm->pages[i]; if (unlikely(from_page == NULL)) continue; to_page = vm_page_grab(obj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY); pmap_copy_page(VM_PAGE_TO_PHYS(from_page), VM_PAGE_TO_PHYS(to_page)); to_page->valid = VM_PAGE_BITS_ALL; vm_page_dirty(to_page); vm_page_wakeup(to_page); } vm_object_pip_wakeup(obj); VM_OBJECT_UNLOCK(obj); ttm->bdev->driver->ttm_tt_unpopulate(ttm); ttm->swap_storage = obj; ttm->page_flags |= TTM_PAGE_FLAG_SWAPPED; if (persistent_swap_storage) ttm->page_flags |= TTM_PAGE_FLAG_PERSISTENT_SWAP; return 0; }
/* Try to invalidate pages, for "fs flush" or "fs flushv"; or * try to free pages, when deleting a file. * * Locking: the vcache entry's lock is held. It may be dropped and * re-obtained. * * Since we drop and re-obtain the lock, we can't guarantee that there won't * be some pages around when we return, newly created by concurrent activity. */ void osi_VM_TryToSmush(struct vcache *avc, afs_ucred_t *acred, int sync) { struct vnode *vp; int tries, code; SPLVAR; vp = AFSTOV(avc); if (vp->v_iflag & VI_DOOMED) { USERPRI; return; } if (vp->v_bufobj.bo_object != NULL) { VM_OBJECT_LOCK(vp->v_bufobj.bo_object); /* * Do we really want OBJPC_SYNC? OBJPC_INVAL would be * faster, if invalidation is really what we are being * asked to do. (It would make more sense, too, since * otherwise this function is practically identical to * osi_VM_StoreAllSegments().) -GAW */ /* * Dunno. We no longer resemble osi_VM_StoreAllSegments, * though maybe that's wrong, now. And OBJPC_SYNC is the * common thing in 70 file systems, it seems. Matt. */ vm_object_page_clean(vp->v_bufobj.bo_object, 0, 0, OBJPC_SYNC); VM_OBJECT_UNLOCK(vp->v_bufobj.bo_object); } tries = 5; code = osi_vinvalbuf(vp, V_SAVE, PCATCH, 0); while (code && (tries > 0)) { code = osi_vinvalbuf(vp, V_SAVE, PCATCH, 0); --tries; } USERPRI; }
static void pscnv_gem_pager_dtor(void *handle) { struct drm_gem_object *gem_obj = handle; struct pscnv_bo *bo = gem_obj->driver_private; struct drm_device *dev = gem_obj->dev; vm_object_t devobj; DRM_LOCK(dev); devobj = cdev_pager_lookup(handle); if (devobj != NULL) { vm_size_t page_count = OFF_TO_IDX(bo->size); vm_page_t m; int i; VM_OBJECT_LOCK(devobj); for (i = 0; i < page_count; i++) { m = vm_page_lookup(devobj, i); if (!m) continue; if (pscnv_mem_debug > 0) NV_WARN(dev, "Freeing %010llx + %08llx (%p\n", bo->start, i * PAGE_SIZE, m); cdev_pager_free_page(devobj, m); } VM_OBJECT_UNLOCK(devobj); vm_object_deallocate(devobj); } else { DRM_UNLOCK(dev); NV_ERROR(dev, "Could not find handle %p bo %p\n", handle, bo); return; } if (pscnv_mem_debug > 0) NV_WARN(dev, "Freed %010llx (%p)\n", bo->start, bo); //kfree(bo->fake_pages); if (bo->chan) pscnv_chan_unref(bo->chan); else drm_gem_object_unreference_unlocked(gem_obj); DRM_UNLOCK(dev); }
/* Try to discard pages, in order to recycle a vcache entry. * * We also make some sanity checks: ref count, open count, held locks. * * We also do some non-VM-related chores, such as releasing the cred pointer * (for AIX and Solaris) and releasing the gnode (for AIX). * * Locking: afs_xvcache lock is held. If it is dropped and re-acquired, * *slept should be set to warn the caller. * * Formerly, afs_xvcache was dropped and re-acquired for Solaris, but now it * is not dropped and re-acquired for any platform. It may be that *slept is * therefore obsolescent. * * OSF/1 Locking: VN_LOCK has been called. * We do not lock the vnode here, but instead require that it be exclusive * locked by code calling osi_VM_StoreAllSegments directly, or scheduling it * from the bqueue - Matt * Maybe better to just call vnode_pager_setsize()? */ int osi_VM_FlushVCache(struct vcache *avc, int *slept) { struct vm_object *obj; struct vnode *vp; if (VREFCOUNT(avc) > 1) return EBUSY; if (avc->opens) return EBUSY; /* if a lock is held, give up */ if (CheckLock(&avc->lock)) return EBUSY; return(0); AFS_GUNLOCK(); vp = AFSTOV(avc); #ifndef AFS_FBSD70_ENV lock_vnode(vp); #endif if (VOP_GETVOBJECT(vp, &obj) == 0) { VM_OBJECT_LOCK(obj); vm_object_page_remove(obj, 0, 0, FALSE); #if 1 if (obj->ref_count == 0) { simple_lock(&vp->v_interlock); vgonel(vp, curthread); vp->v_tag = VT_AFS; SetAfsVnode(vp); } #endif VM_OBJECT_UNLOCK(obj); } #ifndef AFS_FBSD70_ENV unlock_vnode(vp); #endif AFS_GLOCK(); return 0; }
/* Try to store pages to cache, in order to store a file back to the server. * * Locking: the vcache entry's lock is held. It will usually be dropped and * re-obtained. */ void osi_VM_StoreAllSegments(struct vcache *avc) { struct vnode *vp; struct vm_object *obj; int anyio, tries; ReleaseWriteLock(&avc->lock); AFS_GUNLOCK(); tries = 5; vp = AFSTOV(avc); /* * I don't understand this. Why not just call vm_object_page_clean() * and be done with it? I particularly don't understand why we're calling * vget() here. Is there some reason to believe that the vnode might * be being recycled at this point? I don't think there's any need for * this loop, either -- if we keep the vnode locked all the time, * that and the object lock will prevent any new pages from appearing. * The loop is what causes the race condition. -GAW */ do { anyio = 0; obj = vp->v_object; if (obj != NULL && obj->flags & OBJ_MIGHTBEDIRTY) { if (!vget(vp, LK_EXCLUSIVE | LK_RETRY, curthread)) { obj = vp->v_object; if (obj != NULL) { VM_OBJECT_LOCK(obj); vm_object_page_clean(obj, 0, 0, OBJPC_SYNC); VM_OBJECT_UNLOCK(obj); anyio = 1; } vput(vp); } } } while (anyio && (--tries > 0)); AFS_GLOCK(); ObtainWriteLock(&avc->lock, 94); }
/* * We require the caller to unmap the entire entry. This allows us to * safely decrement shm_kmappings when a mapping is removed. */ int shm_unmap(struct file *fp, void *mem, size_t size) { struct shmfd *shmfd; vm_map_entry_t entry; vm_offset_t kva, ofs; vm_object_t obj; vm_pindex_t pindex; vm_prot_t prot; boolean_t wired; vm_map_t map; int rv; if (fp->f_type != DTYPE_SHM) return (EINVAL); shmfd = fp->f_data; kva = (vm_offset_t)mem; ofs = kva & PAGE_MASK; kva = trunc_page(kva); size = round_page(size + ofs); map = kernel_map; rv = vm_map_lookup(&map, kva, VM_PROT_READ | VM_PROT_WRITE, &entry, &obj, &pindex, &prot, &wired); if (rv != KERN_SUCCESS) return (EINVAL); if (entry->start != kva || entry->end != kva + size) { vm_map_lookup_done(map, entry); return (EINVAL); } vm_map_lookup_done(map, entry); if (obj != shmfd->shm_object) return (EINVAL); vm_map_remove(map, kva, kva + size); VM_OBJECT_LOCK(obj); KASSERT(shmfd->shm_kmappings > 0, ("shm_unmap: object not mapped")); shmfd->shm_kmappings--; VM_OBJECT_UNLOCK(obj); return (0); }
int vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type, int fault_flags, vm_page_t *m_hold) { vm_prot_t prot; long ahead, behind; int alloc_req, era, faultcount, nera, reqpage, result; boolean_t growstack, is_first_object_locked, wired; int map_generation; vm_object_t next_object; vm_page_t marray[VM_FAULT_READ_MAX]; int hardfault; struct faultstate fs; struct vnode *vp; int locked, error; hardfault = 0; growstack = TRUE; PCPU_INC(cnt.v_vm_faults); fs.vp = NULL; fs.vfslocked = 0; faultcount = reqpage = 0; RetryFault:; /* * Find the backing store object and offset into it to begin the * search. */ fs.map = map; result = vm_map_lookup(&fs.map, vaddr, fault_type, &fs.entry, &fs.first_object, &fs.first_pindex, &prot, &wired); if (result != KERN_SUCCESS) { if (growstack && result == KERN_INVALID_ADDRESS && map != kernel_map) { result = vm_map_growstack(curproc, vaddr); if (result != KERN_SUCCESS) return (KERN_FAILURE); growstack = FALSE; goto RetryFault; } return (result); } map_generation = fs.map->timestamp; if (fs.entry->eflags & MAP_ENTRY_NOFAULT) { panic("vm_fault: fault on nofault entry, addr: %lx", (u_long)vaddr); } /* * Make a reference to this object to prevent its disposal while we * are messing with it. Once we have the reference, the map is free * to be diddled. Since objects reference their shadows (and copies), * they will stay around as well. * * Bump the paging-in-progress count to prevent size changes (e.g. * truncation operations) during I/O. This must be done after * obtaining the vnode lock in order to avoid possible deadlocks. */ VM_OBJECT_LOCK(fs.first_object); vm_object_reference_locked(fs.first_object); vm_object_pip_add(fs.first_object, 1); fs.lookup_still_valid = TRUE; if (wired) fault_type = prot | (fault_type & VM_PROT_COPY); fs.first_m = NULL; /* * Search for the page at object/offset. */ fs.object = fs.first_object; fs.pindex = fs.first_pindex; while (TRUE) { /* * If the object is dead, we stop here */ if (fs.object->flags & OBJ_DEAD) { unlock_and_deallocate(&fs); return (KERN_PROTECTION_FAILURE); } /* * See if page is resident */ fs.m = vm_page_lookup(fs.object, fs.pindex); if (fs.m != NULL) { /* * check for page-based copy on write. * We check fs.object == fs.first_object so * as to ensure the legacy COW mechanism is * used when the page in question is part of * a shadow object. Otherwise, vm_page_cowfault() * removes the page from the backing object, * which is not what we want. */ vm_page_lock(fs.m); if ((fs.m->cow) && (fault_type & VM_PROT_WRITE) && (fs.object == fs.first_object)) { vm_page_cowfault(fs.m); unlock_and_deallocate(&fs); goto RetryFault; } /* * Wait/Retry if the page is busy. We have to do this * if the page is busy via either VPO_BUSY or * vm_page_t->busy because the vm_pager may be using * vm_page_t->busy for pageouts ( and even pageins if * it is the vnode pager ), and we could end up trying * to pagein and pageout the same page simultaneously. * * We can theoretically allow the busy case on a read * fault if the page is marked valid, but since such * pages are typically already pmap'd, putting that * special case in might be more effort then it is * worth. We cannot under any circumstances mess * around with a vm_page_t->busy page except, perhaps, * to pmap it. */ if ((fs.m->oflags & VPO_BUSY) || fs.m->busy) { /* * Reference the page before unlocking and * sleeping so that the page daemon is less * likely to reclaim it. */ vm_page_aflag_set(fs.m, PGA_REFERENCED); vm_page_unlock(fs.m); if (fs.object != fs.first_object) { if (!VM_OBJECT_TRYLOCK( fs.first_object)) { VM_OBJECT_UNLOCK(fs.object); VM_OBJECT_LOCK(fs.first_object); VM_OBJECT_LOCK(fs.object); } vm_page_lock(fs.first_m); vm_page_free(fs.first_m); vm_page_unlock(fs.first_m); vm_object_pip_wakeup(fs.first_object); VM_OBJECT_UNLOCK(fs.first_object); fs.first_m = NULL; } unlock_map(&fs); if (fs.m == vm_page_lookup(fs.object, fs.pindex)) { vm_page_sleep_if_busy(fs.m, TRUE, "vmpfw"); } vm_object_pip_wakeup(fs.object); VM_OBJECT_UNLOCK(fs.object); PCPU_INC(cnt.v_intrans); vm_object_deallocate(fs.first_object); goto RetryFault; } vm_pageq_remove(fs.m); vm_page_unlock(fs.m); /* * Mark page busy for other processes, and the * pagedaemon. If it still isn't completely valid * (readable), jump to readrest, else break-out ( we * found the page ). */ vm_page_busy(fs.m); if (fs.m->valid != VM_PAGE_BITS_ALL) goto readrest; break; } /* * Page is not resident, If this is the search termination * or the pager might contain the page, allocate a new page. */ if (TRYPAGER || fs.object == fs.first_object) { if (fs.pindex >= fs.object->size) { unlock_and_deallocate(&fs); return (KERN_PROTECTION_FAILURE); } /* * Allocate a new page for this object/offset pair. * * Unlocked read of the p_flag is harmless. At * worst, the P_KILLED might be not observed * there, and allocation can fail, causing * restart and new reading of the p_flag. */ fs.m = NULL; if (!vm_page_count_severe() || P_KILLED(curproc)) { #if VM_NRESERVLEVEL > 0 if ((fs.object->flags & OBJ_COLORED) == 0) { fs.object->flags |= OBJ_COLORED; fs.object->pg_color = atop(vaddr) - fs.pindex; } #endif alloc_req = P_KILLED(curproc) ? VM_ALLOC_SYSTEM : VM_ALLOC_NORMAL; if (fs.object->type != OBJT_VNODE && fs.object->backing_object == NULL) alloc_req |= VM_ALLOC_ZERO; fs.m = vm_page_alloc(fs.object, fs.pindex, alloc_req); } if (fs.m == NULL) { unlock_and_deallocate(&fs); VM_WAITPFAULT; goto RetryFault; } else if (fs.m->valid == VM_PAGE_BITS_ALL) break; } readrest: /* * We have found a valid page or we have allocated a new page. * The page thus may not be valid or may not be entirely * valid. * * Attempt to fault-in the page if there is a chance that the * pager has it, and potentially fault in additional pages * at the same time. */ if (TRYPAGER) { int rv; u_char behavior = vm_map_entry_behavior(fs.entry); if (behavior == MAP_ENTRY_BEHAV_RANDOM || P_KILLED(curproc)) { behind = 0; ahead = 0; } else if (behavior == MAP_ENTRY_BEHAV_SEQUENTIAL) { behind = 0; ahead = atop(fs.entry->end - vaddr) - 1; if (ahead > VM_FAULT_READ_AHEAD_MAX) ahead = VM_FAULT_READ_AHEAD_MAX; if (fs.pindex == fs.entry->next_read) vm_fault_cache_behind(&fs, VM_FAULT_READ_MAX); } else { /* * If this is a sequential page fault, then * arithmetically increase the number of pages * in the read-ahead window. Otherwise, reset * the read-ahead window to its smallest size. */ behind = atop(vaddr - fs.entry->start); if (behind > VM_FAULT_READ_BEHIND) behind = VM_FAULT_READ_BEHIND; ahead = atop(fs.entry->end - vaddr) - 1; era = fs.entry->read_ahead; if (fs.pindex == fs.entry->next_read) { nera = era + behind; if (nera > VM_FAULT_READ_AHEAD_MAX) nera = VM_FAULT_READ_AHEAD_MAX; behind = 0; if (ahead > nera) ahead = nera; if (era == VM_FAULT_READ_AHEAD_MAX) vm_fault_cache_behind(&fs, VM_FAULT_CACHE_BEHIND); } else if (ahead > VM_FAULT_READ_AHEAD_MIN) ahead = VM_FAULT_READ_AHEAD_MIN; if (era != ahead) fs.entry->read_ahead = ahead; } /* * Call the pager to retrieve the data, if any, after * releasing the lock on the map. We hold a ref on * fs.object and the pages are VPO_BUSY'd. */ unlock_map(&fs); vnode_lock: if (fs.object->type == OBJT_VNODE) { vp = fs.object->handle; if (vp == fs.vp) goto vnode_locked; else if (fs.vp != NULL) { vput(fs.vp); fs.vp = NULL; } locked = VOP_ISLOCKED(vp); if (VFS_NEEDSGIANT(vp->v_mount) && !fs.vfslocked) { fs.vfslocked = 1; if (!mtx_trylock(&Giant)) { VM_OBJECT_UNLOCK(fs.object); mtx_lock(&Giant); VM_OBJECT_LOCK(fs.object); goto vnode_lock; } } if (locked != LK_EXCLUSIVE) locked = LK_SHARED; /* Do not sleep for vnode lock while fs.m is busy */ error = vget(vp, locked | LK_CANRECURSE | LK_NOWAIT, curthread); if (error != 0) { int vfslocked; vfslocked = fs.vfslocked; fs.vfslocked = 0; /* Keep Giant */ vhold(vp); release_page(&fs); unlock_and_deallocate(&fs); error = vget(vp, locked | LK_RETRY | LK_CANRECURSE, curthread); vdrop(vp); fs.vp = vp; fs.vfslocked = vfslocked; KASSERT(error == 0, ("vm_fault: vget failed")); goto RetryFault; } fs.vp = vp; } vnode_locked: KASSERT(fs.vp == NULL || !fs.map->system_map, ("vm_fault: vnode-backed object mapped by system map")); /* * now we find out if any other pages should be paged * in at this time this routine checks to see if the * pages surrounding this fault reside in the same * object as the page for this fault. If they do, * then they are faulted in also into the object. The * array "marray" returned contains an array of * vm_page_t structs where one of them is the * vm_page_t passed to the routine. The reqpage * return value is the index into the marray for the * vm_page_t passed to the routine. * * fs.m plus the additional pages are VPO_BUSY'd. */ faultcount = vm_fault_additional_pages( fs.m, behind, ahead, marray, &reqpage); rv = faultcount ? vm_pager_get_pages(fs.object, marray, faultcount, reqpage) : VM_PAGER_FAIL; if (rv == VM_PAGER_OK) { /* * Found the page. Leave it busy while we play * with it. */ /* * Relookup in case pager changed page. Pager * is responsible for disposition of old page * if moved. */ fs.m = vm_page_lookup(fs.object, fs.pindex); if (!fs.m) { unlock_and_deallocate(&fs); goto RetryFault; } hardfault++; break; /* break to PAGE HAS BEEN FOUND */ } /* * Remove the bogus page (which does not exist at this * object/offset); before doing so, we must get back * our object lock to preserve our invariant. * * Also wake up any other process that may want to bring * in this page. * * If this is the top-level object, we must leave the * busy page to prevent another process from rushing * past us, and inserting the page in that object at * the same time that we are. */ if (rv == VM_PAGER_ERROR) printf("vm_fault: pager read error, pid %d (%s)\n", curproc->p_pid, curproc->p_comm); /* * Data outside the range of the pager or an I/O error */ /* * XXX - the check for kernel_map is a kludge to work * around having the machine panic on a kernel space * fault w/ I/O error. */ if (((fs.map != kernel_map) && (rv == VM_PAGER_ERROR)) || (rv == VM_PAGER_BAD)) { vm_page_lock(fs.m); vm_page_free(fs.m); vm_page_unlock(fs.m); fs.m = NULL; unlock_and_deallocate(&fs); return ((rv == VM_PAGER_ERROR) ? KERN_FAILURE : KERN_PROTECTION_FAILURE); } if (fs.object != fs.first_object) { vm_page_lock(fs.m); vm_page_free(fs.m); vm_page_unlock(fs.m); fs.m = NULL; /* * XXX - we cannot just fall out at this * point, m has been freed and is invalid! */ } } /* * We get here if the object has default pager (or unwiring) * or the pager doesn't have the page. */ if (fs.object == fs.first_object) fs.first_m = fs.m; /* * Move on to the next object. Lock the next object before * unlocking the current one. */ fs.pindex += OFF_TO_IDX(fs.object->backing_object_offset); next_object = fs.object->backing_object; if (next_object == NULL) { /* * If there's no object left, fill the page in the top * object with zeros. */ if (fs.object != fs.first_object) { vm_object_pip_wakeup(fs.object); VM_OBJECT_UNLOCK(fs.object); fs.object = fs.first_object; fs.pindex = fs.first_pindex; fs.m = fs.first_m; VM_OBJECT_LOCK(fs.object); } fs.first_m = NULL; /* * Zero the page if necessary and mark it valid. */ if ((fs.m->flags & PG_ZERO) == 0) { pmap_zero_page(fs.m); } else { PCPU_INC(cnt.v_ozfod); } PCPU_INC(cnt.v_zfod); fs.m->valid = VM_PAGE_BITS_ALL; break; /* break to PAGE HAS BEEN FOUND */ } else { KASSERT(fs.object != next_object, ("object loop %p", next_object)); VM_OBJECT_LOCK(next_object); vm_object_pip_add(next_object, 1); if (fs.object != fs.first_object) vm_object_pip_wakeup(fs.object); VM_OBJECT_UNLOCK(fs.object); fs.object = next_object; } } KASSERT((fs.m->oflags & VPO_BUSY) != 0, ("vm_fault: not busy after main loop")); /* * PAGE HAS BEEN FOUND. [Loop invariant still holds -- the object lock * is held.] */ /* * If the page is being written, but isn't already owned by the * top-level object, we have to copy it into a new page owned by the * top-level object. */ if (fs.object != fs.first_object) { /* * We only really need to copy if we want to write it. */ if ((fault_type & (VM_PROT_COPY | VM_PROT_WRITE)) != 0) { /* * This allows pages to be virtually copied from a * backing_object into the first_object, where the * backing object has no other refs to it, and cannot * gain any more refs. Instead of a bcopy, we just * move the page from the backing object to the * first object. Note that we must mark the page * dirty in the first object so that it will go out * to swap when needed. */ is_first_object_locked = FALSE; if ( /* * Only one shadow object */ (fs.object->shadow_count == 1) && /* * No COW refs, except us */ (fs.object->ref_count == 1) && /* * No one else can look this object up */ (fs.object->handle == NULL) && /* * No other ways to look the object up */ ((fs.object->type == OBJT_DEFAULT) || (fs.object->type == OBJT_SWAP)) && (is_first_object_locked = VM_OBJECT_TRYLOCK(fs.first_object)) && /* * We don't chase down the shadow chain */ fs.object == fs.first_object->backing_object) { /* * get rid of the unnecessary page */ vm_page_lock(fs.first_m); vm_page_free(fs.first_m); vm_page_unlock(fs.first_m); /* * grab the page and put it into the * process'es object. The page is * automatically made dirty. */ vm_page_lock(fs.m); vm_page_rename(fs.m, fs.first_object, fs.first_pindex); vm_page_unlock(fs.m); vm_page_busy(fs.m); fs.first_m = fs.m; fs.m = NULL; PCPU_INC(cnt.v_cow_optim); } else { /* * Oh, well, lets copy it. */ pmap_copy_page(fs.m, fs.first_m); fs.first_m->valid = VM_PAGE_BITS_ALL; if (wired && (fault_flags & VM_FAULT_CHANGE_WIRING) == 0) { vm_page_lock(fs.first_m); vm_page_wire(fs.first_m); vm_page_unlock(fs.first_m); vm_page_lock(fs.m); vm_page_unwire(fs.m, FALSE); vm_page_unlock(fs.m); } /* * We no longer need the old page or object. */ release_page(&fs); } /* * fs.object != fs.first_object due to above * conditional */ vm_object_pip_wakeup(fs.object); VM_OBJECT_UNLOCK(fs.object); /* * Only use the new page below... */ fs.object = fs.first_object; fs.pindex = fs.first_pindex; fs.m = fs.first_m; if (!is_first_object_locked) VM_OBJECT_LOCK(fs.object); PCPU_INC(cnt.v_cow_faults); curthread->td_cow++; } else { prot &= ~VM_PROT_WRITE; } } /* * We must verify that the maps have not changed since our last * lookup. */ if (!fs.lookup_still_valid) { vm_object_t retry_object; vm_pindex_t retry_pindex; vm_prot_t retry_prot; if (!vm_map_trylock_read(fs.map)) { release_page(&fs); unlock_and_deallocate(&fs); goto RetryFault; } fs.lookup_still_valid = TRUE; if (fs.map->timestamp != map_generation) { result = vm_map_lookup_locked(&fs.map, vaddr, fault_type, &fs.entry, &retry_object, &retry_pindex, &retry_prot, &wired); /* * If we don't need the page any longer, put it on the inactive * list (the easiest thing to do here). If no one needs it, * pageout will grab it eventually. */ if (result != KERN_SUCCESS) { release_page(&fs); unlock_and_deallocate(&fs); /* * If retry of map lookup would have blocked then * retry fault from start. */ if (result == KERN_FAILURE) goto RetryFault; return (result); } if ((retry_object != fs.first_object) || (retry_pindex != fs.first_pindex)) { release_page(&fs); unlock_and_deallocate(&fs); goto RetryFault; } /* * Check whether the protection has changed or the object has * been copied while we left the map unlocked. Changing from * read to write permission is OK - we leave the page * write-protected, and catch the write fault. Changing from * write to read permission means that we can't mark the page * write-enabled after all. */ prot &= retry_prot; } } /* * If the page was filled by a pager, update the map entry's * last read offset. Since the pager does not return the * actual set of pages that it read, this update is based on * the requested set. Typically, the requested and actual * sets are the same. * * XXX The following assignment modifies the map * without holding a write lock on it. */ if (hardfault) fs.entry->next_read = fs.pindex + faultcount - reqpage; if ((prot & VM_PROT_WRITE) != 0 || (fault_flags & VM_FAULT_DIRTY) != 0) { vm_object_set_writeable_dirty(fs.object); /* * If this is a NOSYNC mmap we do not want to set VPO_NOSYNC * if the page is already dirty to prevent data written with * the expectation of being synced from not being synced. * Likewise if this entry does not request NOSYNC then make * sure the page isn't marked NOSYNC. Applications sharing * data should use the same flags to avoid ping ponging. */ if (fs.entry->eflags & MAP_ENTRY_NOSYNC) { if (fs.m->dirty == 0) fs.m->oflags |= VPO_NOSYNC; } else { fs.m->oflags &= ~VPO_NOSYNC; } /* * If the fault is a write, we know that this page is being * written NOW so dirty it explicitly to save on * pmap_is_modified() calls later. * * Also tell the backing pager, if any, that it should remove * any swap backing since the page is now dirty. */ if (((fault_type & VM_PROT_WRITE) != 0 && (fault_flags & VM_FAULT_CHANGE_WIRING) == 0) || (fault_flags & VM_FAULT_DIRTY) != 0) { vm_page_dirty(fs.m); vm_pager_page_unswapped(fs.m); } } /* * Page had better still be busy */ KASSERT(fs.m->oflags & VPO_BUSY, ("vm_fault: page %p not busy!", fs.m)); /* * Page must be completely valid or it is not fit to * map into user space. vm_pager_get_pages() ensures this. */ KASSERT(fs.m->valid == VM_PAGE_BITS_ALL, ("vm_fault: page %p partially invalid", fs.m)); VM_OBJECT_UNLOCK(fs.object); /* * Put this page into the physical map. We had to do the unlock above * because pmap_enter() may sleep. We don't put the page * back on the active queue until later so that the pageout daemon * won't find it (yet). */ pmap_enter(fs.map->pmap, vaddr, fault_type, fs.m, prot, wired); if ((fault_flags & VM_FAULT_CHANGE_WIRING) == 0 && wired == 0) vm_fault_prefault(fs.map->pmap, vaddr, fs.entry); VM_OBJECT_LOCK(fs.object); vm_page_lock(fs.m); /* * If the page is not wired down, then put it where the pageout daemon * can find it. */ if (fault_flags & VM_FAULT_CHANGE_WIRING) { if (wired) vm_page_wire(fs.m); else vm_page_unwire(fs.m, 1); } else vm_page_activate(fs.m); if (m_hold != NULL) { *m_hold = fs.m; vm_page_hold(fs.m); } vm_page_unlock(fs.m); vm_page_wakeup(fs.m); /* * Unlock everything, and return */ unlock_and_deallocate(&fs); if (hardfault) curthread->td_ru.ru_majflt++; else curthread->td_ru.ru_minflt++; return (KERN_SUCCESS); }
/* * Routine: * vm_fault_copy_entry * Function: * Create new shadow object backing dst_entry with private copy of * all underlying pages. When src_entry is equal to dst_entry, * function implements COW for wired-down map entry. Otherwise, * it forks wired entry into dst_map. * * In/out conditions: * The source and destination maps must be locked for write. * The source map entry must be wired down (or be a sharing map * entry corresponding to a main map entry that is wired down). */ void vm_fault_copy_entry(vm_map_t dst_map, vm_map_t src_map, vm_map_entry_t dst_entry, vm_map_entry_t src_entry, vm_ooffset_t *fork_charge) { vm_object_t backing_object, dst_object, object, src_object; vm_pindex_t dst_pindex, pindex, src_pindex; vm_prot_t access, prot; vm_offset_t vaddr; vm_page_t dst_m; vm_page_t src_m; boolean_t src_readonly, upgrade; #ifdef lint src_map++; #endif /* lint */ upgrade = src_entry == dst_entry; src_object = src_entry->object.vm_object; src_pindex = OFF_TO_IDX(src_entry->offset); src_readonly = (src_entry->protection & VM_PROT_WRITE) == 0; /* * Create the top-level object for the destination entry. (Doesn't * actually shadow anything - we copy the pages directly.) */ dst_object = vm_object_allocate(OBJT_DEFAULT, OFF_TO_IDX(dst_entry->end - dst_entry->start)); #if VM_NRESERVLEVEL > 0 dst_object->flags |= OBJ_COLORED; dst_object->pg_color = atop(dst_entry->start); #endif VM_OBJECT_LOCK(dst_object); KASSERT(upgrade || dst_entry->object.vm_object == NULL, ("vm_fault_copy_entry: vm_object not NULL")); dst_entry->object.vm_object = dst_object; dst_entry->offset = 0; dst_object->charge = dst_entry->end - dst_entry->start; if (fork_charge != NULL) { KASSERT(dst_entry->cred == NULL, ("vm_fault_copy_entry: leaked swp charge")); dst_object->cred = curthread->td_ucred; crhold(dst_object->cred); *fork_charge += dst_object->charge; } else { dst_object->cred = dst_entry->cred; dst_entry->cred = NULL; } access = prot = dst_entry->protection; /* * If not an upgrade, then enter the mappings in the pmap as * read and/or execute accesses. Otherwise, enter them as * write accesses. * * A writeable large page mapping is only created if all of * the constituent small page mappings are modified. Marking * PTEs as modified on inception allows promotion to happen * without taking potentially large number of soft faults. */ if (!upgrade) access &= ~VM_PROT_WRITE; /* * Loop through all of the pages in the entry's range, copying each * one from the source object (it should be there) to the destination * object. */ for (vaddr = dst_entry->start, dst_pindex = 0; vaddr < dst_entry->end; vaddr += PAGE_SIZE, dst_pindex++) { /* * Allocate a page in the destination object. */ do { dst_m = vm_page_alloc(dst_object, dst_pindex, VM_ALLOC_NORMAL); if (dst_m == NULL) { VM_OBJECT_UNLOCK(dst_object); VM_WAIT; VM_OBJECT_LOCK(dst_object); } } while (dst_m == NULL); /* * Find the page in the source object, and copy it in. * (Because the source is wired down, the page will be in * memory.) */ VM_OBJECT_LOCK(src_object); object = src_object; pindex = src_pindex + dst_pindex; while ((src_m = vm_page_lookup(object, pindex)) == NULL && src_readonly && (backing_object = object->backing_object) != NULL) { /* * Allow fallback to backing objects if we are reading. */ VM_OBJECT_LOCK(backing_object); pindex += OFF_TO_IDX(object->backing_object_offset); VM_OBJECT_UNLOCK(object); object = backing_object; } if (src_m == NULL) panic("vm_fault_copy_wired: page missing"); pmap_copy_page(src_m, dst_m); VM_OBJECT_UNLOCK(object); dst_m->valid = VM_PAGE_BITS_ALL; VM_OBJECT_UNLOCK(dst_object); /* * Enter it in the pmap. If a wired, copy-on-write * mapping is being replaced by a write-enabled * mapping, then wire that new mapping. */ pmap_enter(dst_map->pmap, vaddr, access, dst_m, prot, upgrade); /* * Mark it no longer busy, and put it on the active list. */ VM_OBJECT_LOCK(dst_object); if (upgrade) { vm_page_lock(src_m); vm_page_unwire(src_m, 0); vm_page_unlock(src_m); vm_page_lock(dst_m); vm_page_wire(dst_m); vm_page_unlock(dst_m); } else { vm_page_lock(dst_m); vm_page_activate(dst_m); vm_page_unlock(dst_m); } vm_page_wakeup(dst_m); } VM_OBJECT_UNLOCK(dst_object); if (upgrade) { dst_entry->eflags &= ~(MAP_ENTRY_COW | MAP_ENTRY_NEEDS_COPY); vm_object_deallocate(src_object); } }
/* * If blocks are contiguous on disk, use this to provide clustered * read ahead. We will read as many blocks as possible sequentially * and then parcel them up into logical blocks in the buffer hash table. */ static struct buf * cluster_rbuild(struct vnode *vp, u_quad_t filesize, daddr_t lbn, daddr_t blkno, long size, int run, int gbflags, struct buf *fbp) { struct bufobj *bo; struct buf *bp, *tbp; daddr_t bn; off_t off; long tinc, tsize; int i, inc, j, toff; KASSERT(size == vp->v_mount->mnt_stat.f_iosize, ("cluster_rbuild: size %ld != filesize %jd\n", size, (intmax_t)vp->v_mount->mnt_stat.f_iosize)); /* * avoid a division */ while ((u_quad_t) size * (lbn + run) > filesize) { --run; } if (fbp) { tbp = fbp; tbp->b_iocmd = BIO_READ; } else { tbp = getblk(vp, lbn, size, 0, 0, gbflags); if (tbp->b_flags & B_CACHE) return tbp; tbp->b_flags |= B_ASYNC | B_RAM; tbp->b_iocmd = BIO_READ; } tbp->b_blkno = blkno; if( (tbp->b_flags & B_MALLOC) || ((tbp->b_flags & B_VMIO) == 0) || (run <= 1) ) return tbp; bp = trypbuf(&cluster_pbuf_freecnt); if (bp == 0) return tbp; /* * We are synthesizing a buffer out of vm_page_t's, but * if the block size is not page aligned then the starting * address may not be either. Inherit the b_data offset * from the original buffer. */ bp->b_flags = B_ASYNC | B_CLUSTER | B_VMIO; if ((gbflags & GB_UNMAPPED) != 0) { bp->b_flags |= B_UNMAPPED; bp->b_data = unmapped_buf; } else { bp->b_data = (char *)((vm_offset_t)bp->b_data | ((vm_offset_t)tbp->b_data & PAGE_MASK)); } bp->b_iocmd = BIO_READ; bp->b_iodone = cluster_callback; bp->b_blkno = blkno; bp->b_lblkno = lbn; bp->b_offset = tbp->b_offset; KASSERT(bp->b_offset != NOOFFSET, ("cluster_rbuild: no buffer offset")); pbgetvp(vp, bp); TAILQ_INIT(&bp->b_cluster.cluster_head); bp->b_bcount = 0; bp->b_bufsize = 0; bp->b_npages = 0; inc = btodb(size); bo = &vp->v_bufobj; for (bn = blkno, i = 0; i < run; ++i, bn += inc) { if (i != 0) { if ((bp->b_npages * PAGE_SIZE) + round_page(size) > vp->v_mount->mnt_iosize_max) { break; } tbp = getblk(vp, lbn + i, size, 0, 0, GB_LOCK_NOWAIT | (gbflags & GB_UNMAPPED)); /* Don't wait around for locked bufs. */ if (tbp == NULL) break; /* * Stop scanning if the buffer is fully valid * (marked B_CACHE), or locked (may be doing a * background write), or if the buffer is not * VMIO backed. The clustering code can only deal * with VMIO-backed buffers. */ BO_LOCK(bo); if ((tbp->b_vflags & BV_BKGRDINPROG) || (tbp->b_flags & B_CACHE) || (tbp->b_flags & B_VMIO) == 0) { BO_UNLOCK(bo); bqrelse(tbp); break; } BO_UNLOCK(bo); /* * The buffer must be completely invalid in order to * take part in the cluster. If it is partially valid * then we stop. */ off = tbp->b_offset; tsize = size; VM_OBJECT_LOCK(tbp->b_bufobj->bo_object); for (j = 0; tsize > 0; j++) { toff = off & PAGE_MASK; tinc = tsize; if (toff + tinc > PAGE_SIZE) tinc = PAGE_SIZE - toff; VM_OBJECT_LOCK_ASSERT(tbp->b_pages[j]->object, MA_OWNED); if ((tbp->b_pages[j]->valid & vm_page_bits(toff, tinc)) != 0) break; off += tinc; tsize -= tinc; } VM_OBJECT_UNLOCK(tbp->b_bufobj->bo_object); if (tsize > 0) { bqrelse(tbp); break; } /* * Set a read-ahead mark as appropriate */ if ((fbp && (i == 1)) || (i == (run - 1))) tbp->b_flags |= B_RAM; /* * Set the buffer up for an async read (XXX should * we do this only if we do not wind up brelse()ing?). * Set the block number if it isn't set, otherwise * if it is make sure it matches the block number we * expect. */ tbp->b_flags |= B_ASYNC; tbp->b_iocmd = BIO_READ; if (tbp->b_blkno == tbp->b_lblkno) { tbp->b_blkno = bn; } else if (tbp->b_blkno != bn) { brelse(tbp); break; } } /* * XXX fbp from caller may not be B_ASYNC, but we are going * to biodone() it in cluster_callback() anyway */ BUF_KERNPROC(tbp); TAILQ_INSERT_TAIL(&bp->b_cluster.cluster_head, tbp, b_cluster.cluster_entry); VM_OBJECT_LOCK(tbp->b_bufobj->bo_object); for (j = 0; j < tbp->b_npages; j += 1) { vm_page_t m; m = tbp->b_pages[j]; vm_page_io_start(m); vm_object_pip_add(m->object, 1); if ((bp->b_npages == 0) || (bp->b_pages[bp->b_npages-1] != m)) { bp->b_pages[bp->b_npages] = m; bp->b_npages++; } if (m->valid == VM_PAGE_BITS_ALL) tbp->b_pages[j] = bogus_page; } VM_OBJECT_UNLOCK(tbp->b_bufobj->bo_object); /* * Don't inherit tbp->b_bufsize as it may be larger due to * a non-page-aligned size. Instead just aggregate using * 'size'. */ if (tbp->b_bcount != size) printf("warning: tbp->b_bcount wrong %ld vs %ld\n", tbp->b_bcount, size); if (tbp->b_bufsize != size) printf("warning: tbp->b_bufsize wrong %ld vs %ld\n", tbp->b_bufsize, size); bp->b_bcount += size; bp->b_bufsize += size; } /* * Fully valid pages in the cluster are already good and do not need * to be re-read from disk. Replace the page with bogus_page */ VM_OBJECT_LOCK(bp->b_bufobj->bo_object); for (j = 0; j < bp->b_npages; j++) { VM_OBJECT_LOCK_ASSERT(bp->b_pages[j]->object, MA_OWNED); if (bp->b_pages[j]->valid == VM_PAGE_BITS_ALL) bp->b_pages[j] = bogus_page; } VM_OBJECT_UNLOCK(bp->b_bufobj->bo_object); if (bp->b_bufsize > bp->b_kvasize) panic("cluster_rbuild: b_bufsize(%ld) > b_kvasize(%d)\n", bp->b_bufsize, bp->b_kvasize); bp->b_kvasize = bp->b_bufsize; if ((bp->b_flags & B_UNMAPPED) == 0) { pmap_qenter(trunc_page((vm_offset_t) bp->b_data), (vm_page_t *)bp->b_pages, bp->b_npages); } return (bp); }
static int shm_dotruncate(struct shmfd *shmfd, off_t length) { vm_object_t object; vm_page_t m, ma[1]; vm_pindex_t idx, nobjsize; vm_ooffset_t delta; int base, rv; object = shmfd->shm_object; VM_OBJECT_LOCK(object); if (length == shmfd->shm_size) { VM_OBJECT_UNLOCK(object); return (0); } nobjsize = OFF_TO_IDX(length + PAGE_MASK); /* Are we shrinking? If so, trim the end. */ if (length < shmfd->shm_size) { /* * Disallow any requests to shrink the size if this * object is mapped into the kernel. */ if (shmfd->shm_kmappings > 0) { VM_OBJECT_UNLOCK(object); return (EBUSY); } /* * Zero the truncated part of the last page. */ base = length & PAGE_MASK; if (base != 0) { idx = OFF_TO_IDX(length); retry: m = vm_page_lookup(object, idx); if (m != NULL) { if ((m->oflags & VPO_BUSY) != 0 || m->busy != 0) { vm_page_sleep(m, "shmtrc"); goto retry; } } else if (vm_pager_has_page(object, idx, NULL, NULL)) { m = vm_page_alloc(object, idx, VM_ALLOC_NORMAL); if (m == NULL) { VM_OBJECT_UNLOCK(object); VM_WAIT; VM_OBJECT_LOCK(object); goto retry; } else if (m->valid != VM_PAGE_BITS_ALL) { ma[0] = m; rv = vm_pager_get_pages(object, ma, 1, 0); m = vm_page_lookup(object, idx); } else /* A cached page was reactivated. */ rv = VM_PAGER_OK; vm_page_lock(m); if (rv == VM_PAGER_OK) { vm_page_deactivate(m); vm_page_unlock(m); vm_page_wakeup(m); } else { vm_page_free(m); vm_page_unlock(m); VM_OBJECT_UNLOCK(object); return (EIO); } } if (m != NULL) { pmap_zero_page_area(m, base, PAGE_SIZE - base); KASSERT(m->valid == VM_PAGE_BITS_ALL, ("shm_dotruncate: page %p is invalid", m)); vm_page_dirty(m); vm_pager_page_unswapped(m); } } delta = ptoa(object->size - nobjsize); /* Toss in memory pages. */ if (nobjsize < object->size) vm_object_page_remove(object, nobjsize, object->size, 0); /* Toss pages from swap. */ if (object->type == OBJT_SWAP) swap_pager_freespace(object, nobjsize, delta); /* Free the swap accounted for shm */ swap_release_by_cred(delta, object->cred); object->charge -= delta; } else { /* Attempt to reserve the swap */ delta = ptoa(nobjsize - object->size); if (!swap_reserve_by_cred(delta, object->cred)) { VM_OBJECT_UNLOCK(object); return (ENOMEM); } object->charge += delta; } shmfd->shm_size = length; mtx_lock(&shm_timestamp_lock); vfs_timestamp(&shmfd->shm_ctime); shmfd->shm_mtime = shmfd->shm_ctime; mtx_unlock(&shm_timestamp_lock); object->size = nobjsize; VM_OBJECT_UNLOCK(object); return (0); }