/* --------------------------------------------------------------------- */ static int tmpfs_nocacheread(vm_object_t tobj, vm_pindex_t idx, vm_offset_t offset, size_t tlen, struct uio *uio) { vm_page_t m; int error, rv; VM_OBJECT_LOCK(tobj); m = vm_page_grab(tobj, idx, VM_ALLOC_WIRED | VM_ALLOC_NORMAL | VM_ALLOC_RETRY); if (m->valid != VM_PAGE_BITS_ALL) { if (vm_pager_has_page(tobj, idx, NULL, NULL)) { rv = vm_pager_get_pages(tobj, &m, 1, 0); if (rv != VM_PAGER_OK) { vm_page_lock(m); vm_page_free(m); vm_page_unlock(m); VM_OBJECT_UNLOCK(tobj); return (EIO); } } else vm_page_zero_invalid(m, TRUE); } VM_OBJECT_UNLOCK(tobj); error = uiomove_fromphys(&m, offset, tlen, uio); VM_OBJECT_LOCK(tobj); vm_page_lock(m); vm_page_unwire(m, TRUE); vm_page_unlock(m); vm_page_wakeup(m); VM_OBJECT_UNLOCK(tobj); return (error); }
/* * Obtain a page pointer array and lock all pages into system memory. A segmentation violation will * occur here if the calling user does not have access to the submitted address. */ static int via_lock_all_dma_pages(drm_via_sg_info_t *vsg, drm_via_dmablit_t *xfer) { unsigned long first_pfn = VIA_PFN(xfer->mem_addr); vm_page_t m; int i; vsg->num_pages = VIA_PFN(xfer->mem_addr + (xfer->num_lines * xfer->mem_stride -1)) - first_pfn + 1; if (NULL == (vsg->pages = malloc(sizeof(vm_page_t) * vsg->num_pages, DRM_MEM_DRIVER, M_NOWAIT))) return -ENOMEM; vsg->state = dr_via_pages_alloc; if (vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map, (vm_offset_t)xfer->mem_addr, vsg->num_pages * PAGE_SIZE, VM_PROT_READ | VM_PROT_WRITE, vsg->pages, vsg->num_pages) < 0) return -EACCES; for (i = 0; i < vsg->num_pages; i++) { m = vsg->pages[i]; vm_page_lock(m); vm_page_wire(m); vm_page_unhold(m); vm_page_unlock(m); } vsg->state = dr_via_pages_locked; DRM_DEBUG("DMA pages locked\n"); return 0; }
/* * vm_fault_unwire: * * Unwire a range of virtual addresses in a map. */ void vm_fault_unwire(vm_map_t map, vm_offset_t start, vm_offset_t end, boolean_t fictitious) { vm_paddr_t pa; vm_offset_t va; vm_page_t m; pmap_t pmap; pmap = vm_map_pmap(map); /* * Since the pages are wired down, we must be able to get their * mappings from the physical map system. */ for (va = start; va < end; va += PAGE_SIZE) { pa = pmap_extract(pmap, va); if (pa != 0) { pmap_change_wiring(pmap, va, FALSE); if (!fictitious) { m = PHYS_TO_VM_PAGE(pa); vm_page_lock(m); vm_page_unwire(m, TRUE); vm_page_unlock(m); } } } }
/* * Function that frees up all resources for a blit. It is usable even if the * blit info has only been partially built as long as the status enum is consistent * with the actual status of the used resources. */ static void via_free_sg_info(drm_via_sg_info_t *vsg) { vm_page_t page; int i; switch(vsg->state) { case dr_via_device_mapped: via_unmap_blit_from_device(vsg); case dr_via_desc_pages_alloc: for (i=0; i<vsg->num_desc_pages; ++i) { if (vsg->desc_pages[i] != NULL) free(vsg->desc_pages[i], DRM_MEM_PAGES); } free(vsg->desc_pages, DRM_MEM_DRIVER); case dr_via_pages_locked: for (i=0; i < vsg->num_pages; ++i) { page = vsg->pages[i]; vm_page_lock(page); vm_page_unwire(page, 0); vm_page_unlock(page); } case dr_via_pages_alloc: free(vsg->pages, DRM_MEM_DRIVER); default: vsg->state = dr_via_sg_init; } free(vsg->bounce_buffer, DRM_MEM_DRIVER); vsg->bounce_buffer = NULL; vsg->free_on_sequence = 0; }
/* --------------------------------------------------------------------- */ static int tmpfs_nocacheread(vm_object_t tobj, vm_pindex_t idx, vm_offset_t offset, size_t tlen, struct uio *uio) { vm_page_t m; int error; VM_OBJECT_LOCK(tobj); vm_object_pip_add(tobj, 1); m = vm_page_grab(tobj, idx, VM_ALLOC_WIRED | VM_ALLOC_ZERO | VM_ALLOC_NORMAL | VM_ALLOC_RETRY); if (m->valid != VM_PAGE_BITS_ALL) { if (vm_pager_has_page(tobj, idx, NULL, NULL)) { error = vm_pager_get_pages(tobj, &m, 1, 0); if (error != 0) { printf("tmpfs get pages from pager error [read]\n"); goto out; } } else vm_page_zero_invalid(m, TRUE); } VM_OBJECT_UNLOCK(tobj); error = uiomove_fromphys(&m, offset, tlen, uio); VM_OBJECT_LOCK(tobj); out: vm_page_lock(m); vm_page_unwire(m, TRUE); vm_page_unlock(m); vm_page_wakeup(m); vm_object_pip_subtract(tobj, 1); VM_OBJECT_UNLOCK(tobj); return (error); }
/* * Same as above, but forces the page to be detached from the object * and go into free pool. */ void sf_ext_free_nocache(void *arg1, void *arg2) { struct sf_buf *sf = arg1; struct sendfile_sync *sfs = arg2; vm_page_t pg = sf_buf_page(sf); sf_buf_free(sf); vm_page_lock(pg); if (vm_page_unwire(pg, PQ_NONE)) { vm_object_t obj; /* Try to free the page, but only if it is cheap to. */ if ((obj = pg->object) == NULL) vm_page_free(pg); else if (!vm_page_xbusied(pg) && VM_OBJECT_TRYWLOCK(obj)) { vm_page_free(pg); VM_OBJECT_WUNLOCK(obj); } else vm_page_deactivate(pg); } vm_page_unlock(pg); if (sfs != NULL) { mtx_lock(&sfs->mtx); KASSERT(sfs->count > 0, ("Sendfile sync botchup count == 0")); if (--sfs->count == 0) cv_signal(&sfs->cv); mtx_unlock(&sfs->mtx); } }
/* * Detach mapped page and release resources back to the system. Called * by mbuf(9) code when last reference to a page is freed. */ void sf_ext_free(void *arg1, void *arg2) { struct sf_buf *sf = arg1; struct sendfile_sync *sfs = arg2; vm_page_t pg = sf_buf_page(sf); sf_buf_free(sf); vm_page_lock(pg); /* * Check for the object going away on us. This can * happen since we don't hold a reference to it. * If so, we're responsible for freeing the page. */ if (vm_page_unwire(pg, PQ_INACTIVE) && pg->object == NULL) vm_page_free(pg); vm_page_unlock(pg); if (sfs != NULL) { mtx_lock(&sfs->mtx); KASSERT(sfs->count > 0, ("Sendfile sync botchup count == 0")); if (--sfs->count == 0) cv_signal(&sfs->cv); mtx_unlock(&sfs->mtx); } }
/* * Fill as many pages as vm_fault has allocated for us. */ static int phys_pager_getpages(vm_object_t object, vm_page_t *m, int count, int reqpage) { int i; VM_OBJECT_ASSERT_WLOCKED(object); for (i = 0; i < count; i++) { if (m[i]->valid == 0) { if ((m[i]->flags & PG_ZERO) == 0) pmap_zero_page(m[i]); m[i]->valid = VM_PAGE_BITS_ALL; } KASSERT(m[i]->valid == VM_PAGE_BITS_ALL, ("phys_pager_getpages: partially valid page %p", m[i])); KASSERT(m[i]->dirty == 0, ("phys_pager_getpages: dirty page %p", m[i])); /* The requested page must remain busy, the others not. */ if (i == reqpage) { vm_page_lock(m[i]); vm_page_flash(m[i]); vm_page_unlock(m[i]); } else vm_page_xunbusy(m[i]); } return (VM_PAGER_OK); }
/* * Return the requested word from the user-space address. * Returns 0 if it isn't mapped. (For what we're using it * for, if the actual value is 0, that's equivalent to that.) */ static caddr_t GET_WORD(pmap_t map, caddr_t virtual_addr) { caddr_t retval = 0; vm_page_t page; int err; caddr_t old_fault; page = pmap_extract_and_hold(map, (vm_offset_t)virtual_addr, VM_PROT_READ); if (page == 0) { return 0; } // I do this because copyin/copyout aren't re-entrant. old_fault = curpcb->pcb_onfault; err = copyin(virtual_addr, &retval, sizeof(retval)); curpcb->pcb_onfault = old_fault; if (err != 0) { #if SAMPLE_DEBUG printf("%s(%d): copyin(%p, %p, %zd) failed: %d\n", __FUNCTION__, __LINE__, (void*)virtual_addr, &retval, sizeof(retval), err); #endif retval = 0; } vm_page_lock(page); vm_page_unhold(page); vm_page_unlock(page); return retval; }
void vm_gpa_release(void *cookie) { vm_page_t m = cookie; vm_page_lock(m); vm_page_unhold(m); vm_page_unlock(m); }
static inline void release_page(struct faultstate *fs) { vm_page_wakeup(fs->m); vm_page_lock(fs->m); vm_page_deactivate(fs->m); vm_page_unlock(fs->m); fs->m = NULL; }
/* * Release a page we've previously wired. */ static void zbuf_page_free(vm_page_t pp) { vm_page_lock(pp); vm_page_unwire(pp, 0); if (pp->wire_count == 0 && pp->object == NULL) vm_page_free(pp); vm_page_unlock(pp); }
/* * Identify the physical page mapped at the given kernel virtual * address. Insert this physical page into the given address space at * the given virtual address, replacing the physical page, if any, * that already exists there. */ static int vm_pgmoveco(vm_map_t mapa, vm_offset_t kaddr, vm_offset_t uaddr) { vm_map_t map = mapa; vm_page_t kern_pg, user_pg; vm_object_t uobject; vm_map_entry_t entry; vm_pindex_t upindex; vm_prot_t prot; boolean_t wired; KASSERT((uaddr & PAGE_MASK) == 0, ("vm_pgmoveco: uaddr is not page aligned")); /* * Herein the physical page is validated and dirtied. It is * unwired in sf_buf_mext(). */ kern_pg = PHYS_TO_VM_PAGE(vtophys(kaddr)); kern_pg->valid = VM_PAGE_BITS_ALL; KASSERT(kern_pg->queue == PQ_NONE && kern_pg->wire_count == 1, ("vm_pgmoveco: kern_pg is not correctly wired")); if ((vm_map_lookup(&map, uaddr, VM_PROT_WRITE, &entry, &uobject, &upindex, &prot, &wired)) != KERN_SUCCESS) { return(EFAULT); } VM_OBJECT_LOCK(uobject); retry: if ((user_pg = vm_page_lookup(uobject, upindex)) != NULL) { if (vm_page_sleep_if_busy(user_pg, TRUE, "vm_pgmoveco")) goto retry; vm_page_lock(user_pg); pmap_remove_all(user_pg); vm_page_free(user_pg); vm_page_unlock(user_pg); } else { /* * Even if a physical page does not exist in the * object chain's first object, a physical page from a * backing object may be mapped read only. */ if (uobject->backing_object != NULL) pmap_remove(map->pmap, uaddr, uaddr + PAGE_SIZE); } vm_page_insert(kern_pg, uobject, upindex); vm_page_dirty(kern_pg); VM_OBJECT_UNLOCK(uobject); vm_map_lookup_done(map, entry); return(KERN_SUCCESS); }
static void unwire_ddp_buffer(struct ddp_buffer *db) { int i; vm_page_t p; for (i = 0; i < db->npages; i++) { p = db->pages[i]; vm_page_lock(p); vm_page_unwire(p, PQ_INACTIVE); vm_page_unlock(p); } }
void exec_unmap_first_page(struct image_params *imgp) { vm_page_t m; if (imgp->firstpage != NULL) { m = sf_buf_page(imgp->firstpage); sf_buf_free(imgp->firstpage); imgp->firstpage = NULL; vm_page_lock(m); vm_page_unhold(m); vm_page_unlock(m); } }
void cdev_pager_free_page(vm_object_t object, vm_page_t m) { VM_OBJECT_ASSERT_WLOCKED(object); if (object->type == OBJT_MGTDEVICE) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("unmanaged %p", m)); pmap_remove_all(m); vm_page_lock(m); vm_page_remove(m); vm_page_unlock(m); } else if (object->type == OBJT_DEVICE) dev_pager_free_page(object, m); }
static void wire_ddp_buffer(struct ddp_buffer *db) { int i; vm_page_t p; for (i = 0; i < db->npages; i++) { p = db->pages[i]; vm_page_lock(p); vm_page_wire(p); vm_page_unhold(p); vm_page_unlock(p); } }
int ttm_tt_swapin(struct ttm_tt *ttm) { vm_object_t obj; vm_page_t from_page, to_page; int i, ret, rv; obj = ttm->swap_storage; VM_OBJECT_WLOCK(obj); vm_object_pip_add(obj, 1); for (i = 0; i < ttm->num_pages; ++i) { from_page = vm_page_grab(obj, i, VM_ALLOC_NOBUSY | VM_ALLOC_RETRY); if (from_page->valid != VM_PAGE_BITS_ALL) { vm_page_busy(from_page); if (vm_pager_has_page(obj, i, NULL, NULL)) { rv = vm_pager_get_pages(obj, &from_page, 1, 0); if (rv != VM_PAGER_OK) { vm_page_lock(from_page); vm_page_free(from_page); vm_page_unlock(from_page); ret = -EIO; goto err_ret; } } else vm_page_zero_invalid(from_page, TRUE); vm_page_wakeup(from_page); } to_page = ttm->pages[i]; if (unlikely(to_page == NULL)) { ret = -ENOMEM; goto err_ret; } pmap_copy_page(from_page, to_page); } vm_object_pip_wakeup(obj); VM_OBJECT_WUNLOCK(obj); if (!(ttm->page_flags & TTM_PAGE_FLAG_PERSISTENT_SWAP)) vm_object_deallocate(obj); ttm->swap_storage = NULL; ttm->page_flags &= ~TTM_PAGE_FLAG_SWAPPED; return (0); err_ret: vm_object_pip_wakeup(obj); VM_OBJECT_WUNLOCK(obj); return (ret); }
/* * Speed up the reclamation of up to "distance" pages that precede the * faulting pindex within the first object of the shadow chain. */ static void vm_fault_cache_behind(const struct faultstate *fs, int distance) { vm_object_t first_object, object; vm_page_t m, m_prev; vm_pindex_t pindex; object = fs->object; VM_OBJECT_ASSERT_WLOCKED(object); first_object = fs->first_object; if (first_object != object) { if (!VM_OBJECT_TRYWLOCK(first_object)) { VM_OBJECT_WUNLOCK(object); VM_OBJECT_WLOCK(first_object); VM_OBJECT_WLOCK(object); } } /* Neither fictitious nor unmanaged pages can be cached. */ if ((first_object->flags & (OBJ_FICTITIOUS | OBJ_UNMANAGED)) == 0) { if (fs->first_pindex < distance) pindex = 0; else pindex = fs->first_pindex - distance; if (pindex < OFF_TO_IDX(fs->entry->offset)) pindex = OFF_TO_IDX(fs->entry->offset); m = first_object != object ? fs->first_m : fs->m; KASSERT((m->oflags & VPO_BUSY) != 0, ("vm_fault_cache_behind: page %p is not busy", m)); m_prev = vm_page_prev(m); while ((m = m_prev) != NULL && m->pindex >= pindex && m->valid == VM_PAGE_BITS_ALL) { m_prev = vm_page_prev(m); if (m->busy != 0 || (m->oflags & VPO_BUSY) != 0) continue; vm_page_lock(m); if (m->hold_count == 0 && m->wire_count == 0) { pmap_remove_all(m); vm_page_aflag_clear(m, PGA_REFERENCED); if (m->dirty != 0) vm_page_deactivate(m); else vm_page_cache(m); } vm_page_unlock(m); } } if (first_object != object) VM_OBJECT_WUNLOCK(first_object); }
/* * Speed up the reclamation of up to "distance" pages that precede the * faulting pindex within the first object of the shadow chain. */ static void vm_fault_cache_behind(const struct faultstate *fs, int distance) { vm_object_t first_object, object; vm_page_t m, m_prev; vm_pindex_t pindex; object = fs->object; VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); first_object = fs->first_object; if (first_object != object) { if (!VM_OBJECT_TRYLOCK(first_object)) { VM_OBJECT_UNLOCK(object); VM_OBJECT_LOCK(first_object); VM_OBJECT_LOCK(object); } } if (first_object->type != OBJT_DEVICE && first_object->type != OBJT_PHYS && first_object->type != OBJT_SG) { if (fs->first_pindex < distance) pindex = 0; else pindex = fs->first_pindex - distance; if (pindex < OFF_TO_IDX(fs->entry->offset)) pindex = OFF_TO_IDX(fs->entry->offset); m = first_object != object ? fs->first_m : fs->m; KASSERT((m->oflags & VPO_BUSY) != 0, ("vm_fault_cache_behind: page %p is not busy", m)); m_prev = vm_page_prev(m); while ((m = m_prev) != NULL && m->pindex >= pindex && m->valid == VM_PAGE_BITS_ALL) { m_prev = vm_page_prev(m); if (m->busy != 0 || (m->oflags & VPO_BUSY) != 0) continue; vm_page_lock(m); if (m->hold_count == 0 && m->wire_count == 0) { pmap_remove_all(m); vm_page_aflag_clear(m, PGA_REFERENCED); if (m->dirty != 0) vm_page_deactivate(m); else vm_page_cache(m); } vm_page_unlock(m); } } if (first_object != object) VM_OBJECT_UNLOCK(first_object); }
/* * Given a user pointer to a page of user memory, return an sf_buf for the * page. Because we may be requesting quite a few sf_bufs, prefer failure to * deadlock and use SFB_NOWAIT. */ static struct sf_buf * zbuf_sfbuf_get(struct vm_map *map, vm_offset_t uaddr) { struct sf_buf *sf; vm_page_t pp; if (vm_fault_quick_hold_pages(map, uaddr, PAGE_SIZE, VM_PROT_READ | VM_PROT_WRITE, &pp, 1) < 0) return (NULL); vm_page_lock(pp); vm_page_wire(pp); vm_page_unhold(pp); vm_page_unlock(pp); sf = sf_buf_alloc(pp, SFB_NOWAIT); if (sf == NULL) { zbuf_page_free(pp); return (NULL); } return (sf); }
static void unlock_and_deallocate(struct faultstate *fs) { vm_object_pip_wakeup(fs->object); VM_OBJECT_WUNLOCK(fs->object); if (fs->object != fs->first_object) { VM_OBJECT_WLOCK(fs->first_object); vm_page_lock(fs->first_m); vm_page_free(fs->first_m); vm_page_unlock(fs->first_m); vm_object_pip_wakeup(fs->first_object); VM_OBJECT_WUNLOCK(fs->first_object); fs->first_m = NULL; } vm_object_deallocate(fs->first_object); unlock_map(fs); if (fs->vp != NULL) { vput(fs->vp); fs->vp = NULL; } }
static int privcmd_pg_fault(vm_object_t object, vm_ooffset_t offset, int prot, vm_page_t *mres) { struct privcmd_map *map = object->handle; vm_pindex_t pidx; vm_page_t page, oldm; if (map->mapped != true) return (VM_PAGER_FAIL); pidx = OFF_TO_IDX(offset); if (pidx >= map->size || BIT_ISSET(map->size, pidx, map->err)) return (VM_PAGER_FAIL); page = PHYS_TO_VM_PAGE(map->phys_base_addr + offset); if (page == NULL) return (VM_PAGER_FAIL); KASSERT((page->flags & PG_FICTITIOUS) != 0, ("not fictitious %p", page)); KASSERT(page->wire_count == 1, ("wire_count not 1 %p", page)); KASSERT(vm_page_busied(page) == 0, ("page %p is busy", page)); if (*mres != NULL) { oldm = *mres; vm_page_lock(oldm); vm_page_free(oldm); vm_page_unlock(oldm); *mres = NULL; } vm_page_insert(page, object, pidx); page->valid = VM_PAGE_BITS_ALL; vm_page_xbusy(page); *mres = page; return (VM_PAGER_OK); }
static void socow_iodone(void *addr, void *args) { struct sf_buf *sf; vm_page_t pp; sf = args; pp = sf_buf_page(sf); sf_buf_free(sf); /* remove COW mapping */ vm_page_lock(pp); vm_page_cowclear(pp); vm_page_unwire(pp, 0); /* * Check for the object going away on us. This can * happen since we don't hold a reference to it. * If so, we're responsible for freeing the page. */ if (pp->wire_count == 0 && pp->object == NULL) vm_page_free(pp); vm_page_unlock(pp); socow_stats.iodone++; }
/* See: old_dev_pager_fault() in device_pager.c as an example. */ static int cheri_compositor_cfb_pg_fault(vm_object_t vm_obj, vm_ooffset_t offset, int prot, vm_page_t *mres) { vm_pindex_t pidx; vm_paddr_t paddr; vm_page_t page; struct cfb_vm_object *cfb_vm_obj; struct cdev *dev; struct cheri_compositor_softc *sc; struct cdevsw *csw; vm_memattr_t memattr; int ref; int retval; pidx = OFF_TO_IDX(offset); VM_OBJECT_WUNLOCK(vm_obj); cfb_vm_obj = vm_obj->handle; dev = cfb_vm_obj->dev; sc = dev->si_drv1; retval = VM_PAGER_OK; CHERI_COMPOSITOR_DEBUG(sc, "vm_obj: %p, offset: %lu, prot: %i", vm_obj, offset, prot); csw = dev_refthread(dev, &ref); if (csw == NULL) { retval = VM_PAGER_FAIL; goto done_unlocked; } /* Traditional d_mmap() call. */ CHERI_COMPOSITOR_DEBUG(sc, "offset: %lu, nprot: %i", offset, prot); if (validate_prot_and_offset(sc, cfb_vm_obj->pool->mapped_fd, prot, offset) != 0) { retval = VM_PAGER_FAIL; goto done_unlocked; } paddr = calculate_physical_address(sc, cfb_vm_obj->pool, offset); memattr = VM_MEMATTR_UNCACHEABLE; CHERI_COMPOSITOR_DEBUG(sc, "paddr: %p, memattr: %i", (void *) paddr, memattr); dev_relthread(dev, ref); /* Sanity checks. */ KASSERT((((*mres)->flags & PG_FICTITIOUS) == 0), ("Expected non-fictitious page.")); /* * Replace the passed in reqpage page with our own fake page and * free up the all of the original pages. */ page = vm_page_getfake(paddr, memattr); VM_OBJECT_WLOCK(vm_obj); vm_page_lock(*mres); vm_page_free(*mres); vm_page_unlock(*mres); *mres = page; vm_page_insert(page, vm_obj, pidx); page->valid = VM_PAGE_BITS_ALL; /* Success! */ retval = VM_PAGER_OK; goto done; done_unlocked: VM_OBJECT_WLOCK(vm_obj); done: CHERI_COMPOSITOR_DEBUG(sc, "Finished with mres: %p (retval: %i)", *mres, retval); return (retval); }
/* * Routine: * vm_fault_copy_entry * Function: * Create new shadow object backing dst_entry with private copy of * all underlying pages. When src_entry is equal to dst_entry, * function implements COW for wired-down map entry. Otherwise, * it forks wired entry into dst_map. * * In/out conditions: * The source and destination maps must be locked for write. * The source map entry must be wired down (or be a sharing map * entry corresponding to a main map entry that is wired down). */ void vm_fault_copy_entry(vm_map_t dst_map, vm_map_t src_map, vm_map_entry_t dst_entry, vm_map_entry_t src_entry, vm_ooffset_t *fork_charge) { vm_object_t backing_object, dst_object, object, src_object; vm_pindex_t dst_pindex, pindex, src_pindex; vm_prot_t access, prot; vm_offset_t vaddr; vm_page_t dst_m; vm_page_t src_m; boolean_t src_readonly, upgrade; #ifdef lint src_map++; #endif /* lint */ upgrade = src_entry == dst_entry; src_object = src_entry->object.vm_object; src_pindex = OFF_TO_IDX(src_entry->offset); src_readonly = (src_entry->protection & VM_PROT_WRITE) == 0; /* * Create the top-level object for the destination entry. (Doesn't * actually shadow anything - we copy the pages directly.) */ dst_object = vm_object_allocate(OBJT_DEFAULT, OFF_TO_IDX(dst_entry->end - dst_entry->start)); #if VM_NRESERVLEVEL > 0 dst_object->flags |= OBJ_COLORED; dst_object->pg_color = atop(dst_entry->start); #endif VM_OBJECT_WLOCK(dst_object); KASSERT(upgrade || dst_entry->object.vm_object == NULL, ("vm_fault_copy_entry: vm_object not NULL")); dst_entry->object.vm_object = dst_object; dst_entry->offset = 0; dst_object->charge = dst_entry->end - dst_entry->start; if (fork_charge != NULL) { KASSERT(dst_entry->cred == NULL, ("vm_fault_copy_entry: leaked swp charge")); dst_object->cred = curthread->td_ucred; crhold(dst_object->cred); *fork_charge += dst_object->charge; } else { dst_object->cred = dst_entry->cred; dst_entry->cred = NULL; } access = prot = dst_entry->protection; /* * If not an upgrade, then enter the mappings in the pmap as * read and/or execute accesses. Otherwise, enter them as * write accesses. * * A writeable large page mapping is only created if all of * the constituent small page mappings are modified. Marking * PTEs as modified on inception allows promotion to happen * without taking potentially large number of soft faults. */ if (!upgrade) access &= ~VM_PROT_WRITE; /* * Loop through all of the virtual pages within the entry's * range, copying each page from the source object to the * destination object. Since the source is wired, those pages * must exist. In contrast, the destination is pageable. * Since the destination object does share any backing storage * with the source object, all of its pages must be dirtied, * regardless of whether they can be written. */ for (vaddr = dst_entry->start, dst_pindex = 0; vaddr < dst_entry->end; vaddr += PAGE_SIZE, dst_pindex++) { /* * Allocate a page in the destination object. */ do { dst_m = vm_page_alloc(dst_object, dst_pindex, VM_ALLOC_NORMAL); if (dst_m == NULL) { VM_OBJECT_WUNLOCK(dst_object); VM_WAIT; VM_OBJECT_WLOCK(dst_object); } } while (dst_m == NULL); /* * Find the page in the source object, and copy it in. * (Because the source is wired down, the page will be in * memory.) */ VM_OBJECT_WLOCK(src_object); object = src_object; pindex = src_pindex + dst_pindex; while ((src_m = vm_page_lookup(object, pindex)) == NULL && src_readonly && (backing_object = object->backing_object) != NULL) { /* * Allow fallback to backing objects if we are reading. */ VM_OBJECT_WLOCK(backing_object); pindex += OFF_TO_IDX(object->backing_object_offset); VM_OBJECT_WUNLOCK(object); object = backing_object; } if (src_m == NULL) panic("vm_fault_copy_wired: page missing"); pmap_copy_page(src_m, dst_m); VM_OBJECT_WUNLOCK(object); dst_m->valid = VM_PAGE_BITS_ALL; dst_m->dirty = VM_PAGE_BITS_ALL; VM_OBJECT_WUNLOCK(dst_object); /* * Enter it in the pmap. If a wired, copy-on-write * mapping is being replaced by a write-enabled * mapping, then wire that new mapping. */ pmap_enter(dst_map->pmap, vaddr, access, dst_m, prot, upgrade); /* * Mark it no longer busy, and put it on the active list. */ VM_OBJECT_WLOCK(dst_object); if (upgrade) { vm_page_lock(src_m); vm_page_unwire(src_m, 0); vm_page_unlock(src_m); vm_page_lock(dst_m); vm_page_wire(dst_m); vm_page_unlock(dst_m); } else { vm_page_lock(dst_m); vm_page_activate(dst_m); vm_page_unlock(dst_m); } vm_page_wakeup(dst_m); } VM_OBJECT_WUNLOCK(dst_object); if (upgrade) { dst_entry->eflags &= ~(MAP_ENTRY_COW | MAP_ENTRY_NEEDS_COPY); vm_object_deallocate(src_object); } }
int vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type, int fault_flags, vm_page_t *m_hold) { vm_prot_t prot; long ahead, behind; int alloc_req, era, faultcount, nera, reqpage, result; boolean_t growstack, is_first_object_locked, wired; int map_generation; vm_object_t next_object; vm_page_t marray[VM_FAULT_READ_MAX]; int hardfault; struct faultstate fs; struct vnode *vp; int locked, error; hardfault = 0; growstack = TRUE; PCPU_INC(cnt.v_vm_faults); fs.vp = NULL; faultcount = reqpage = 0; RetryFault:; /* * Find the backing store object and offset into it to begin the * search. */ fs.map = map; result = vm_map_lookup(&fs.map, vaddr, fault_type, &fs.entry, &fs.first_object, &fs.first_pindex, &prot, &wired); if (result != KERN_SUCCESS) { if (growstack && result == KERN_INVALID_ADDRESS && map != kernel_map) { result = vm_map_growstack(curproc, vaddr); if (result != KERN_SUCCESS) return (KERN_FAILURE); growstack = FALSE; goto RetryFault; } return (result); } map_generation = fs.map->timestamp; if (fs.entry->eflags & MAP_ENTRY_NOFAULT) { panic("vm_fault: fault on nofault entry, addr: %lx", (u_long)vaddr); } /* * Make a reference to this object to prevent its disposal while we * are messing with it. Once we have the reference, the map is free * to be diddled. Since objects reference their shadows (and copies), * they will stay around as well. * * Bump the paging-in-progress count to prevent size changes (e.g. * truncation operations) during I/O. This must be done after * obtaining the vnode lock in order to avoid possible deadlocks. */ VM_OBJECT_WLOCK(fs.first_object); vm_object_reference_locked(fs.first_object); vm_object_pip_add(fs.first_object, 1); fs.lookup_still_valid = TRUE; if (wired) fault_type = prot | (fault_type & VM_PROT_COPY); fs.first_m = NULL; /* * Search for the page at object/offset. */ fs.object = fs.first_object; fs.pindex = fs.first_pindex; while (TRUE) { /* * If the object is dead, we stop here */ if (fs.object->flags & OBJ_DEAD) { unlock_and_deallocate(&fs); return (KERN_PROTECTION_FAILURE); } /* * See if page is resident */ fs.m = vm_page_lookup(fs.object, fs.pindex); if (fs.m != NULL) { /* * check for page-based copy on write. * We check fs.object == fs.first_object so * as to ensure the legacy COW mechanism is * used when the page in question is part of * a shadow object. Otherwise, vm_page_cowfault() * removes the page from the backing object, * which is not what we want. */ vm_page_lock(fs.m); if ((fs.m->cow) && (fault_type & VM_PROT_WRITE) && (fs.object == fs.first_object)) { vm_page_cowfault(fs.m); unlock_and_deallocate(&fs); goto RetryFault; } /* * Wait/Retry if the page is busy. We have to do this * if the page is busy via either VPO_BUSY or * vm_page_t->busy because the vm_pager may be using * vm_page_t->busy for pageouts ( and even pageins if * it is the vnode pager ), and we could end up trying * to pagein and pageout the same page simultaneously. * * We can theoretically allow the busy case on a read * fault if the page is marked valid, but since such * pages are typically already pmap'd, putting that * special case in might be more effort then it is * worth. We cannot under any circumstances mess * around with a vm_page_t->busy page except, perhaps, * to pmap it. */ if ((fs.m->oflags & VPO_BUSY) || fs.m->busy) { /* * Reference the page before unlocking and * sleeping so that the page daemon is less * likely to reclaim it. */ vm_page_aflag_set(fs.m, PGA_REFERENCED); vm_page_unlock(fs.m); if (fs.object != fs.first_object) { if (!VM_OBJECT_TRYWLOCK( fs.first_object)) { VM_OBJECT_WUNLOCK(fs.object); VM_OBJECT_WLOCK(fs.first_object); VM_OBJECT_WLOCK(fs.object); } vm_page_lock(fs.first_m); vm_page_free(fs.first_m); vm_page_unlock(fs.first_m); vm_object_pip_wakeup(fs.first_object); VM_OBJECT_WUNLOCK(fs.first_object); fs.first_m = NULL; } unlock_map(&fs); if (fs.m == vm_page_lookup(fs.object, fs.pindex)) { vm_page_sleep_if_busy(fs.m, TRUE, "vmpfw"); } vm_object_pip_wakeup(fs.object); VM_OBJECT_WUNLOCK(fs.object); PCPU_INC(cnt.v_intrans); vm_object_deallocate(fs.first_object); goto RetryFault; } vm_page_remque(fs.m); vm_page_unlock(fs.m); /* * Mark page busy for other processes, and the * pagedaemon. If it still isn't completely valid * (readable), jump to readrest, else break-out ( we * found the page ). */ vm_page_busy(fs.m); if (fs.m->valid != VM_PAGE_BITS_ALL) goto readrest; break; } /* * Page is not resident, If this is the search termination * or the pager might contain the page, allocate a new page. */ if (TRYPAGER || fs.object == fs.first_object) { if (fs.pindex >= fs.object->size) { unlock_and_deallocate(&fs); return (KERN_PROTECTION_FAILURE); } /* * Allocate a new page for this object/offset pair. * * Unlocked read of the p_flag is harmless. At * worst, the P_KILLED might be not observed * there, and allocation can fail, causing * restart and new reading of the p_flag. */ fs.m = NULL; if (!vm_page_count_severe() || P_KILLED(curproc)) { #if VM_NRESERVLEVEL > 0 if ((fs.object->flags & OBJ_COLORED) == 0) { fs.object->flags |= OBJ_COLORED; fs.object->pg_color = atop(vaddr) - fs.pindex; } #endif alloc_req = P_KILLED(curproc) ? VM_ALLOC_SYSTEM : VM_ALLOC_NORMAL; if (fs.object->type != OBJT_VNODE && fs.object->backing_object == NULL) alloc_req |= VM_ALLOC_ZERO; fs.m = vm_page_alloc(fs.object, fs.pindex, alloc_req); } if (fs.m == NULL) { unlock_and_deallocate(&fs); VM_WAITPFAULT; goto RetryFault; } else if (fs.m->valid == VM_PAGE_BITS_ALL) break; } readrest: /* * We have found a valid page or we have allocated a new page. * The page thus may not be valid or may not be entirely * valid. * * Attempt to fault-in the page if there is a chance that the * pager has it, and potentially fault in additional pages * at the same time. */ if (TRYPAGER) { int rv; u_char behavior = vm_map_entry_behavior(fs.entry); if (behavior == MAP_ENTRY_BEHAV_RANDOM || P_KILLED(curproc)) { behind = 0; ahead = 0; } else if (behavior == MAP_ENTRY_BEHAV_SEQUENTIAL) { behind = 0; ahead = atop(fs.entry->end - vaddr) - 1; if (ahead > VM_FAULT_READ_AHEAD_MAX) ahead = VM_FAULT_READ_AHEAD_MAX; if (fs.pindex == fs.entry->next_read) vm_fault_cache_behind(&fs, VM_FAULT_READ_MAX); } else { /* * If this is a sequential page fault, then * arithmetically increase the number of pages * in the read-ahead window. Otherwise, reset * the read-ahead window to its smallest size. */ behind = atop(vaddr - fs.entry->start); if (behind > VM_FAULT_READ_BEHIND) behind = VM_FAULT_READ_BEHIND; ahead = atop(fs.entry->end - vaddr) - 1; era = fs.entry->read_ahead; if (fs.pindex == fs.entry->next_read) { nera = era + behind; if (nera > VM_FAULT_READ_AHEAD_MAX) nera = VM_FAULT_READ_AHEAD_MAX; behind = 0; if (ahead > nera) ahead = nera; if (era == VM_FAULT_READ_AHEAD_MAX) vm_fault_cache_behind(&fs, VM_FAULT_CACHE_BEHIND); } else if (ahead > VM_FAULT_READ_AHEAD_MIN) ahead = VM_FAULT_READ_AHEAD_MIN; if (era != ahead) fs.entry->read_ahead = ahead; } /* * Call the pager to retrieve the data, if any, after * releasing the lock on the map. We hold a ref on * fs.object and the pages are VPO_BUSY'd. */ unlock_map(&fs); if (fs.object->type == OBJT_VNODE) { vp = fs.object->handle; if (vp == fs.vp) goto vnode_locked; else if (fs.vp != NULL) { vput(fs.vp); fs.vp = NULL; } locked = VOP_ISLOCKED(vp); if (locked != LK_EXCLUSIVE) locked = LK_SHARED; /* Do not sleep for vnode lock while fs.m is busy */ error = vget(vp, locked | LK_CANRECURSE | LK_NOWAIT, curthread); if (error != 0) { vhold(vp); release_page(&fs); unlock_and_deallocate(&fs); error = vget(vp, locked | LK_RETRY | LK_CANRECURSE, curthread); vdrop(vp); fs.vp = vp; KASSERT(error == 0, ("vm_fault: vget failed")); goto RetryFault; } fs.vp = vp; } vnode_locked: KASSERT(fs.vp == NULL || !fs.map->system_map, ("vm_fault: vnode-backed object mapped by system map")); /* * now we find out if any other pages should be paged * in at this time this routine checks to see if the * pages surrounding this fault reside in the same * object as the page for this fault. If they do, * then they are faulted in also into the object. The * array "marray" returned contains an array of * vm_page_t structs where one of them is the * vm_page_t passed to the routine. The reqpage * return value is the index into the marray for the * vm_page_t passed to the routine. * * fs.m plus the additional pages are VPO_BUSY'd. */ faultcount = vm_fault_additional_pages( fs.m, behind, ahead, marray, &reqpage); rv = faultcount ? vm_pager_get_pages(fs.object, marray, faultcount, reqpage) : VM_PAGER_FAIL; if (rv == VM_PAGER_OK) { /* * Found the page. Leave it busy while we play * with it. */ /* * Relookup in case pager changed page. Pager * is responsible for disposition of old page * if moved. */ fs.m = vm_page_lookup(fs.object, fs.pindex); if (!fs.m) { unlock_and_deallocate(&fs); goto RetryFault; } hardfault++; break; /* break to PAGE HAS BEEN FOUND */ } /* * Remove the bogus page (which does not exist at this * object/offset); before doing so, we must get back * our object lock to preserve our invariant. * * Also wake up any other process that may want to bring * in this page. * * If this is the top-level object, we must leave the * busy page to prevent another process from rushing * past us, and inserting the page in that object at * the same time that we are. */ if (rv == VM_PAGER_ERROR) printf("vm_fault: pager read error, pid %d (%s)\n", curproc->p_pid, curproc->p_comm); /* * Data outside the range of the pager or an I/O error */ /* * XXX - the check for kernel_map is a kludge to work * around having the machine panic on a kernel space * fault w/ I/O error. */ if (((fs.map != kernel_map) && (rv == VM_PAGER_ERROR)) || (rv == VM_PAGER_BAD)) { vm_page_lock(fs.m); vm_page_free(fs.m); vm_page_unlock(fs.m); fs.m = NULL; unlock_and_deallocate(&fs); return ((rv == VM_PAGER_ERROR) ? KERN_FAILURE : KERN_PROTECTION_FAILURE); } if (fs.object != fs.first_object) { vm_page_lock(fs.m); vm_page_free(fs.m); vm_page_unlock(fs.m); fs.m = NULL; /* * XXX - we cannot just fall out at this * point, m has been freed and is invalid! */ } } /* * We get here if the object has default pager (or unwiring) * or the pager doesn't have the page. */ if (fs.object == fs.first_object) fs.first_m = fs.m; /* * Move on to the next object. Lock the next object before * unlocking the current one. */ fs.pindex += OFF_TO_IDX(fs.object->backing_object_offset); next_object = fs.object->backing_object; if (next_object == NULL) { /* * If there's no object left, fill the page in the top * object with zeros. */ if (fs.object != fs.first_object) { vm_object_pip_wakeup(fs.object); VM_OBJECT_WUNLOCK(fs.object); fs.object = fs.first_object; fs.pindex = fs.first_pindex; fs.m = fs.first_m; VM_OBJECT_WLOCK(fs.object); } fs.first_m = NULL; /* * Zero the page if necessary and mark it valid. */ if ((fs.m->flags & PG_ZERO) == 0) { pmap_zero_page(fs.m); } else { PCPU_INC(cnt.v_ozfod); } PCPU_INC(cnt.v_zfod); fs.m->valid = VM_PAGE_BITS_ALL; break; /* break to PAGE HAS BEEN FOUND */ } else { KASSERT(fs.object != next_object, ("object loop %p", next_object)); VM_OBJECT_WLOCK(next_object); vm_object_pip_add(next_object, 1); if (fs.object != fs.first_object) vm_object_pip_wakeup(fs.object); VM_OBJECT_WUNLOCK(fs.object); fs.object = next_object; } } KASSERT((fs.m->oflags & VPO_BUSY) != 0, ("vm_fault: not busy after main loop")); /* * PAGE HAS BEEN FOUND. [Loop invariant still holds -- the object lock * is held.] */ /* * If the page is being written, but isn't already owned by the * top-level object, we have to copy it into a new page owned by the * top-level object. */ if (fs.object != fs.first_object) { /* * We only really need to copy if we want to write it. */ if ((fault_type & (VM_PROT_COPY | VM_PROT_WRITE)) != 0) { /* * This allows pages to be virtually copied from a * backing_object into the first_object, where the * backing object has no other refs to it, and cannot * gain any more refs. Instead of a bcopy, we just * move the page from the backing object to the * first object. Note that we must mark the page * dirty in the first object so that it will go out * to swap when needed. */ is_first_object_locked = FALSE; if ( /* * Only one shadow object */ (fs.object->shadow_count == 1) && /* * No COW refs, except us */ (fs.object->ref_count == 1) && /* * No one else can look this object up */ (fs.object->handle == NULL) && /* * No other ways to look the object up */ ((fs.object->type == OBJT_DEFAULT) || (fs.object->type == OBJT_SWAP)) && (is_first_object_locked = VM_OBJECT_TRYWLOCK(fs.first_object)) && /* * We don't chase down the shadow chain */ fs.object == fs.first_object->backing_object) { /* * get rid of the unnecessary page */ vm_page_lock(fs.first_m); vm_page_free(fs.first_m); vm_page_unlock(fs.first_m); /* * grab the page and put it into the * process'es object. The page is * automatically made dirty. */ vm_page_lock(fs.m); vm_page_rename(fs.m, fs.first_object, fs.first_pindex); vm_page_unlock(fs.m); vm_page_busy(fs.m); fs.first_m = fs.m; fs.m = NULL; PCPU_INC(cnt.v_cow_optim); } else { /* * Oh, well, lets copy it. */ pmap_copy_page(fs.m, fs.first_m); fs.first_m->valid = VM_PAGE_BITS_ALL; if (wired && (fault_flags & VM_FAULT_CHANGE_WIRING) == 0) { vm_page_lock(fs.first_m); vm_page_wire(fs.first_m); vm_page_unlock(fs.first_m); vm_page_lock(fs.m); vm_page_unwire(fs.m, FALSE); vm_page_unlock(fs.m); } /* * We no longer need the old page or object. */ release_page(&fs); } /* * fs.object != fs.first_object due to above * conditional */ vm_object_pip_wakeup(fs.object); VM_OBJECT_WUNLOCK(fs.object); /* * Only use the new page below... */ fs.object = fs.first_object; fs.pindex = fs.first_pindex; fs.m = fs.first_m; if (!is_first_object_locked) VM_OBJECT_WLOCK(fs.object); PCPU_INC(cnt.v_cow_faults); curthread->td_cow++; } else { prot &= ~VM_PROT_WRITE; } } /* * We must verify that the maps have not changed since our last * lookup. */ if (!fs.lookup_still_valid) { vm_object_t retry_object; vm_pindex_t retry_pindex; vm_prot_t retry_prot; if (!vm_map_trylock_read(fs.map)) { release_page(&fs); unlock_and_deallocate(&fs); goto RetryFault; } fs.lookup_still_valid = TRUE; if (fs.map->timestamp != map_generation) { result = vm_map_lookup_locked(&fs.map, vaddr, fault_type, &fs.entry, &retry_object, &retry_pindex, &retry_prot, &wired); /* * If we don't need the page any longer, put it on the inactive * list (the easiest thing to do here). If no one needs it, * pageout will grab it eventually. */ if (result != KERN_SUCCESS) { release_page(&fs); unlock_and_deallocate(&fs); /* * If retry of map lookup would have blocked then * retry fault from start. */ if (result == KERN_FAILURE) goto RetryFault; return (result); } if ((retry_object != fs.first_object) || (retry_pindex != fs.first_pindex)) { release_page(&fs); unlock_and_deallocate(&fs); goto RetryFault; } /* * Check whether the protection has changed or the object has * been copied while we left the map unlocked. Changing from * read to write permission is OK - we leave the page * write-protected, and catch the write fault. Changing from * write to read permission means that we can't mark the page * write-enabled after all. */ prot &= retry_prot; } } /* * If the page was filled by a pager, update the map entry's * last read offset. Since the pager does not return the * actual set of pages that it read, this update is based on * the requested set. Typically, the requested and actual * sets are the same. * * XXX The following assignment modifies the map * without holding a write lock on it. */ if (hardfault) fs.entry->next_read = fs.pindex + faultcount - reqpage; if ((prot & VM_PROT_WRITE) != 0 || (fault_flags & VM_FAULT_DIRTY) != 0) { vm_object_set_writeable_dirty(fs.object); /* * If this is a NOSYNC mmap we do not want to set VPO_NOSYNC * if the page is already dirty to prevent data written with * the expectation of being synced from not being synced. * Likewise if this entry does not request NOSYNC then make * sure the page isn't marked NOSYNC. Applications sharing * data should use the same flags to avoid ping ponging. */ if (fs.entry->eflags & MAP_ENTRY_NOSYNC) { if (fs.m->dirty == 0) fs.m->oflags |= VPO_NOSYNC; } else { fs.m->oflags &= ~VPO_NOSYNC; } /* * If the fault is a write, we know that this page is being * written NOW so dirty it explicitly to save on * pmap_is_modified() calls later. * * Also tell the backing pager, if any, that it should remove * any swap backing since the page is now dirty. */ if (((fault_type & VM_PROT_WRITE) != 0 && (fault_flags & VM_FAULT_CHANGE_WIRING) == 0) || (fault_flags & VM_FAULT_DIRTY) != 0) { vm_page_dirty(fs.m); vm_pager_page_unswapped(fs.m); } } /* * Page had better still be busy */ KASSERT(fs.m->oflags & VPO_BUSY, ("vm_fault: page %p not busy!", fs.m)); /* * Page must be completely valid or it is not fit to * map into user space. vm_pager_get_pages() ensures this. */ KASSERT(fs.m->valid == VM_PAGE_BITS_ALL, ("vm_fault: page %p partially invalid", fs.m)); VM_OBJECT_WUNLOCK(fs.object); /* * Put this page into the physical map. We had to do the unlock above * because pmap_enter() may sleep. We don't put the page * back on the active queue until later so that the pageout daemon * won't find it (yet). */ pmap_enter(fs.map->pmap, vaddr, fault_type, fs.m, prot, wired); if ((fault_flags & VM_FAULT_CHANGE_WIRING) == 0 && wired == 0) vm_fault_prefault(fs.map->pmap, vaddr, fs.entry); VM_OBJECT_WLOCK(fs.object); vm_page_lock(fs.m); /* * If the page is not wired down, then put it where the pageout daemon * can find it. */ if (fault_flags & VM_FAULT_CHANGE_WIRING) { if (wired) vm_page_wire(fs.m); else vm_page_unwire(fs.m, 1); } else vm_page_activate(fs.m); if (m_hold != NULL) { *m_hold = fs.m; vm_page_hold(fs.m); } vm_page_unlock(fs.m); vm_page_wakeup(fs.m); /* * Unlock everything, and return */ unlock_and_deallocate(&fs); if (hardfault) { PCPU_INC(cnt.v_io_faults); curthread->td_ru.ru_majflt++; } else curthread->td_ru.ru_minflt++; return (KERN_SUCCESS); }
/* * Hold each of the physical pages that are mapped by the specified range of * virtual addresses, ["addr", "addr" + "len"), if those mappings are valid * and allow the specified types of access, "prot". If all of the implied * pages are successfully held, then the number of held pages is returned * together with pointers to those pages in the array "ma". However, if any * of the pages cannot be held, -1 is returned. */ int vm_fault_quick_hold_pages(vm_map_t map, vm_offset_t addr, vm_size_t len, vm_prot_t prot, vm_page_t *ma, int max_count) { vm_offset_t end, va; vm_page_t *mp; int count; boolean_t pmap_failed; if (len == 0) return (0); end = round_page(addr + len); addr = trunc_page(addr); /* * Check for illegal addresses. */ if (addr < vm_map_min(map) || addr > end || end > vm_map_max(map)) return (-1); count = howmany(end - addr, PAGE_SIZE); if (count > max_count) panic("vm_fault_quick_hold_pages: count > max_count"); /* * Most likely, the physical pages are resident in the pmap, so it is * faster to try pmap_extract_and_hold() first. */ pmap_failed = FALSE; for (mp = ma, va = addr; va < end; mp++, va += PAGE_SIZE) { *mp = pmap_extract_and_hold(map->pmap, va, prot); if (*mp == NULL) pmap_failed = TRUE; else if ((prot & VM_PROT_WRITE) != 0 && (*mp)->dirty != VM_PAGE_BITS_ALL) { /* * Explicitly dirty the physical page. Otherwise, the * caller's changes may go unnoticed because they are * performed through an unmanaged mapping or by a DMA * operation. * * The object lock is not held here. * See vm_page_clear_dirty_mask(). */ vm_page_dirty(*mp); } } if (pmap_failed) { /* * One or more pages could not be held by the pmap. Either no * page was mapped at the specified virtual address or that * mapping had insufficient permissions. Attempt to fault in * and hold these pages. */ for (mp = ma, va = addr; va < end; mp++, va += PAGE_SIZE) if (*mp == NULL && vm_fault_hold(map, va, prot, VM_FAULT_NORMAL, mp) != KERN_SUCCESS) goto error; } return (count); error: for (mp = ma; mp < ma + count; mp++) if (*mp != NULL) { vm_page_lock(*mp); vm_page_unhold(*mp); vm_page_unlock(*mp); } return (-1); }
static int sg_pager_getpages(vm_object_t object, vm_page_t *m, int count, int reqpage) { struct sglist *sg; vm_page_t m_paddr, page; vm_pindex_t offset; vm_paddr_t paddr; vm_memattr_t memattr; size_t space; int i; VM_OBJECT_ASSERT_WLOCKED(object); sg = object->handle; memattr = object->memattr; VM_OBJECT_WUNLOCK(object); offset = m[reqpage]->pindex; /* * Lookup the physical address of the requested page. An initial * value of '1' instead of '0' is used so we can assert that the * page is found since '0' can be a valid page-aligned physical * address. */ space = 0; paddr = 1; for (i = 0; i < sg->sg_nseg; i++) { if (space + sg->sg_segs[i].ss_len <= (offset * PAGE_SIZE)) { space += sg->sg_segs[i].ss_len; continue; } paddr = sg->sg_segs[i].ss_paddr + offset * PAGE_SIZE - space; break; } KASSERT(paddr != 1, ("invalid SG page index")); /* If "paddr" is a real page, perform a sanity check on "memattr". */ if ((m_paddr = vm_phys_paddr_to_vm_page(paddr)) != NULL && pmap_page_get_memattr(m_paddr) != memattr) { memattr = pmap_page_get_memattr(m_paddr); printf( "WARNING: A device driver has set \"memattr\" inconsistently.\n"); } /* Return a fake page for the requested page. */ KASSERT(!(m[reqpage]->flags & PG_FICTITIOUS), ("backing page for SG is fake")); /* Construct a new fake page. */ page = vm_page_getfake(paddr, memattr); VM_OBJECT_WLOCK(object); TAILQ_INSERT_TAIL(&object->un_pager.sgp.sgp_pglist, page, plinks.q); /* Free the original pages and insert this fake page into the object. */ for (i = 0; i < count; i++) { if (i == reqpage && vm_page_replace(page, object, offset) != m[i]) panic("sg_pager_getpages: invalid place replacement"); vm_page_lock(m[i]); vm_page_free(m[i]); vm_page_unlock(m[i]); } m[reqpage] = page; page->valid = VM_PAGE_BITS_ALL; return (VM_PAGER_OK); }
int socow_setup(struct mbuf *m0, struct uio *uio) { struct sf_buf *sf; vm_page_t pp; struct iovec *iov; struct vmspace *vmspace; struct vm_map *map; vm_offset_t offset, uva; socow_stats.attempted++; vmspace = curproc->p_vmspace; map = &vmspace->vm_map; uva = (vm_offset_t) uio->uio_iov->iov_base; offset = uva & PAGE_MASK; /* * Verify that access to the given address is allowed from user-space. */ if (vm_fault_quick((caddr_t)uva, VM_PROT_READ) < 0) return (0); /* * verify page is mapped & not already wired for i/o */ pp = pmap_extract_and_hold(map->pmap, uva, VM_PROT_READ); if (pp == NULL) { socow_stats.fail_not_mapped++; return(0); } /* * set up COW */ vm_page_lock(pp); if (vm_page_cowsetup(pp) != 0) { vm_page_unhold(pp); vm_page_unlock(pp); return (0); } /* * wire the page for I/O */ vm_page_wire(pp); vm_page_unhold(pp); vm_page_unlock(pp); /* * Allocate an sf buf */ sf = sf_buf_alloc(pp, SFB_CATCH); if (sf == NULL) { vm_page_lock(pp); vm_page_cowclear(pp); vm_page_unwire(pp, 0); /* * Check for the object going away on us. This can * happen since we don't hold a reference to it. * If so, we're responsible for freeing the page. */ if (pp->wire_count == 0 && pp->object == NULL) vm_page_free(pp); vm_page_unlock(pp); socow_stats.fail_sf_buf++; return(0); } /* * attach to mbuf */ MEXTADD(m0, sf_buf_kva(sf), PAGE_SIZE, socow_iodone, (void*)sf_buf_kva(sf), sf, M_RDONLY, EXT_SFBUF); m0->m_len = PAGE_SIZE - offset; m0->m_data = (caddr_t)sf_buf_kva(sf) + offset; socow_stats.success++; iov = uio->uio_iov; iov->iov_base = (char *)iov->iov_base + m0->m_len; iov->iov_len -= m0->m_len; uio->uio_resid -= m0->m_len; uio->uio_offset += m0->m_len; if (iov->iov_len == 0) { uio->uio_iov++; uio->uio_iovcnt--; } return(m0->m_len); }