int ttm_bo_mmap_single(struct ttm_bo_device *bdev, vm_ooffset_t *offset, vm_size_t size, struct vm_object **obj_res, int nprot) { struct ttm_bo_driver *driver; struct ttm_buffer_object *bo; struct vm_object *vm_obj; int ret; rw_wlock(&bdev->vm_lock); bo = ttm_bo_vm_lookup_rb(bdev, OFF_TO_IDX(*offset), OFF_TO_IDX(size)); if (likely(bo != NULL)) refcount_acquire(&bo->kref); rw_wunlock(&bdev->vm_lock); if (unlikely(bo == NULL)) { printf("[TTM] Could not find buffer object to map\n"); return (EINVAL); } driver = bo->bdev->driver; if (unlikely(!driver->verify_access)) { ret = EPERM; goto out_unref; } ret = -driver->verify_access(bo); if (unlikely(ret != 0)) goto out_unref; vm_obj = cdev_pager_allocate(bo, OBJT_MGTDEVICE, &ttm_pager_ops, size, nprot, 0, curthread->td_ucred); if (vm_obj == NULL) { ret = EINVAL; goto out_unref; } /* * Note: We're transferring the bo reference to vm_obj->handle here. */ *offset = 0; *obj_res = vm_obj; return 0; out_unref: ttm_bo_unref(&bo); return ret; }
/* * MPSAFE */ static vm_object_t phys_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot, vm_ooffset_t foff, struct ucred *cred) { vm_object_t object, object1; vm_pindex_t pindex; /* * Offset should be page aligned. */ if (foff & PAGE_MASK) return (NULL); pindex = OFF_TO_IDX(foff + PAGE_MASK + size); if (handle != NULL) { mtx_lock(&phys_pager_mtx); /* * Look up pager, creating as necessary. */ object1 = NULL; object = vm_pager_object_lookup(&phys_pager_object_list, handle); if (object == NULL) { /* * Allocate object and associate it with the pager. */ mtx_unlock(&phys_pager_mtx); object1 = vm_object_allocate(OBJT_PHYS, pindex); mtx_lock(&phys_pager_mtx); object = vm_pager_object_lookup(&phys_pager_object_list, handle); if (object != NULL) { /* * We raced with other thread while * allocating object. */ if (pindex > object->size) object->size = pindex; } else { object = object1; object1 = NULL; object->handle = handle; TAILQ_INSERT_TAIL(&phys_pager_object_list, object, pager_object_list); } } else { if (pindex > object->size) object->size = pindex; } mtx_unlock(&phys_pager_mtx); vm_object_deallocate(object1); } else { object = vm_object_allocate(OBJT_PHYS, pindex); } return (object); }
/* * no_pager_alloc just returns an initialized object. */ static vm_object_t default_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot, vm_ooffset_t offset) { if (handle != NULL) panic("default_pager_alloc: handle specified"); return vm_object_allocate(OBJT_DEFAULT, OFF_TO_IDX(round_page(offset + size))); }
int drm_legacy_sg_alloc(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_scatter_gather *request = data; struct drm_sg_mem *entry; vm_size_t size; vm_pindex_t pindex; if (dev->sg) return -EINVAL; DRM_DEBUG("request size=%ld\n", request->size); entry = kmalloc(sizeof(*entry), M_DRM, M_WAITOK | M_ZERO); size = round_page(request->size); entry->pages = OFF_TO_IDX(size); entry->busaddr = kmalloc(entry->pages * sizeof(*entry->busaddr), M_DRM, M_WAITOK | M_ZERO); entry->vaddr = kmem_alloc_attr(&kernel_map, size, VM_SUBSYS_DRM_SCAT, M_WAITOK | M_ZERO, 0, BUS_SPACE_MAXADDR_32BIT, VM_MEMATTR_WRITE_COMBINING); if (entry->vaddr == 0) { drm_sg_cleanup(entry); return (-ENOMEM); } for(pindex = 0; pindex < entry->pages; pindex++) { entry->busaddr[pindex] = vtophys(entry->vaddr + IDX_TO_OFF(pindex)); } DRM_LOCK(dev); if (dev->sg) { DRM_UNLOCK(dev); drm_sg_cleanup(entry); return (-EINVAL); } dev->sg = entry; DRM_UNLOCK(dev); request->handle = entry->vaddr; DRM_DEBUG("allocated %ju pages @ 0x%08jx, contents=%08lx\n", entry->pages, (uintmax_t)entry->vaddr, *(unsigned long *)entry->vaddr); return (0); }
int drm_sg_alloc(struct drm_device *dev, struct drm_scatter_gather * request) { struct drm_sg_mem *entry; vm_size_t size; vm_pindex_t pindex; DRM_DEBUG("\n"); if (!drm_core_check_feature(dev, DRIVER_SG)) return -EINVAL; if (dev->sg) return -EINVAL; entry = malloc(sizeof(*entry), DRM_MEM_DRIVER, M_NOWAIT | M_ZERO); if (!entry) return -ENOMEM; DRM_DEBUG("request size=%ld\n", request->size); size = round_page(request->size); entry->pages = OFF_TO_IDX(size); entry->busaddr = malloc(entry->pages * sizeof(*entry->busaddr), DRM_MEM_SGLISTS, M_NOWAIT | M_ZERO); if (!entry->busaddr) { free(entry, DRM_MEM_DRIVER); return -ENOMEM; } entry->vaddr = drm_vmalloc_dma(size); if (entry->vaddr == 0) { free(entry->busaddr, DRM_MEM_DRIVER); free(entry, DRM_MEM_DRIVER); return -ENOMEM; } for (pindex = 0; pindex < entry->pages; pindex++) { entry->busaddr[pindex] = vtophys(entry->vaddr + IDX_TO_OFF(pindex)); } request->handle = entry->vaddr; dev->sg = entry; DRM_DEBUG("allocated %ju pages @ 0x%08zx, contents=%08lx\n", entry->pages, entry->vaddr, *(unsigned long *)entry->vaddr); return 0; }
static vm_object_t sg_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot, vm_ooffset_t foff, struct ucred *cred) { struct sglist *sg; vm_object_t object; vm_pindex_t npages, pindex; int i; /* * Offset should be page aligned. */ if (foff & PAGE_MASK) return (NULL); /* * The scatter/gather list must only include page-aligned * ranges. */ npages = 0; sg = handle; for (i = 0; i < sg->sg_nseg; i++) { if ((sg->sg_segs[i].ss_paddr % PAGE_SIZE) != 0 || (sg->sg_segs[i].ss_len % PAGE_SIZE) != 0) return (NULL); npages += sg->sg_segs[i].ss_len / PAGE_SIZE; } /* * The scatter/gather list has a fixed size. Refuse requests * to map beyond that. */ size = round_page(size); pindex = OFF_TO_IDX(foff + size); if (pindex > npages) return (NULL); /* * Allocate a new object and associate it with the * scatter/gather list. It is ok for our purposes to have * multiple VM objects associated with the same scatter/gather * list because scatter/gather lists are static. This is also * simpler than ensuring a unique object per scatter/gather * list. */ object = vm_object_allocate(OBJT_SG, npages); object->handle = sglist_hold(sg); TAILQ_INIT(&object->un_pager.sgp.sgp_pglist); return (object); }
static int netmap_dev_pager_fault(vm_object_t object, vm_ooffset_t offset, int prot, vm_page_t *mres) { struct netmap_vm_handle_t *vmh = object->handle; struct netmap_priv_d *priv = vmh->priv; vm_paddr_t paddr; vm_page_t page; vm_memattr_t memattr; vm_pindex_t pidx; ND("object %p offset %jd prot %d mres %p", object, (intmax_t)offset, prot, mres); memattr = object->memattr; pidx = OFF_TO_IDX(offset); paddr = netmap_mem_ofstophys(priv->np_mref, offset); if (paddr == 0) return VM_PAGER_FAIL; if (((*mres)->flags & PG_FICTITIOUS) != 0) { /* * If the passed in result page is a fake page, update it with * the new physical address. */ page = *mres; vm_page_updatefake(page, paddr, memattr); } else { /* * Replace the passed in reqpage page with our own fake page and * free up the all of the original pages. */ #ifndef VM_OBJECT_WUNLOCK /* FreeBSD < 10.x */ #define VM_OBJECT_WUNLOCK VM_OBJECT_UNLOCK #define VM_OBJECT_WLOCK VM_OBJECT_LOCK #endif /* VM_OBJECT_WUNLOCK */ VM_OBJECT_WUNLOCK(object); page = vm_page_getfake(paddr, memattr); VM_OBJECT_WLOCK(object); vm_page_free(*mres); *mres = page; vm_page_insert(page, object, pidx); } page->valid = VM_PAGE_BITS_ALL; return (VM_PAGER_OK); }
static int old_dev_pager_fault(vm_object_t object, vm_ooffset_t offset, int prot, vm_page_t *mres) { vm_paddr_t paddr; vm_page_t page; vm_offset_t pidx = OFF_TO_IDX(offset); cdev_t dev; page = *mres; dev = object->handle; paddr = pmap_phys_address( dev_dmmap(dev, offset, prot, NULL)); KASSERT(paddr != -1,("dev_pager_getpage: map function returns error")); KKASSERT(object->type == OBJT_DEVICE); if (page->flags & PG_FICTITIOUS) { /* * If the passed in reqpage page is already a fake page, * update it with the new physical address. */ page->phys_addr = paddr; page->valid = VM_PAGE_BITS_ALL; } else { /* * Replace the passed in reqpage page with our own fake page * and free up all the original pages. */ page = dev_pager_getfake(paddr, object->memattr); TAILQ_INSERT_TAIL(&object->un_pager.devp.devp_pglist, page, pageq); vm_object_hold(object); vm_page_free(*mres); if (vm_page_insert(page, object, pidx) == FALSE) { panic("dev_pager_getpage: page (%p,%016jx) exists", object, (uintmax_t)pidx); } vm_object_drop(object); } return (VM_PAGER_OK); }
static void pscnv_gem_pager_dtor(void *handle) { struct drm_gem_object *gem_obj = handle; struct pscnv_bo *bo = gem_obj->driver_private; struct drm_device *dev = gem_obj->dev; vm_object_t devobj; DRM_LOCK(dev); devobj = cdev_pager_lookup(handle); if (devobj != NULL) { vm_size_t page_count = OFF_TO_IDX(bo->size); vm_page_t m; int i; VM_OBJECT_LOCK(devobj); for (i = 0; i < page_count; i++) { m = vm_page_lookup(devobj, i); if (!m) continue; if (pscnv_mem_debug > 0) NV_WARN(dev, "Freeing %010llx + %08llx (%p\n", bo->start, i * PAGE_SIZE, m); cdev_pager_free_page(devobj, m); } VM_OBJECT_UNLOCK(devobj); vm_object_deallocate(devobj); } else { DRM_UNLOCK(dev); NV_ERROR(dev, "Could not find handle %p bo %p\n", handle, bo); return; } if (pscnv_mem_debug > 0) NV_WARN(dev, "Freed %010llx (%p)\n", bo->start, bo); //kfree(bo->fake_pages); if (bo->chan) pscnv_chan_unref(bo->chan); else drm_gem_object_unreference_unlocked(gem_obj); DRM_UNLOCK(dev); }
static int privcmd_pg_fault(vm_object_t object, vm_ooffset_t offset, int prot, vm_page_t *mres) { struct privcmd_map *map = object->handle; vm_pindex_t pidx; vm_page_t page, oldm; if (map->mapped != true) return (VM_PAGER_FAIL); pidx = OFF_TO_IDX(offset); if (pidx >= map->size || BIT_ISSET(map->size, pidx, map->err)) return (VM_PAGER_FAIL); page = PHYS_TO_VM_PAGE(map->phys_base_addr + offset); if (page == NULL) return (VM_PAGER_FAIL); KASSERT((page->flags & PG_FICTITIOUS) != 0, ("not fictitious %p", page)); KASSERT(page->wire_count == 1, ("wire_count not 1 %p", page)); KASSERT(vm_page_busied(page) == 0, ("page %p is busy", page)); if (*mres != NULL) { oldm = *mres; vm_page_lock(oldm); vm_page_free(oldm); vm_page_unlock(oldm); *mres = NULL; } vm_page_insert(page, object, pidx); page->valid = VM_PAGE_BITS_ALL; vm_page_xbusy(page); *mres = page; return (VM_PAGER_OK); }
/* * no_pager_alloc just returns an initialized object. */ static vm_object_t default_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot, vm_ooffset_t offset, struct ucred *cred) { vm_object_t object; if (handle != NULL) panic("default_pager_alloc: handle specified"); if (cred != NULL) { if (!swap_reserve_by_cred(size, cred)) return (NULL); crhold(cred); } object = vm_object_allocate(OBJT_DEFAULT, OFF_TO_IDX(round_page(offset + size))); if (cred != NULL) { VM_OBJECT_WLOCK(object); object->cred = cred; object->charge = size; VM_OBJECT_WUNLOCK(object); } return (object); }
static int uiomove_object_page(vm_object_t obj, size_t len, struct uio *uio) { vm_page_t m; vm_pindex_t idx; size_t tlen; int error, offset, rv; idx = OFF_TO_IDX(uio->uio_offset); offset = uio->uio_offset & PAGE_MASK; tlen = MIN(PAGE_SIZE - offset, len); VM_OBJECT_WLOCK(obj); /* * Read I/O without either a corresponding resident page or swap * page: use zero_region. This is intended to avoid instantiating * pages on read from a sparse region. */ if (uio->uio_rw == UIO_READ && vm_page_lookup(obj, idx) == NULL && !vm_pager_has_page(obj, idx, NULL, NULL)) { VM_OBJECT_WUNLOCK(obj); return (uiomove(__DECONST(void *, zero_region), tlen, uio)); }
static int old_dev_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot, vm_ooffset_t foff, struct ucred *cred, u_short *color) { unsigned int npages; vm_offset_t off; cdev_t dev; dev = handle; /* * Check that the specified range of the device allows the desired * protection. * * XXX assumes VM_PROT_* == PROT_* */ npages = OFF_TO_IDX(size); for (off = foff; npages--; off += PAGE_SIZE) { if (dev_dmmap(dev, off, (int)prot, NULL) == -1) return (EINVAL); } return (0); }
/* * Allocate (or lookup) pager for a vnode. * Handle is a vnode pointer. * * MPSAFE */ vm_object_t vnode_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot, vm_ooffset_t offset, struct ucred *cred) { vm_object_t object; struct vnode *vp; /* * Pageout to vnode, no can do yet. */ if (handle == NULL) return (NULL); vp = (struct vnode *) handle; /* * If the object is being terminated, wait for it to * go away. */ retry: while ((object = vp->v_object) != NULL) { VM_OBJECT_WLOCK(object); if ((object->flags & OBJ_DEAD) == 0) break; vm_object_set_flag(object, OBJ_DISCONNECTWNT); VM_OBJECT_SLEEP(object, object, PDROP | PVM, "vadead", 0); } KASSERT(vp->v_usecount != 0, ("vnode_pager_alloc: no vnode reference")); if (object == NULL) { /* * Add an object of the appropriate size */ object = vm_object_allocate(OBJT_VNODE, OFF_TO_IDX(round_page(size))); object->un_pager.vnp.vnp_size = size; object->un_pager.vnp.writemappings = 0; object->handle = handle; VI_LOCK(vp); if (vp->v_object != NULL) { /* * Object has been created while we were sleeping */ VI_UNLOCK(vp); VM_OBJECT_WLOCK(object); KASSERT(object->ref_count == 1, ("leaked ref %p %d", object, object->ref_count)); object->type = OBJT_DEAD; object->ref_count = 0; VM_OBJECT_WUNLOCK(object); vm_object_destroy(object); goto retry; } vp->v_object = object; VI_UNLOCK(vp); } else { object->ref_count++; #if VM_NRESERVLEVEL > 0 vm_object_color(object, 0); #endif VM_OBJECT_WUNLOCK(object); } vref(vp); return (object); }
/* * Lets the VM system know about a change in size for a file. * We adjust our own internal size and flush any cached pages in * the associated object that are affected by the size change. * * NOTE: This routine may be invoked as a result of a pager put * operation (possibly at object termination time), so we must be careful. * * NOTE: vp->v_filesize is initialized to NOOFFSET (-1), be sure that * we do not blow up on the case. nsize will always be >= 0, however. */ void vnode_pager_setsize(struct vnode *vp, vm_ooffset_t nsize) { vm_pindex_t nobjsize; vm_pindex_t oobjsize; vm_object_t object; object = vp->v_object; if (object == NULL) return; vm_object_hold(object); KKASSERT(vp->v_object == object); /* * Hasn't changed size */ if (nsize == vp->v_filesize) { vm_object_drop(object); return; } /* * Has changed size. Adjust the VM object's size and v_filesize * before we start scanning pages to prevent new pages from being * allocated during the scan. */ nobjsize = OFF_TO_IDX(nsize + PAGE_MASK); oobjsize = object->size; object->size = nobjsize; /* * File has shrunk. Toss any cached pages beyond the new EOF. */ if (nsize < vp->v_filesize) { vp->v_filesize = nsize; if (nobjsize < oobjsize) { vm_object_page_remove(object, nobjsize, oobjsize, FALSE); } /* * This gets rid of garbage at the end of a page that is now * only partially backed by the vnode. Since we are setting * the entire page valid & clean after we are done we have * to be sure that the portion of the page within the file * bounds is already valid. If it isn't then making it * valid would create a corrupt block. */ if (nsize & PAGE_MASK) { vm_offset_t kva; vm_page_t m; m = vm_page_lookup_busy_wait(object, OFF_TO_IDX(nsize), TRUE, "vsetsz"); if (m && m->valid) { int base = (int)nsize & PAGE_MASK; int size = PAGE_SIZE - base; struct lwbuf *lwb; struct lwbuf lwb_cache; /* * Clear out partial-page garbage in case * the page has been mapped. * * This is byte aligned. */ lwb = lwbuf_alloc(m, &lwb_cache); kva = lwbuf_kva(lwb); bzero((caddr_t)kva + base, size); lwbuf_free(lwb); /* * XXX work around SMP data integrity race * by unmapping the page from user processes. * The garbage we just cleared may be mapped * to a user process running on another cpu * and this code is not running through normal * I/O channels which handle SMP issues for * us, so unmap page to synchronize all cpus. * * XXX should vm_pager_unmap_page() have * dealt with this? */ vm_page_protect(m, VM_PROT_NONE); /* * Clear out partial-page dirty bits. This * has the side effect of setting the valid * bits, but that is ok. There are a bunch * of places in the VM system where we expected * m->dirty == VM_PAGE_BITS_ALL. The file EOF * case is one of them. If the page is still * partially dirty, make it fully dirty. * * NOTE: We do not clear out the valid * bits. This would prevent bogus_page * replacement from working properly. * * NOTE: We do not want to clear the dirty * bit for a partial DEV_BSIZE'd truncation! * This is DEV_BSIZE aligned! */ vm_page_clear_dirty_beg_nonincl(m, base, size); if (m->dirty != 0) m->dirty = VM_PAGE_BITS_ALL; vm_page_wakeup(m); } else if (m) { vm_page_wakeup(m); } } } else { vp->v_filesize = nsize; } vm_object_drop(object); }
static void os_balloonobject_create(void) { global_state.vmobject = vm_object_allocate(OBJT_DEFAULT, OFF_TO_IDX(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS)); }
static int uiomove_object_page(vm_object_t obj, size_t len, struct uio *uio) { vm_page_t m; vm_pindex_t idx; size_t tlen; int error, offset, rv; idx = OFF_TO_IDX(uio->uio_offset); offset = uio->uio_offset & PAGE_MASK; tlen = MIN(PAGE_SIZE - offset, len); VM_OBJECT_WLOCK(obj); /* * Parallel reads of the page content from disk are prevented * by exclusive busy. * * Although the tmpfs vnode lock is held here, it is * nonetheless safe to sleep waiting for a free page. The * pageout daemon does not need to acquire the tmpfs vnode * lock to page out tobj's pages because tobj is a OBJT_SWAP * type object. */ m = vm_page_grab(obj, idx, VM_ALLOC_NORMAL); if (m->valid != VM_PAGE_BITS_ALL) { if (vm_pager_has_page(obj, idx, NULL, NULL)) { rv = vm_pager_get_pages(obj, &m, 1, 0); m = vm_page_lookup(obj, idx); if (m == NULL) { printf( "uiomove_object: vm_obj %p idx %jd null lookup rv %d\n", obj, idx, rv); VM_OBJECT_WUNLOCK(obj); return (EIO); } if (rv != VM_PAGER_OK) { printf( "uiomove_object: vm_obj %p idx %jd valid %x pager error %d\n", obj, idx, m->valid, rv); vm_page_lock(m); vm_page_free(m); vm_page_unlock(m); VM_OBJECT_WUNLOCK(obj); return (EIO); } } else vm_page_zero_invalid(m, TRUE); } vm_page_xunbusy(m); vm_page_lock(m); vm_page_hold(m); vm_page_unlock(m); VM_OBJECT_WUNLOCK(obj); error = uiomove_fromphys(&m, offset, tlen, uio); if (uio->uio_rw == UIO_WRITE && error == 0) { VM_OBJECT_WLOCK(obj); vm_page_dirty(m); VM_OBJECT_WUNLOCK(obj); } vm_page_lock(m); vm_page_unhold(m); if (m->queue == PQ_NONE) { vm_page_deactivate(m); } else { /* Requeue to maintain LRU ordering. */ vm_page_requeue(m); } vm_page_unlock(m); return (error); }
static int tmpfs_mappedread(vm_object_t vobj, vm_object_t tobj, size_t len, struct uio *uio) { struct sf_buf *sf; vm_pindex_t idx; vm_page_t m; vm_offset_t offset; off_t addr; size_t tlen; char *ma; int error; addr = uio->uio_offset; idx = OFF_TO_IDX(addr); offset = addr & PAGE_MASK; tlen = MIN(PAGE_SIZE - offset, len); if ((vobj == NULL) || (vobj->resident_page_count == 0 && vobj->cache == NULL)) goto nocache; VM_OBJECT_LOCK(vobj); lookupvpg: if (((m = vm_page_lookup(vobj, idx)) != NULL) && vm_page_is_valid(m, offset, tlen)) { if ((m->oflags & VPO_BUSY) != 0) { /* * Reference the page before unlocking and sleeping so * that the page daemon is less likely to reclaim it. */ vm_page_reference(m); vm_page_sleep(m, "tmfsmr"); goto lookupvpg; } vm_page_busy(m); VM_OBJECT_UNLOCK(vobj); error = uiomove_fromphys(&m, offset, tlen, uio); VM_OBJECT_LOCK(vobj); vm_page_wakeup(m); VM_OBJECT_UNLOCK(vobj); return (error); } else if (m != NULL && uio->uio_segflg == UIO_NOCOPY) { KASSERT(offset == 0, ("unexpected offset in tmpfs_mappedread for sendfile")); if ((m->oflags & VPO_BUSY) != 0) { /* * Reference the page before unlocking and sleeping so * that the page daemon is less likely to reclaim it. */ vm_page_reference(m); vm_page_sleep(m, "tmfsmr"); goto lookupvpg; } vm_page_busy(m); VM_OBJECT_UNLOCK(vobj); sched_pin(); sf = sf_buf_alloc(m, SFB_CPUPRIVATE); ma = (char *)sf_buf_kva(sf); error = tmpfs_nocacheread_buf(tobj, idx, 0, tlen, ma); if (error == 0) { if (tlen != PAGE_SIZE) bzero(ma + tlen, PAGE_SIZE - tlen); uio->uio_offset += tlen; uio->uio_resid -= tlen; } sf_buf_free(sf); sched_unpin(); VM_OBJECT_LOCK(vobj); if (error == 0) m->valid = VM_PAGE_BITS_ALL; vm_page_wakeup(m); VM_OBJECT_UNLOCK(vobj); return (error); } VM_OBJECT_UNLOCK(vobj); nocache: error = tmpfs_nocacheread(tobj, idx, offset, tlen, uio); return (error); }
int vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type, int fault_flags, vm_page_t *m_hold) { vm_prot_t prot; long ahead, behind; int alloc_req, era, faultcount, nera, reqpage, result; boolean_t growstack, is_first_object_locked, wired; int map_generation; vm_object_t next_object; vm_page_t marray[VM_FAULT_READ_MAX]; int hardfault; struct faultstate fs; struct vnode *vp; int locked, error; hardfault = 0; growstack = TRUE; PCPU_INC(cnt.v_vm_faults); fs.vp = NULL; faultcount = reqpage = 0; RetryFault:; /* * Find the backing store object and offset into it to begin the * search. */ fs.map = map; result = vm_map_lookup(&fs.map, vaddr, fault_type, &fs.entry, &fs.first_object, &fs.first_pindex, &prot, &wired); if (result != KERN_SUCCESS) { if (growstack && result == KERN_INVALID_ADDRESS && map != kernel_map) { result = vm_map_growstack(curproc, vaddr); if (result != KERN_SUCCESS) return (KERN_FAILURE); growstack = FALSE; goto RetryFault; } return (result); } map_generation = fs.map->timestamp; if (fs.entry->eflags & MAP_ENTRY_NOFAULT) { panic("vm_fault: fault on nofault entry, addr: %lx", (u_long)vaddr); } /* * Make a reference to this object to prevent its disposal while we * are messing with it. Once we have the reference, the map is free * to be diddled. Since objects reference their shadows (and copies), * they will stay around as well. * * Bump the paging-in-progress count to prevent size changes (e.g. * truncation operations) during I/O. This must be done after * obtaining the vnode lock in order to avoid possible deadlocks. */ VM_OBJECT_WLOCK(fs.first_object); vm_object_reference_locked(fs.first_object); vm_object_pip_add(fs.first_object, 1); fs.lookup_still_valid = TRUE; if (wired) fault_type = prot | (fault_type & VM_PROT_COPY); fs.first_m = NULL; /* * Search for the page at object/offset. */ fs.object = fs.first_object; fs.pindex = fs.first_pindex; while (TRUE) { /* * If the object is dead, we stop here */ if (fs.object->flags & OBJ_DEAD) { unlock_and_deallocate(&fs); return (KERN_PROTECTION_FAILURE); } /* * See if page is resident */ fs.m = vm_page_lookup(fs.object, fs.pindex); if (fs.m != NULL) { /* * check for page-based copy on write. * We check fs.object == fs.first_object so * as to ensure the legacy COW mechanism is * used when the page in question is part of * a shadow object. Otherwise, vm_page_cowfault() * removes the page from the backing object, * which is not what we want. */ vm_page_lock(fs.m); if ((fs.m->cow) && (fault_type & VM_PROT_WRITE) && (fs.object == fs.first_object)) { vm_page_cowfault(fs.m); unlock_and_deallocate(&fs); goto RetryFault; } /* * Wait/Retry if the page is busy. We have to do this * if the page is busy via either VPO_BUSY or * vm_page_t->busy because the vm_pager may be using * vm_page_t->busy for pageouts ( and even pageins if * it is the vnode pager ), and we could end up trying * to pagein and pageout the same page simultaneously. * * We can theoretically allow the busy case on a read * fault if the page is marked valid, but since such * pages are typically already pmap'd, putting that * special case in might be more effort then it is * worth. We cannot under any circumstances mess * around with a vm_page_t->busy page except, perhaps, * to pmap it. */ if ((fs.m->oflags & VPO_BUSY) || fs.m->busy) { /* * Reference the page before unlocking and * sleeping so that the page daemon is less * likely to reclaim it. */ vm_page_aflag_set(fs.m, PGA_REFERENCED); vm_page_unlock(fs.m); if (fs.object != fs.first_object) { if (!VM_OBJECT_TRYWLOCK( fs.first_object)) { VM_OBJECT_WUNLOCK(fs.object); VM_OBJECT_WLOCK(fs.first_object); VM_OBJECT_WLOCK(fs.object); } vm_page_lock(fs.first_m); vm_page_free(fs.first_m); vm_page_unlock(fs.first_m); vm_object_pip_wakeup(fs.first_object); VM_OBJECT_WUNLOCK(fs.first_object); fs.first_m = NULL; } unlock_map(&fs); if (fs.m == vm_page_lookup(fs.object, fs.pindex)) { vm_page_sleep_if_busy(fs.m, TRUE, "vmpfw"); } vm_object_pip_wakeup(fs.object); VM_OBJECT_WUNLOCK(fs.object); PCPU_INC(cnt.v_intrans); vm_object_deallocate(fs.first_object); goto RetryFault; } vm_page_remque(fs.m); vm_page_unlock(fs.m); /* * Mark page busy for other processes, and the * pagedaemon. If it still isn't completely valid * (readable), jump to readrest, else break-out ( we * found the page ). */ vm_page_busy(fs.m); if (fs.m->valid != VM_PAGE_BITS_ALL) goto readrest; break; } /* * Page is not resident, If this is the search termination * or the pager might contain the page, allocate a new page. */ if (TRYPAGER || fs.object == fs.first_object) { if (fs.pindex >= fs.object->size) { unlock_and_deallocate(&fs); return (KERN_PROTECTION_FAILURE); } /* * Allocate a new page for this object/offset pair. * * Unlocked read of the p_flag is harmless. At * worst, the P_KILLED might be not observed * there, and allocation can fail, causing * restart and new reading of the p_flag. */ fs.m = NULL; if (!vm_page_count_severe() || P_KILLED(curproc)) { #if VM_NRESERVLEVEL > 0 if ((fs.object->flags & OBJ_COLORED) == 0) { fs.object->flags |= OBJ_COLORED; fs.object->pg_color = atop(vaddr) - fs.pindex; } #endif alloc_req = P_KILLED(curproc) ? VM_ALLOC_SYSTEM : VM_ALLOC_NORMAL; if (fs.object->type != OBJT_VNODE && fs.object->backing_object == NULL) alloc_req |= VM_ALLOC_ZERO; fs.m = vm_page_alloc(fs.object, fs.pindex, alloc_req); } if (fs.m == NULL) { unlock_and_deallocate(&fs); VM_WAITPFAULT; goto RetryFault; } else if (fs.m->valid == VM_PAGE_BITS_ALL) break; } readrest: /* * We have found a valid page or we have allocated a new page. * The page thus may not be valid or may not be entirely * valid. * * Attempt to fault-in the page if there is a chance that the * pager has it, and potentially fault in additional pages * at the same time. */ if (TRYPAGER) { int rv; u_char behavior = vm_map_entry_behavior(fs.entry); if (behavior == MAP_ENTRY_BEHAV_RANDOM || P_KILLED(curproc)) { behind = 0; ahead = 0; } else if (behavior == MAP_ENTRY_BEHAV_SEQUENTIAL) { behind = 0; ahead = atop(fs.entry->end - vaddr) - 1; if (ahead > VM_FAULT_READ_AHEAD_MAX) ahead = VM_FAULT_READ_AHEAD_MAX; if (fs.pindex == fs.entry->next_read) vm_fault_cache_behind(&fs, VM_FAULT_READ_MAX); } else { /* * If this is a sequential page fault, then * arithmetically increase the number of pages * in the read-ahead window. Otherwise, reset * the read-ahead window to its smallest size. */ behind = atop(vaddr - fs.entry->start); if (behind > VM_FAULT_READ_BEHIND) behind = VM_FAULT_READ_BEHIND; ahead = atop(fs.entry->end - vaddr) - 1; era = fs.entry->read_ahead; if (fs.pindex == fs.entry->next_read) { nera = era + behind; if (nera > VM_FAULT_READ_AHEAD_MAX) nera = VM_FAULT_READ_AHEAD_MAX; behind = 0; if (ahead > nera) ahead = nera; if (era == VM_FAULT_READ_AHEAD_MAX) vm_fault_cache_behind(&fs, VM_FAULT_CACHE_BEHIND); } else if (ahead > VM_FAULT_READ_AHEAD_MIN) ahead = VM_FAULT_READ_AHEAD_MIN; if (era != ahead) fs.entry->read_ahead = ahead; } /* * Call the pager to retrieve the data, if any, after * releasing the lock on the map. We hold a ref on * fs.object and the pages are VPO_BUSY'd. */ unlock_map(&fs); if (fs.object->type == OBJT_VNODE) { vp = fs.object->handle; if (vp == fs.vp) goto vnode_locked; else if (fs.vp != NULL) { vput(fs.vp); fs.vp = NULL; } locked = VOP_ISLOCKED(vp); if (locked != LK_EXCLUSIVE) locked = LK_SHARED; /* Do not sleep for vnode lock while fs.m is busy */ error = vget(vp, locked | LK_CANRECURSE | LK_NOWAIT, curthread); if (error != 0) { vhold(vp); release_page(&fs); unlock_and_deallocate(&fs); error = vget(vp, locked | LK_RETRY | LK_CANRECURSE, curthread); vdrop(vp); fs.vp = vp; KASSERT(error == 0, ("vm_fault: vget failed")); goto RetryFault; } fs.vp = vp; } vnode_locked: KASSERT(fs.vp == NULL || !fs.map->system_map, ("vm_fault: vnode-backed object mapped by system map")); /* * now we find out if any other pages should be paged * in at this time this routine checks to see if the * pages surrounding this fault reside in the same * object as the page for this fault. If they do, * then they are faulted in also into the object. The * array "marray" returned contains an array of * vm_page_t structs where one of them is the * vm_page_t passed to the routine. The reqpage * return value is the index into the marray for the * vm_page_t passed to the routine. * * fs.m plus the additional pages are VPO_BUSY'd. */ faultcount = vm_fault_additional_pages( fs.m, behind, ahead, marray, &reqpage); rv = faultcount ? vm_pager_get_pages(fs.object, marray, faultcount, reqpage) : VM_PAGER_FAIL; if (rv == VM_PAGER_OK) { /* * Found the page. Leave it busy while we play * with it. */ /* * Relookup in case pager changed page. Pager * is responsible for disposition of old page * if moved. */ fs.m = vm_page_lookup(fs.object, fs.pindex); if (!fs.m) { unlock_and_deallocate(&fs); goto RetryFault; } hardfault++; break; /* break to PAGE HAS BEEN FOUND */ } /* * Remove the bogus page (which does not exist at this * object/offset); before doing so, we must get back * our object lock to preserve our invariant. * * Also wake up any other process that may want to bring * in this page. * * If this is the top-level object, we must leave the * busy page to prevent another process from rushing * past us, and inserting the page in that object at * the same time that we are. */ if (rv == VM_PAGER_ERROR) printf("vm_fault: pager read error, pid %d (%s)\n", curproc->p_pid, curproc->p_comm); /* * Data outside the range of the pager or an I/O error */ /* * XXX - the check for kernel_map is a kludge to work * around having the machine panic on a kernel space * fault w/ I/O error. */ if (((fs.map != kernel_map) && (rv == VM_PAGER_ERROR)) || (rv == VM_PAGER_BAD)) { vm_page_lock(fs.m); vm_page_free(fs.m); vm_page_unlock(fs.m); fs.m = NULL; unlock_and_deallocate(&fs); return ((rv == VM_PAGER_ERROR) ? KERN_FAILURE : KERN_PROTECTION_FAILURE); } if (fs.object != fs.first_object) { vm_page_lock(fs.m); vm_page_free(fs.m); vm_page_unlock(fs.m); fs.m = NULL; /* * XXX - we cannot just fall out at this * point, m has been freed and is invalid! */ } } /* * We get here if the object has default pager (or unwiring) * or the pager doesn't have the page. */ if (fs.object == fs.first_object) fs.first_m = fs.m; /* * Move on to the next object. Lock the next object before * unlocking the current one. */ fs.pindex += OFF_TO_IDX(fs.object->backing_object_offset); next_object = fs.object->backing_object; if (next_object == NULL) { /* * If there's no object left, fill the page in the top * object with zeros. */ if (fs.object != fs.first_object) { vm_object_pip_wakeup(fs.object); VM_OBJECT_WUNLOCK(fs.object); fs.object = fs.first_object; fs.pindex = fs.first_pindex; fs.m = fs.first_m; VM_OBJECT_WLOCK(fs.object); } fs.first_m = NULL; /* * Zero the page if necessary and mark it valid. */ if ((fs.m->flags & PG_ZERO) == 0) { pmap_zero_page(fs.m); } else { PCPU_INC(cnt.v_ozfod); } PCPU_INC(cnt.v_zfod); fs.m->valid = VM_PAGE_BITS_ALL; break; /* break to PAGE HAS BEEN FOUND */ } else { KASSERT(fs.object != next_object, ("object loop %p", next_object)); VM_OBJECT_WLOCK(next_object); vm_object_pip_add(next_object, 1); if (fs.object != fs.first_object) vm_object_pip_wakeup(fs.object); VM_OBJECT_WUNLOCK(fs.object); fs.object = next_object; } } KASSERT((fs.m->oflags & VPO_BUSY) != 0, ("vm_fault: not busy after main loop")); /* * PAGE HAS BEEN FOUND. [Loop invariant still holds -- the object lock * is held.] */ /* * If the page is being written, but isn't already owned by the * top-level object, we have to copy it into a new page owned by the * top-level object. */ if (fs.object != fs.first_object) { /* * We only really need to copy if we want to write it. */ if ((fault_type & (VM_PROT_COPY | VM_PROT_WRITE)) != 0) { /* * This allows pages to be virtually copied from a * backing_object into the first_object, where the * backing object has no other refs to it, and cannot * gain any more refs. Instead of a bcopy, we just * move the page from the backing object to the * first object. Note that we must mark the page * dirty in the first object so that it will go out * to swap when needed. */ is_first_object_locked = FALSE; if ( /* * Only one shadow object */ (fs.object->shadow_count == 1) && /* * No COW refs, except us */ (fs.object->ref_count == 1) && /* * No one else can look this object up */ (fs.object->handle == NULL) && /* * No other ways to look the object up */ ((fs.object->type == OBJT_DEFAULT) || (fs.object->type == OBJT_SWAP)) && (is_first_object_locked = VM_OBJECT_TRYWLOCK(fs.first_object)) && /* * We don't chase down the shadow chain */ fs.object == fs.first_object->backing_object) { /* * get rid of the unnecessary page */ vm_page_lock(fs.first_m); vm_page_free(fs.first_m); vm_page_unlock(fs.first_m); /* * grab the page and put it into the * process'es object. The page is * automatically made dirty. */ vm_page_lock(fs.m); vm_page_rename(fs.m, fs.first_object, fs.first_pindex); vm_page_unlock(fs.m); vm_page_busy(fs.m); fs.first_m = fs.m; fs.m = NULL; PCPU_INC(cnt.v_cow_optim); } else { /* * Oh, well, lets copy it. */ pmap_copy_page(fs.m, fs.first_m); fs.first_m->valid = VM_PAGE_BITS_ALL; if (wired && (fault_flags & VM_FAULT_CHANGE_WIRING) == 0) { vm_page_lock(fs.first_m); vm_page_wire(fs.first_m); vm_page_unlock(fs.first_m); vm_page_lock(fs.m); vm_page_unwire(fs.m, FALSE); vm_page_unlock(fs.m); } /* * We no longer need the old page or object. */ release_page(&fs); } /* * fs.object != fs.first_object due to above * conditional */ vm_object_pip_wakeup(fs.object); VM_OBJECT_WUNLOCK(fs.object); /* * Only use the new page below... */ fs.object = fs.first_object; fs.pindex = fs.first_pindex; fs.m = fs.first_m; if (!is_first_object_locked) VM_OBJECT_WLOCK(fs.object); PCPU_INC(cnt.v_cow_faults); curthread->td_cow++; } else { prot &= ~VM_PROT_WRITE; } } /* * We must verify that the maps have not changed since our last * lookup. */ if (!fs.lookup_still_valid) { vm_object_t retry_object; vm_pindex_t retry_pindex; vm_prot_t retry_prot; if (!vm_map_trylock_read(fs.map)) { release_page(&fs); unlock_and_deallocate(&fs); goto RetryFault; } fs.lookup_still_valid = TRUE; if (fs.map->timestamp != map_generation) { result = vm_map_lookup_locked(&fs.map, vaddr, fault_type, &fs.entry, &retry_object, &retry_pindex, &retry_prot, &wired); /* * If we don't need the page any longer, put it on the inactive * list (the easiest thing to do here). If no one needs it, * pageout will grab it eventually. */ if (result != KERN_SUCCESS) { release_page(&fs); unlock_and_deallocate(&fs); /* * If retry of map lookup would have blocked then * retry fault from start. */ if (result == KERN_FAILURE) goto RetryFault; return (result); } if ((retry_object != fs.first_object) || (retry_pindex != fs.first_pindex)) { release_page(&fs); unlock_and_deallocate(&fs); goto RetryFault; } /* * Check whether the protection has changed or the object has * been copied while we left the map unlocked. Changing from * read to write permission is OK - we leave the page * write-protected, and catch the write fault. Changing from * write to read permission means that we can't mark the page * write-enabled after all. */ prot &= retry_prot; } } /* * If the page was filled by a pager, update the map entry's * last read offset. Since the pager does not return the * actual set of pages that it read, this update is based on * the requested set. Typically, the requested and actual * sets are the same. * * XXX The following assignment modifies the map * without holding a write lock on it. */ if (hardfault) fs.entry->next_read = fs.pindex + faultcount - reqpage; if ((prot & VM_PROT_WRITE) != 0 || (fault_flags & VM_FAULT_DIRTY) != 0) { vm_object_set_writeable_dirty(fs.object); /* * If this is a NOSYNC mmap we do not want to set VPO_NOSYNC * if the page is already dirty to prevent data written with * the expectation of being synced from not being synced. * Likewise if this entry does not request NOSYNC then make * sure the page isn't marked NOSYNC. Applications sharing * data should use the same flags to avoid ping ponging. */ if (fs.entry->eflags & MAP_ENTRY_NOSYNC) { if (fs.m->dirty == 0) fs.m->oflags |= VPO_NOSYNC; } else { fs.m->oflags &= ~VPO_NOSYNC; } /* * If the fault is a write, we know that this page is being * written NOW so dirty it explicitly to save on * pmap_is_modified() calls later. * * Also tell the backing pager, if any, that it should remove * any swap backing since the page is now dirty. */ if (((fault_type & VM_PROT_WRITE) != 0 && (fault_flags & VM_FAULT_CHANGE_WIRING) == 0) || (fault_flags & VM_FAULT_DIRTY) != 0) { vm_page_dirty(fs.m); vm_pager_page_unswapped(fs.m); } } /* * Page had better still be busy */ KASSERT(fs.m->oflags & VPO_BUSY, ("vm_fault: page %p not busy!", fs.m)); /* * Page must be completely valid or it is not fit to * map into user space. vm_pager_get_pages() ensures this. */ KASSERT(fs.m->valid == VM_PAGE_BITS_ALL, ("vm_fault: page %p partially invalid", fs.m)); VM_OBJECT_WUNLOCK(fs.object); /* * Put this page into the physical map. We had to do the unlock above * because pmap_enter() may sleep. We don't put the page * back on the active queue until later so that the pageout daemon * won't find it (yet). */ pmap_enter(fs.map->pmap, vaddr, fault_type, fs.m, prot, wired); if ((fault_flags & VM_FAULT_CHANGE_WIRING) == 0 && wired == 0) vm_fault_prefault(fs.map->pmap, vaddr, fs.entry); VM_OBJECT_WLOCK(fs.object); vm_page_lock(fs.m); /* * If the page is not wired down, then put it where the pageout daemon * can find it. */ if (fault_flags & VM_FAULT_CHANGE_WIRING) { if (wired) vm_page_wire(fs.m); else vm_page_unwire(fs.m, 1); } else vm_page_activate(fs.m); if (m_hold != NULL) { *m_hold = fs.m; vm_page_hold(fs.m); } vm_page_unlock(fs.m); vm_page_wakeup(fs.m); /* * Unlock everything, and return */ unlock_and_deallocate(&fs); if (hardfault) { PCPU_INC(cnt.v_io_faults); curthread->td_ru.ru_majflt++; } else curthread->td_ru.ru_minflt++; return (KERN_SUCCESS); }
static int shm_dotruncate(struct shmfd *shmfd, off_t length) { vm_object_t object; vm_page_t m; vm_pindex_t nobjsize; vm_ooffset_t delta; object = shmfd->shm_object; VM_OBJECT_LOCK(object); if (length == shmfd->shm_size) { VM_OBJECT_UNLOCK(object); return (0); } nobjsize = OFF_TO_IDX(length + PAGE_MASK); /* Are we shrinking? If so, trim the end. */ if (length < shmfd->shm_size) { delta = ptoa(object->size - nobjsize); /* Toss in memory pages. */ if (nobjsize < object->size) vm_object_page_remove(object, nobjsize, object->size, FALSE); /* Toss pages from swap. */ if (object->type == OBJT_SWAP) swap_pager_freespace(object, nobjsize, delta); /* Free the swap accounted for shm */ swap_release_by_uid(delta, object->uip); object->charge -= delta; /* * If the last page is partially mapped, then zero out * the garbage at the end of the page. See comments * in vnode_pager_setsize() for more details. * * XXXJHB: This handles in memory pages, but what about * a page swapped out to disk? */ if ((length & PAGE_MASK) && (m = vm_page_lookup(object, OFF_TO_IDX(length))) != NULL && m->valid != 0) { int base = (int)length & PAGE_MASK; int size = PAGE_SIZE - base; pmap_zero_page_area(m, base, size); /* * Update the valid bits to reflect the blocks that * have been zeroed. Some of these valid bits may * have already been set. */ vm_page_set_valid(m, base, size); /* * Round "base" to the next block boundary so that the * dirty bit for a partially zeroed block is not * cleared. */ base = roundup2(base, DEV_BSIZE); vm_page_lock_queues(); vm_page_clear_dirty(m, base, PAGE_SIZE - base); vm_page_unlock_queues(); } else if ((length & PAGE_MASK) && __predict_false(object->cache != NULL)) { vm_page_cache_free(object, OFF_TO_IDX(length), nobjsize); } } else { /* Attempt to reserve the swap */ delta = ptoa(nobjsize - object->size); if (!swap_reserve_by_uid(delta, object->uip)) { VM_OBJECT_UNLOCK(object); return (ENOMEM); } object->charge += delta; } shmfd->shm_size = length; mtx_lock(&shm_timestamp_lock); vfs_timestamp(&shmfd->shm_ctime); shmfd->shm_mtime = shmfd->shm_ctime; mtx_unlock(&shm_timestamp_lock); object->size = nobjsize; VM_OBJECT_UNLOCK(object); return (0); }
/* See: old_dev_pager_fault() in device_pager.c as an example. */ static int cheri_compositor_cfb_pg_fault(vm_object_t vm_obj, vm_ooffset_t offset, int prot, vm_page_t *mres) { vm_pindex_t pidx; vm_paddr_t paddr; vm_page_t page; struct cfb_vm_object *cfb_vm_obj; struct cdev *dev; struct cheri_compositor_softc *sc; struct cdevsw *csw; vm_memattr_t memattr; int ref; int retval; pidx = OFF_TO_IDX(offset); VM_OBJECT_WUNLOCK(vm_obj); cfb_vm_obj = vm_obj->handle; dev = cfb_vm_obj->dev; sc = dev->si_drv1; retval = VM_PAGER_OK; CHERI_COMPOSITOR_DEBUG(sc, "vm_obj: %p, offset: %lu, prot: %i", vm_obj, offset, prot); csw = dev_refthread(dev, &ref); if (csw == NULL) { retval = VM_PAGER_FAIL; goto done_unlocked; } /* Traditional d_mmap() call. */ CHERI_COMPOSITOR_DEBUG(sc, "offset: %lu, nprot: %i", offset, prot); if (validate_prot_and_offset(sc, cfb_vm_obj->pool->mapped_fd, prot, offset) != 0) { retval = VM_PAGER_FAIL; goto done_unlocked; } paddr = calculate_physical_address(sc, cfb_vm_obj->pool, offset); memattr = VM_MEMATTR_UNCACHEABLE; CHERI_COMPOSITOR_DEBUG(sc, "paddr: %p, memattr: %i", (void *) paddr, memattr); dev_relthread(dev, ref); /* Sanity checks. */ KASSERT((((*mres)->flags & PG_FICTITIOUS) == 0), ("Expected non-fictitious page.")); /* * Replace the passed in reqpage page with our own fake page and * free up the all of the original pages. */ page = vm_page_getfake(paddr, memattr); VM_OBJECT_WLOCK(vm_obj); vm_page_lock(*mres); vm_page_free(*mres); vm_page_unlock(*mres); *mres = page; vm_page_insert(page, vm_obj, pidx); page->valid = VM_PAGE_BITS_ALL; /* Success! */ retval = VM_PAGER_OK; goto done; done_unlocked: VM_OBJECT_WLOCK(vm_obj); done: CHERI_COMPOSITOR_DEBUG(sc, "Finished with mres: %p (retval: %i)", *mres, retval); return (retval); }
vm_object_t cdev_pager_allocate(void *handle, enum obj_type tp, struct cdev_pager_ops *ops, vm_ooffset_t size, vm_prot_t prot, vm_ooffset_t foff, struct ucred *cred) { vm_object_t object, object1; vm_pindex_t pindex; u_short color; if (tp != OBJT_DEVICE && tp != OBJT_MGTDEVICE) return (NULL); /* * Offset should be page aligned. */ if (foff & PAGE_MASK) return (NULL); size = round_page(size); pindex = OFF_TO_IDX(foff + size); if (ops->cdev_pg_ctor(handle, size, prot, foff, cred, &color) != 0) return (NULL); mtx_lock(&dev_pager_mtx); /* * Look up pager, creating as necessary. */ object1 = NULL; object = vm_pager_object_lookup(&dev_pager_object_list, handle); if (object == NULL) { /* * Allocate object and associate it with the pager. Initialize * the object's pg_color based upon the physical address of the * device's memory. */ mtx_unlock(&dev_pager_mtx); object1 = vm_object_allocate(tp, pindex); object1->flags |= OBJ_COLORED; object1->pg_color = color; object1->handle = handle; object1->un_pager.devp.ops = ops; object1->un_pager.devp.dev = handle; TAILQ_INIT(&object1->un_pager.devp.devp_pglist); mtx_lock(&dev_pager_mtx); object = vm_pager_object_lookup(&dev_pager_object_list, handle); if (object != NULL) { /* * We raced with other thread while allocating object. */ if (pindex > object->size) object->size = pindex; } else { object = object1; object1 = NULL; object->handle = handle; TAILQ_INSERT_TAIL(&dev_pager_object_list, object, pager_object_list); KASSERT(object->type == tp, ("Inconsistent device pager type %p %d", object, tp)); } } else { if (pindex > object->size) object->size = pindex; } mtx_unlock(&dev_pager_mtx); if (object1 != NULL) { object1->handle = object1; mtx_lock(&dev_pager_mtx); TAILQ_INSERT_TAIL(&dev_pager_object_list, object1, pager_object_list); mtx_unlock(&dev_pager_mtx); vm_object_deallocate(object1); } return (object); }
/* * Truncate the inode ip to at most length size, freeing the * disk blocks. */ int ffs_truncate(vnode *vp, off_t length, int flags, Ucred *cred) { print("HARVEY TODO: %s\n", __func__); #if 0 struct inode *ip; ufs2_daddr_t bn, lbn, lastblock, lastiblock[UFS_NIADDR]; ufs2_daddr_t indir_lbn[UFS_NIADDR], oldblks[UFS_NDADDR + UFS_NIADDR]; ufs2_daddr_t newblks[UFS_NDADDR + UFS_NIADDR]; ufs2_daddr_t count, blocksreleased = 0, datablocks, blkno; struct bufobj *bo; struct fs *fs; struct buf *bp; struct ufsmount *ump; int softdeptrunc, journaltrunc; int needextclean, extblocks; int offset, size, level, nblocks; int i, error, allerror, indiroff, waitforupdate; off_t osize; ip = VTOI(vp); ump = VFSTOUFS(vp->v_mount); fs = ump->um_fs; bo = &vp->v_bufobj; ASSERT_VOP_LOCKED(vp, "ffs_truncate"); if (length < 0) return (EINVAL); if (length > fs->fs_maxfilesize) return (EFBIG); #ifdef QUOTA error = getinoquota(ip); if (error) return (error); #endif /* * Historically clients did not have to specify which data * they were truncating. So, if not specified, we assume * traditional behavior, e.g., just the normal data. */ if ((flags & (IO_EXT | IO_NORMAL)) == 0) flags |= IO_NORMAL; if (!DOINGSOFTDEP(vp) && !DOINGASYNC(vp)) flags |= IO_SYNC; waitforupdate = (flags & IO_SYNC) != 0 || !DOINGASYNC(vp); /* * If we are truncating the extended-attributes, and cannot * do it with soft updates, then do it slowly here. If we are * truncating both the extended attributes and the file contents * (e.g., the file is being unlinked), then pick it off with * soft updates below. */ allerror = 0; needextclean = 0; softdeptrunc = 0; journaltrunc = DOINGSUJ(vp); if (journaltrunc == 0 && DOINGSOFTDEP(vp) && length == 0) softdeptrunc = !softdep_slowdown(vp); extblocks = 0; datablocks = DIP(ip, i_blocks); if (fs->fs_magic == FS_UFS2_MAGIC && ip->i_din2->di_extsize > 0) { extblocks = btodb(fragroundup(fs, ip->i_din2->di_extsize)); datablocks -= extblocks; } if ((flags & IO_EXT) && extblocks > 0) { if (length != 0) panic("ffs_truncate: partial trunc of extdata"); if (softdeptrunc || journaltrunc) { if ((flags & IO_NORMAL) == 0) goto extclean; needextclean = 1; } else { if ((error = ffs_syncvnode(vp, MNT_WAIT, 0)) != 0) return (error); #ifdef QUOTA (void) chkdq(ip, -extblocks, NOCRED, 0); #endif vinvalbuf(vp, V_ALT, 0, 0); vn_pages_remove(vp, OFF_TO_IDX(lblktosize(fs, -extblocks)), 0); osize = ip->i_din2->di_extsize; ip->i_din2->di_blocks -= extblocks; ip->i_din2->di_extsize = 0; for (i = 0; i < UFS_NXADDR; i++) { oldblks[i] = ip->i_din2->di_extb[i]; ip->i_din2->di_extb[i] = 0; } ip->i_flag |= IN_CHANGE; if ((error = ffs_update(vp, waitforupdate))) return (error); for (i = 0; i < UFS_NXADDR; i++) { if (oldblks[i] == 0) continue; ffs_blkfree(ump, fs, ITODEVVP(ip), oldblks[i], sblksize(fs, osize, i), ip->i_number, vp->v_type, nil); } } } if ((flags & IO_NORMAL) == 0) return (0); if (vp->v_type == VLNK && (ip->i_size < vp->v_mount->mnt_maxsymlinklen || datablocks == 0)) { #ifdef INVARIANTS if (length != 0) panic("ffs_truncate: partial truncate of symlink"); #endif bzero(SHORTLINK(ip), (uint)ip->i_size); ip->i_size = 0; DIP_SET(ip, i_size, 0); ip->i_flag |= IN_CHANGE | IN_UPDATE; if (needextclean) goto extclean; return (ffs_update(vp, waitforupdate)); } if (ip->i_size == length) { ip->i_flag |= IN_CHANGE | IN_UPDATE; if (needextclean) goto extclean; return (ffs_update(vp, 0)); } if (fs->fs_ronly) panic("ffs_truncate: read-only filesystem"); if (IS_SNAPSHOT(ip)) ffs_snapremove(vp); vp->v_lasta = vp->v_clen = vp->v_cstart = vp->v_lastw = 0; osize = ip->i_size; /* * Lengthen the size of the file. We must ensure that the * last byte of the file is allocated. Since the smallest * value of osize is 0, length will be at least 1. */ if (osize < length) { vnode_pager_setsize(vp, length); flags |= BA_CLRBUF; error = UFS_BALLOC(vp, length - 1, 1, cred, flags, &bp); if (error) { vnode_pager_setsize(vp, osize); return (error); } ip->i_size = length; DIP_SET(ip, i_size, length); if (bp->b_bufsize == fs->fs_bsize) bp->b_flags |= B_CLUSTEROK; if (flags & IO_SYNC) bwrite(bp); else if (DOINGASYNC(vp)) bdwrite(bp); else bawrite(bp); ip->i_flag |= IN_CHANGE | IN_UPDATE; return (ffs_update(vp, waitforupdate)); } /* * Lookup block number for a given offset. Zero length files * have no blocks, so return a blkno of -1. */ lbn = lblkno(fs, length - 1); if (length == 0) { blkno = -1; } else if (lbn < UFS_NDADDR) { blkno = DIP(ip, i_db[lbn]); } else { error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn), fs->fs_bsize, cred, BA_METAONLY, &bp); if (error) return (error); indiroff = (lbn - UFS_NDADDR) % NINDIR(fs); if (I_IS_UFS1(ip)) blkno = ((ufs1_daddr_t *)(bp->b_data))[indiroff]; else blkno = ((ufs2_daddr_t *)(bp->b_data))[indiroff]; /* * If the block number is non-zero, then the indirect block * must have been previously allocated and need not be written. * If the block number is zero, then we may have allocated * the indirect block and hence need to write it out. */ if (blkno != 0) brelse(bp); else if (flags & IO_SYNC) bwrite(bp); else bdwrite(bp); } /* * If the block number at the new end of the file is zero, * then we must allocate it to ensure that the last block of * the file is allocated. Soft updates does not handle this * case, so here we have to clean up the soft updates data * structures describing the allocation past the truncation * point. Finding and deallocating those structures is a lot of * work. Since partial truncation with a hole at the end occurs * rarely, we solve the problem by syncing the file so that it * will have no soft updates data structures left. */ if (blkno == 0 && (error = ffs_syncvnode(vp, MNT_WAIT, 0)) != 0) return (error); if (blkno != 0 && DOINGSOFTDEP(vp)) { if (softdeptrunc == 0 && journaltrunc == 0) { /* * If soft updates cannot handle this truncation, * clean up soft dependency data structures and * fall through to the synchronous truncation. */ if ((error = ffs_syncvnode(vp, MNT_WAIT, 0)) != 0) return (error); } else { flags = IO_NORMAL | (needextclean ? IO_EXT: 0); if (journaltrunc) softdep_journal_freeblocks(ip, cred, length, flags); else softdep_setup_freeblocks(ip, length, flags); ASSERT_VOP_LOCKED(vp, "ffs_truncate1"); if (journaltrunc == 0) { ip->i_flag |= IN_CHANGE | IN_UPDATE; error = ffs_update(vp, 0); } return (error); } } /* * Shorten the size of the file. If the last block of the * shortened file is unallocated, we must allocate it. * Additionally, if the file is not being truncated to a * block boundary, the contents of the partial block * following the end of the file must be zero'ed in * case it ever becomes accessible again because of * subsequent file growth. Directories however are not * zero'ed as they should grow back initialized to empty. */ offset = blkoff(fs, length); if (blkno != 0 && offset == 0) { ip->i_size = length; DIP_SET(ip, i_size, length); } else { lbn = lblkno(fs, length); flags |= BA_CLRBUF; error = UFS_BALLOC(vp, length - 1, 1, cred, flags, &bp); if (error) return (error); /* * When we are doing soft updates and the UFS_BALLOC * above fills in a direct block hole with a full sized * block that will be truncated down to a fragment below, * we must flush out the block dependency with an FSYNC * so that we do not get a soft updates inconsistency * when we create the fragment below. */ if (DOINGSOFTDEP(vp) && lbn < UFS_NDADDR && fragroundup(fs, blkoff(fs, length)) < fs->fs_bsize && (error = ffs_syncvnode(vp, MNT_WAIT, 0)) != 0) return (error); ip->i_size = length; DIP_SET(ip, i_size, length); size = blksize(fs, ip, lbn); if (vp->v_type != VDIR && offset != 0) bzero((char *)bp->b_data + offset, (uint)(size - offset)); /* Kirk's code has reallocbuf(bp, size, 1) here */ allocbuf(bp, size); if (bp->b_bufsize == fs->fs_bsize) bp->b_flags |= B_CLUSTEROK; if (flags & IO_SYNC) bwrite(bp); else if (DOINGASYNC(vp)) bdwrite(bp); else bawrite(bp); } /* * Calculate index into inode's block list of * last direct and indirect blocks (if any) * which we want to keep. Lastblock is -1 when * the file is truncated to 0. */ lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1; lastiblock[SINGLE] = lastblock - UFS_NDADDR; lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs); lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs); nblocks = btodb(fs->fs_bsize); /* * Update file and block pointers on disk before we start freeing * blocks. If we crash before free'ing blocks below, the blocks * will be returned to the free list. lastiblock values are also * normalized to -1 for calls to ffs_indirtrunc below. */ for (level = TRIPLE; level >= SINGLE; level--) { oldblks[UFS_NDADDR + level] = DIP(ip, i_ib[level]); if (lastiblock[level] < 0) { DIP_SET(ip, i_ib[level], 0); lastiblock[level] = -1; } } for (i = 0; i < UFS_NDADDR; i++) { oldblks[i] = DIP(ip, i_db[i]); if (i > lastblock) DIP_SET(ip, i_db[i], 0); } ip->i_flag |= IN_CHANGE | IN_UPDATE; allerror = ffs_update(vp, waitforupdate); /* * Having written the new inode to disk, save its new configuration * and put back the old block pointers long enough to process them. * Note that we save the new block configuration so we can check it * when we are done. */ for (i = 0; i < UFS_NDADDR; i++) { newblks[i] = DIP(ip, i_db[i]); DIP_SET(ip, i_db[i], oldblks[i]); } for (i = 0; i < UFS_NIADDR; i++) { newblks[UFS_NDADDR + i] = DIP(ip, i_ib[i]); DIP_SET(ip, i_ib[i], oldblks[UFS_NDADDR + i]); } ip->i_size = osize; DIP_SET(ip, i_size, osize); error = vtruncbuf(vp, cred, length, fs->fs_bsize); if (error && (allerror == 0)) allerror = error; /* * Indirect blocks first. */ indir_lbn[SINGLE] = -UFS_NDADDR; indir_lbn[DOUBLE] = indir_lbn[SINGLE] - NINDIR(fs) - 1; indir_lbn[TRIPLE] = indir_lbn[DOUBLE] - NINDIR(fs) * NINDIR(fs) - 1; for (level = TRIPLE; level >= SINGLE; level--) { bn = DIP(ip, i_ib[level]); if (bn != 0) { error = ffs_indirtrunc(ip, indir_lbn[level], fsbtodb(fs, bn), lastiblock[level], level, &count); if (error) allerror = error; blocksreleased += count; if (lastiblock[level] < 0) { DIP_SET(ip, i_ib[level], 0); ffs_blkfree(ump, fs, ump->um_devvp, bn, fs->fs_bsize, ip->i_number, vp->v_type, nil); blocksreleased += nblocks; } } if (lastiblock[level] >= 0) goto done; } /* * All whole direct blocks or frags. */ for (i = UFS_NDADDR - 1; i > lastblock; i--) { long bsize; bn = DIP(ip, i_db[i]); if (bn == 0) continue; DIP_SET(ip, i_db[i], 0); bsize = blksize(fs, ip, i); ffs_blkfree(ump, fs, ump->um_devvp, bn, bsize, ip->i_number, vp->v_type, nil); blocksreleased += btodb(bsize); } if (lastblock < 0) goto done; /* * Finally, look for a change in size of the * last direct block; release any frags. */ bn = DIP(ip, i_db[lastblock]); if (bn != 0) { long oldspace, newspace; /* * Calculate amount of space we're giving * back as old block size minus new block size. */ oldspace = blksize(fs, ip, lastblock); ip->i_size = length; DIP_SET(ip, i_size, length); newspace = blksize(fs, ip, lastblock); if (newspace == 0) panic("ffs_truncate: newspace"); if (oldspace - newspace > 0) { /* * Block number of space to be free'd is * the old block # plus the number of frags * required for the storage we're keeping. */ bn += numfrags(fs, newspace); ffs_blkfree(ump, fs, ump->um_devvp, bn, oldspace - newspace, ip->i_number, vp->v_type, nil); blocksreleased += btodb(oldspace - newspace); } } done: #ifdef INVARIANTS for (level = SINGLE; level <= TRIPLE; level++) if (newblks[UFS_NDADDR + level] != DIP(ip, i_ib[level])) panic("ffs_truncate1"); for (i = 0; i < UFS_NDADDR; i++) if (newblks[i] != DIP(ip, i_db[i])) panic("ffs_truncate2"); BO_LOCK(bo); if (length == 0 && (fs->fs_magic != FS_UFS2_MAGIC || ip->i_din2->di_extsize == 0) && (bo->bo_dirty.bv_cnt > 0 || bo->bo_clean.bv_cnt > 0)) panic("ffs_truncate3"); BO_UNLOCK(bo); #endif /* INVARIANTS */ /* * Put back the real size. */ ip->i_size = length; DIP_SET(ip, i_size, length); if (DIP(ip, i_blocks) >= blocksreleased) DIP_SET(ip, i_blocks, DIP(ip, i_blocks) - blocksreleased); else /* sanity */ DIP_SET(ip, i_blocks, 0); ip->i_flag |= IN_CHANGE; #ifdef QUOTA (void) chkdq(ip, -blocksreleased, NOCRED, 0); #endif return (allerror); extclean: if (journaltrunc) softdep_journal_freeblocks(ip, cred, length, IO_EXT); else softdep_setup_freeblocks(ip, length, IO_EXT); return (ffs_update(vp, waitforupdate)); #endif // 0 return 0; }
static int ttm_bo_vm_fault(vm_object_t vm_obj, vm_ooffset_t offset, int prot, vm_page_t *mres) { struct ttm_buffer_object *bo = vm_obj->handle; struct ttm_bo_device *bdev = bo->bdev; struct ttm_tt *ttm = NULL; vm_page_t m, m1, oldm; int ret; int retval = VM_PAGER_OK; struct ttm_mem_type_manager *man = &bdev->man[bo->mem.mem_type]; vm_object_pip_add(vm_obj, 1); oldm = *mres; if (oldm != NULL) { vm_page_lock(oldm); vm_page_remove(oldm); vm_page_unlock(oldm); *mres = NULL; } else oldm = NULL; retry: VM_OBJECT_WUNLOCK(vm_obj); m = NULL; reserve: ret = ttm_bo_reserve(bo, false, false, false, 0); if (unlikely(ret != 0)) { if (ret == -EBUSY) { kern_yield(0); goto reserve; } } if (bdev->driver->fault_reserve_notify) { ret = bdev->driver->fault_reserve_notify(bo); switch (ret) { case 0: break; case -EBUSY: case -ERESTART: case -EINTR: kern_yield(0); goto reserve; default: retval = VM_PAGER_ERROR; goto out_unlock; } } /* * Wait for buffer data in transit, due to a pipelined * move. */ mtx_lock(&bdev->fence_lock); if (test_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags)) { /* * Here, the behavior differs between Linux and FreeBSD. * * On Linux, the wait is interruptible (3rd argument to * ttm_bo_wait). There must be some mechanism to resume * page fault handling, once the signal is processed. * * On FreeBSD, the wait is uninteruptible. This is not a * problem as we can't end up with an unkillable process * here, because the wait will eventually time out. * * An example of this situation is the Xorg process * which uses SIGALRM internally. The signal could * interrupt the wait, causing the page fault to fail * and the process to receive SIGSEGV. */ ret = ttm_bo_wait(bo, false, false, false); mtx_unlock(&bdev->fence_lock); if (unlikely(ret != 0)) { retval = VM_PAGER_ERROR; goto out_unlock; } } else mtx_unlock(&bdev->fence_lock); ret = ttm_mem_io_lock(man, true); if (unlikely(ret != 0)) { retval = VM_PAGER_ERROR; goto out_unlock; } ret = ttm_mem_io_reserve_vm(bo); if (unlikely(ret != 0)) { retval = VM_PAGER_ERROR; goto out_io_unlock; } /* * Strictly, we're not allowed to modify vma->vm_page_prot here, * since the mmap_sem is only held in read mode. However, we * modify only the caching bits of vma->vm_page_prot and * consider those bits protected by * the bo->mutex, as we should be the only writers. * There shouldn't really be any readers of these bits except * within vm_insert_mixed()? fork? * * TODO: Add a list of vmas to the bo, and change the * vma->vm_page_prot when the object changes caching policy, with * the correct locks held. */ if (!bo->mem.bus.is_iomem) { /* Allocate all page at once, most common usage */ ttm = bo->ttm; if (ttm->bdev->driver->ttm_tt_populate(ttm)) { retval = VM_PAGER_ERROR; goto out_io_unlock; } } if (bo->mem.bus.is_iomem) { m = PHYS_TO_VM_PAGE(bo->mem.bus.base + bo->mem.bus.offset + offset); KASSERT((m->flags & PG_FICTITIOUS) != 0, ("physical address %#jx not fictitious", (uintmax_t)(bo->mem.bus.base + bo->mem.bus.offset + offset))); pmap_page_set_memattr(m, ttm_io_prot(bo->mem.placement)); } else { ttm = bo->ttm; m = ttm->pages[OFF_TO_IDX(offset)]; if (unlikely(!m)) { retval = VM_PAGER_ERROR; goto out_io_unlock; } pmap_page_set_memattr(m, (bo->mem.placement & TTM_PL_FLAG_CACHED) ? VM_MEMATTR_WRITE_BACK : ttm_io_prot(bo->mem.placement)); } VM_OBJECT_WLOCK(vm_obj); if (vm_page_busied(m)) { vm_page_lock(m); VM_OBJECT_WUNLOCK(vm_obj); vm_page_busy_sleep(m, "ttmpbs"); VM_OBJECT_WLOCK(vm_obj); ttm_mem_io_unlock(man); ttm_bo_unreserve(bo); goto retry; } m1 = vm_page_lookup(vm_obj, OFF_TO_IDX(offset)); if (m1 == NULL) { if (vm_page_insert(m, vm_obj, OFF_TO_IDX(offset))) { VM_OBJECT_WUNLOCK(vm_obj); VM_WAIT; VM_OBJECT_WLOCK(vm_obj); ttm_mem_io_unlock(man); ttm_bo_unreserve(bo); goto retry; } } else { KASSERT(m == m1, ("inconsistent insert bo %p m %p m1 %p offset %jx", bo, m, m1, (uintmax_t)offset)); } m->valid = VM_PAGE_BITS_ALL; *mres = m; vm_page_xbusy(m); if (oldm != NULL) { vm_page_lock(oldm); vm_page_free(oldm); vm_page_unlock(oldm); } out_io_unlock1: ttm_mem_io_unlock(man); out_unlock1: ttm_bo_unreserve(bo); vm_object_pip_wakeup(vm_obj); return (retval); out_io_unlock: VM_OBJECT_WLOCK(vm_obj); goto out_io_unlock1; out_unlock: VM_OBJECT_WLOCK(vm_obj); goto out_unlock1; }
/* * mincore system call handler * * mincore_args(const void *addr, size_t len, char *vec) * * No requirements */ int sys_mincore(struct mincore_args *uap) { struct proc *p = curproc; vm_offset_t addr, first_addr; vm_offset_t end, cend; pmap_t pmap; vm_map_t map; char *vec; int error; int vecindex, lastvecindex; vm_map_entry_t current; vm_map_entry_t entry; int mincoreinfo; unsigned int timestamp; /* * Make sure that the addresses presented are valid for user * mode. */ first_addr = addr = trunc_page((vm_offset_t) uap->addr); end = addr + (vm_size_t)round_page(uap->len); if (end < addr) return (EINVAL); if (VM_MAX_USER_ADDRESS > 0 && end > VM_MAX_USER_ADDRESS) return (EINVAL); /* * Address of byte vector */ vec = uap->vec; map = &p->p_vmspace->vm_map; pmap = vmspace_pmap(p->p_vmspace); lwkt_gettoken(&map->token); vm_map_lock_read(map); RestartScan: timestamp = map->timestamp; if (!vm_map_lookup_entry(map, addr, &entry)) entry = entry->next; /* * Do this on a map entry basis so that if the pages are not * in the current processes address space, we can easily look * up the pages elsewhere. */ lastvecindex = -1; for(current = entry; (current != &map->header) && (current->start < end); current = current->next) { /* * ignore submaps (for now) or null objects */ if (current->maptype != VM_MAPTYPE_NORMAL && current->maptype != VM_MAPTYPE_VPAGETABLE) { continue; } if (current->object.vm_object == NULL) continue; /* * limit this scan to the current map entry and the * limits for the mincore call */ if (addr < current->start) addr = current->start; cend = current->end; if (cend > end) cend = end; /* * scan this entry one page at a time */ while (addr < cend) { /* * Check pmap first, it is likely faster, also * it can provide info as to whether we are the * one referencing or modifying the page. * * If we have to check the VM object, only mess * around with normal maps. Do not mess around * with virtual page tables (XXX). */ mincoreinfo = pmap_mincore(pmap, addr); if (mincoreinfo == 0 && current->maptype == VM_MAPTYPE_NORMAL) { vm_pindex_t pindex; vm_ooffset_t offset; vm_page_t m; /* * calculate the page index into the object */ offset = current->offset + (addr - current->start); pindex = OFF_TO_IDX(offset); /* * if the page is resident, then gather * information about it. spl protection is * required to maintain the object * association. And XXX what if the page is * busy? What's the deal with that? * * XXX vm_token - legacy for pmap_ts_referenced * in i386 and vkernel pmap code. */ lwkt_gettoken(&vm_token); vm_object_hold(current->object.vm_object); m = vm_page_lookup(current->object.vm_object, pindex); if (m && m->valid) { mincoreinfo = MINCORE_INCORE; if (m->dirty || pmap_is_modified(m)) mincoreinfo |= MINCORE_MODIFIED_OTHER; if ((m->flags & PG_REFERENCED) || pmap_ts_referenced(m)) { vm_page_flag_set(m, PG_REFERENCED); mincoreinfo |= MINCORE_REFERENCED_OTHER; } } vm_object_drop(current->object.vm_object); lwkt_reltoken(&vm_token); } /* * subyte may page fault. In case it needs to modify * the map, we release the lock. */ vm_map_unlock_read(map); /* * calculate index into user supplied byte vector */ vecindex = OFF_TO_IDX(addr - first_addr); /* * If we have skipped map entries, we need to make sure that * the byte vector is zeroed for those skipped entries. */ while((lastvecindex + 1) < vecindex) { error = subyte( vec + lastvecindex, 0); if (error) { error = EFAULT; goto done; } ++lastvecindex; } /* * Pass the page information to the user */ error = subyte( vec + vecindex, mincoreinfo); if (error) { error = EFAULT; goto done; } /* * If the map has changed, due to the subyte, the previous * output may be invalid. */ vm_map_lock_read(map); if (timestamp != map->timestamp) goto RestartScan; lastvecindex = vecindex; addr += PAGE_SIZE; } } /* * subyte may page fault. In case it needs to modify * the map, we release the lock. */ vm_map_unlock_read(map); /* * Zero the last entries in the byte vector. */ vecindex = OFF_TO_IDX(end - first_addr); while((lastvecindex + 1) < vecindex) { error = subyte( vec + lastvecindex, 0); if (error) { error = EFAULT; goto done; } ++lastvecindex; } /* * If the map has changed, due to the subyte, the previous * output may be invalid. */ vm_map_lock_read(map); if (timestamp != map->timestamp) goto RestartScan; vm_map_unlock_read(map); error = 0; done: lwkt_reltoken(&map->token); return (error); }
/* * Lets the VM system know about a change in size for a file. * We adjust our own internal size and flush any cached pages in * the associated object that are affected by the size change. * * Note: this routine may be invoked as a result of a pager put * operation (possibly at object termination time), so we must be careful. */ void vnode_pager_setsize(struct vnode *vp, vm_ooffset_t nsize) { vm_object_t object; vm_page_t m; vm_pindex_t nobjsize; if ((object = vp->v_object) == NULL) return; /* ASSERT_VOP_ELOCKED(vp, "vnode_pager_setsize and not locked vnode"); */ VM_OBJECT_WLOCK(object); if (object->type == OBJT_DEAD) { VM_OBJECT_WUNLOCK(object); return; } KASSERT(object->type == OBJT_VNODE, ("not vnode-backed object %p", object)); if (nsize == object->un_pager.vnp.vnp_size) { /* * Hasn't changed size */ VM_OBJECT_WUNLOCK(object); return; } nobjsize = OFF_TO_IDX(nsize + PAGE_MASK); if (nsize < object->un_pager.vnp.vnp_size) { /* * File has shrunk. Toss any cached pages beyond the new EOF. */ if (nobjsize < object->size) vm_object_page_remove(object, nobjsize, object->size, 0); /* * this gets rid of garbage at the end of a page that is now * only partially backed by the vnode. * * XXX for some reason (I don't know yet), if we take a * completely invalid page and mark it partially valid * it can screw up NFS reads, so we don't allow the case. */ if ((nsize & PAGE_MASK) && (m = vm_page_lookup(object, OFF_TO_IDX(nsize))) != NULL && m->valid != 0) { int base = (int)nsize & PAGE_MASK; int size = PAGE_SIZE - base; /* * Clear out partial-page garbage in case * the page has been mapped. */ pmap_zero_page_area(m, base, size); /* * Update the valid bits to reflect the blocks that * have been zeroed. Some of these valid bits may * have already been set. */ vm_page_set_valid_range(m, base, size); /* * Round "base" to the next block boundary so that the * dirty bit for a partially zeroed block is not * cleared. */ base = roundup2(base, DEV_BSIZE); /* * Clear out partial-page dirty bits. * * note that we do not clear out the valid * bits. This would prevent bogus_page * replacement from working properly. */ vm_page_clear_dirty(m, base, PAGE_SIZE - base); } } object->un_pager.vnp.vnp_size = nsize; object->size = nobjsize; VM_OBJECT_WUNLOCK(object); }
/* * Routine: * vm_fault_copy_entry * Function: * Create new shadow object backing dst_entry with private copy of * all underlying pages. When src_entry is equal to dst_entry, * function implements COW for wired-down map entry. Otherwise, * it forks wired entry into dst_map. * * In/out conditions: * The source and destination maps must be locked for write. * The source map entry must be wired down (or be a sharing map * entry corresponding to a main map entry that is wired down). */ void vm_fault_copy_entry(vm_map_t dst_map, vm_map_t src_map, vm_map_entry_t dst_entry, vm_map_entry_t src_entry, vm_ooffset_t *fork_charge) { vm_object_t backing_object, dst_object, object, src_object; vm_pindex_t dst_pindex, pindex, src_pindex; vm_prot_t access, prot; vm_offset_t vaddr; vm_page_t dst_m; vm_page_t src_m; boolean_t src_readonly, upgrade; #ifdef lint src_map++; #endif /* lint */ upgrade = src_entry == dst_entry; src_object = src_entry->object.vm_object; src_pindex = OFF_TO_IDX(src_entry->offset); src_readonly = (src_entry->protection & VM_PROT_WRITE) == 0; /* * Create the top-level object for the destination entry. (Doesn't * actually shadow anything - we copy the pages directly.) */ dst_object = vm_object_allocate(OBJT_DEFAULT, OFF_TO_IDX(dst_entry->end - dst_entry->start)); #if VM_NRESERVLEVEL > 0 dst_object->flags |= OBJ_COLORED; dst_object->pg_color = atop(dst_entry->start); #endif VM_OBJECT_WLOCK(dst_object); KASSERT(upgrade || dst_entry->object.vm_object == NULL, ("vm_fault_copy_entry: vm_object not NULL")); dst_entry->object.vm_object = dst_object; dst_entry->offset = 0; dst_object->charge = dst_entry->end - dst_entry->start; if (fork_charge != NULL) { KASSERT(dst_entry->cred == NULL, ("vm_fault_copy_entry: leaked swp charge")); dst_object->cred = curthread->td_ucred; crhold(dst_object->cred); *fork_charge += dst_object->charge; } else { dst_object->cred = dst_entry->cred; dst_entry->cred = NULL; } access = prot = dst_entry->protection; /* * If not an upgrade, then enter the mappings in the pmap as * read and/or execute accesses. Otherwise, enter them as * write accesses. * * A writeable large page mapping is only created if all of * the constituent small page mappings are modified. Marking * PTEs as modified on inception allows promotion to happen * without taking potentially large number of soft faults. */ if (!upgrade) access &= ~VM_PROT_WRITE; /* * Loop through all of the virtual pages within the entry's * range, copying each page from the source object to the * destination object. Since the source is wired, those pages * must exist. In contrast, the destination is pageable. * Since the destination object does share any backing storage * with the source object, all of its pages must be dirtied, * regardless of whether they can be written. */ for (vaddr = dst_entry->start, dst_pindex = 0; vaddr < dst_entry->end; vaddr += PAGE_SIZE, dst_pindex++) { /* * Allocate a page in the destination object. */ do { dst_m = vm_page_alloc(dst_object, dst_pindex, VM_ALLOC_NORMAL); if (dst_m == NULL) { VM_OBJECT_WUNLOCK(dst_object); VM_WAIT; VM_OBJECT_WLOCK(dst_object); } } while (dst_m == NULL); /* * Find the page in the source object, and copy it in. * (Because the source is wired down, the page will be in * memory.) */ VM_OBJECT_WLOCK(src_object); object = src_object; pindex = src_pindex + dst_pindex; while ((src_m = vm_page_lookup(object, pindex)) == NULL && src_readonly && (backing_object = object->backing_object) != NULL) { /* * Allow fallback to backing objects if we are reading. */ VM_OBJECT_WLOCK(backing_object); pindex += OFF_TO_IDX(object->backing_object_offset); VM_OBJECT_WUNLOCK(object); object = backing_object; } if (src_m == NULL) panic("vm_fault_copy_wired: page missing"); pmap_copy_page(src_m, dst_m); VM_OBJECT_WUNLOCK(object); dst_m->valid = VM_PAGE_BITS_ALL; dst_m->dirty = VM_PAGE_BITS_ALL; VM_OBJECT_WUNLOCK(dst_object); /* * Enter it in the pmap. If a wired, copy-on-write * mapping is being replaced by a write-enabled * mapping, then wire that new mapping. */ pmap_enter(dst_map->pmap, vaddr, access, dst_m, prot, upgrade); /* * Mark it no longer busy, and put it on the active list. */ VM_OBJECT_WLOCK(dst_object); if (upgrade) { vm_page_lock(src_m); vm_page_unwire(src_m, 0); vm_page_unlock(src_m); vm_page_lock(dst_m); vm_page_wire(dst_m); vm_page_unlock(dst_m); } else { vm_page_lock(dst_m); vm_page_activate(dst_m); vm_page_unlock(dst_m); } vm_page_wakeup(dst_m); } VM_OBJECT_WUNLOCK(dst_object); if (upgrade) { dst_entry->eflags &= ~(MAP_ENTRY_COW | MAP_ENTRY_NEEDS_COPY); vm_object_deallocate(src_object); } }
vm_object_t cdev_pager_allocate(void *handle, enum obj_type tp, struct cdev_pager_ops *ops, vm_ooffset_t size, vm_prot_t prot, vm_ooffset_t foff, struct ucred *cred) { cdev_t dev; vm_object_t object; u_short color; /* * Offset should be page aligned. */ if (foff & PAGE_MASK) return (NULL); size = round_page64(size); if (ops->cdev_pg_ctor(handle, size, prot, foff, cred, &color) != 0) return (NULL); /* * Look up pager, creating as necessary. */ mtx_lock(&dev_pager_mtx); object = vm_pager_object_lookup(&dev_pager_object_list, handle); if (object == NULL) { /* * Allocate object and associate it with the pager. */ object = vm_object_allocate_hold(tp, OFF_TO_IDX(foff + size)); object->handle = handle; object->un_pager.devp.ops = ops; object->un_pager.devp.dev = handle; TAILQ_INIT(&object->un_pager.devp.devp_pglist); /* * handle is only a device for old_dev_pager_ctor. */ if (ops->cdev_pg_ctor == old_dev_pager_ctor) { dev = handle; dev->si_object = object; } TAILQ_INSERT_TAIL(&dev_pager_object_list, object, pager_object_list); vm_object_drop(object); } else { /* * Gain a reference to the object. */ vm_object_hold(object); vm_object_reference_locked(object); if (OFF_TO_IDX(foff + size) > object->size) object->size = OFF_TO_IDX(foff + size); vm_object_drop(object); } mtx_unlock(&dev_pager_mtx); return (object); }
static int shm_dotruncate(struct shmfd *shmfd, off_t length) { vm_object_t object; vm_page_t m, ma[1]; vm_pindex_t idx, nobjsize; vm_ooffset_t delta; int base, rv; object = shmfd->shm_object; VM_OBJECT_LOCK(object); if (length == shmfd->shm_size) { VM_OBJECT_UNLOCK(object); return (0); } nobjsize = OFF_TO_IDX(length + PAGE_MASK); /* Are we shrinking? If so, trim the end. */ if (length < shmfd->shm_size) { /* * Disallow any requests to shrink the size if this * object is mapped into the kernel. */ if (shmfd->shm_kmappings > 0) { VM_OBJECT_UNLOCK(object); return (EBUSY); } /* * Zero the truncated part of the last page. */ base = length & PAGE_MASK; if (base != 0) { idx = OFF_TO_IDX(length); retry: m = vm_page_lookup(object, idx); if (m != NULL) { if ((m->oflags & VPO_BUSY) != 0 || m->busy != 0) { vm_page_sleep(m, "shmtrc"); goto retry; } } else if (vm_pager_has_page(object, idx, NULL, NULL)) { m = vm_page_alloc(object, idx, VM_ALLOC_NORMAL); if (m == NULL) { VM_OBJECT_UNLOCK(object); VM_WAIT; VM_OBJECT_LOCK(object); goto retry; } else if (m->valid != VM_PAGE_BITS_ALL) { ma[0] = m; rv = vm_pager_get_pages(object, ma, 1, 0); m = vm_page_lookup(object, idx); } else /* A cached page was reactivated. */ rv = VM_PAGER_OK; vm_page_lock(m); if (rv == VM_PAGER_OK) { vm_page_deactivate(m); vm_page_unlock(m); vm_page_wakeup(m); } else { vm_page_free(m); vm_page_unlock(m); VM_OBJECT_UNLOCK(object); return (EIO); } } if (m != NULL) { pmap_zero_page_area(m, base, PAGE_SIZE - base); KASSERT(m->valid == VM_PAGE_BITS_ALL, ("shm_dotruncate: page %p is invalid", m)); vm_page_dirty(m); vm_pager_page_unswapped(m); } } delta = ptoa(object->size - nobjsize); /* Toss in memory pages. */ if (nobjsize < object->size) vm_object_page_remove(object, nobjsize, object->size, 0); /* Toss pages from swap. */ if (object->type == OBJT_SWAP) swap_pager_freespace(object, nobjsize, delta); /* Free the swap accounted for shm */ swap_release_by_cred(delta, object->cred); object->charge -= delta; } else { /* Attempt to reserve the swap */ delta = ptoa(nobjsize - object->size); if (!swap_reserve_by_cred(delta, object->cred)) { VM_OBJECT_UNLOCK(object); return (ENOMEM); } object->charge += delta; } shmfd->shm_size = length; mtx_lock(&shm_timestamp_lock); vfs_timestamp(&shmfd->shm_ctime); shmfd->shm_mtime = shmfd->shm_ctime; mtx_unlock(&shm_timestamp_lock); object->size = nobjsize; VM_OBJECT_UNLOCK(object); return (0); }
static int tmpfs_mappedwrite(vm_object_t vobj, vm_object_t tobj, size_t len, struct uio *uio) { vm_pindex_t idx; vm_page_t vpg, tpg; vm_offset_t offset; off_t addr; size_t tlen; int error, rv; error = 0; addr = uio->uio_offset; idx = OFF_TO_IDX(addr); offset = addr & PAGE_MASK; tlen = MIN(PAGE_SIZE - offset, len); if ((vobj == NULL) || (vobj->resident_page_count == 0 && vobj->cache == NULL)) { vpg = NULL; goto nocache; } VM_OBJECT_LOCK(vobj); lookupvpg: if (((vpg = vm_page_lookup(vobj, idx)) != NULL) && vm_page_is_valid(vpg, offset, tlen)) { if ((vpg->oflags & VPO_BUSY) != 0) { /* * Reference the page before unlocking and sleeping so * that the page daemon is less likely to reclaim it. */ vm_page_reference(vpg); vm_page_sleep(vpg, "tmfsmw"); goto lookupvpg; } vm_page_busy(vpg); vm_page_undirty(vpg); VM_OBJECT_UNLOCK(vobj); error = uiomove_fromphys(&vpg, offset, tlen, uio); } else { if (__predict_false(vobj->cache != NULL)) vm_page_cache_free(vobj, idx, idx + 1); VM_OBJECT_UNLOCK(vobj); vpg = NULL; } nocache: VM_OBJECT_LOCK(tobj); tpg = vm_page_grab(tobj, idx, VM_ALLOC_WIRED | VM_ALLOC_NORMAL | VM_ALLOC_RETRY); if (tpg->valid != VM_PAGE_BITS_ALL) { if (vm_pager_has_page(tobj, idx, NULL, NULL)) { rv = vm_pager_get_pages(tobj, &tpg, 1, 0); if (rv != VM_PAGER_OK) { vm_page_lock(tpg); vm_page_free(tpg); vm_page_unlock(tpg); error = EIO; goto out; } } else vm_page_zero_invalid(tpg, TRUE); } VM_OBJECT_UNLOCK(tobj); if (vpg == NULL) error = uiomove_fromphys(&tpg, offset, tlen, uio); else { KASSERT(vpg->valid == VM_PAGE_BITS_ALL, ("parts of vpg invalid")); pmap_copy_page(vpg, tpg); } VM_OBJECT_LOCK(tobj); if (error == 0) { KASSERT(tpg->valid == VM_PAGE_BITS_ALL, ("parts of tpg invalid")); vm_page_dirty(tpg); } vm_page_lock(tpg); vm_page_unwire(tpg, TRUE); vm_page_unlock(tpg); vm_page_wakeup(tpg); out: VM_OBJECT_UNLOCK(tobj); if (vpg != NULL) { VM_OBJECT_LOCK(vobj); vm_page_wakeup(vpg); VM_OBJECT_UNLOCK(vobj); } return (error); }