vm_page_t
shmem_read_mapping_page(vm_object_t object, vm_pindex_t pindex)
{
	vm_page_t m;
	int rv;

	VM_OBJECT_LOCK_ASSERT_OWNED(object);
	m = vm_page_grab(object, pindex, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
	if (m->valid != VM_PAGE_BITS_ALL) {
		if (vm_pager_has_page(object, pindex)) {
			rv = vm_pager_get_page(object, &m, 1);
			m = vm_page_lookup(object, pindex);
			if (m == NULL)
				return ERR_PTR(-ENOMEM);
			if (rv != VM_PAGER_OK) {
				vm_page_free(m);
				return ERR_PTR(-ENOMEM);
			}
		} else {
			pmap_zero_page(VM_PAGE_TO_PHYS(m));
			m->valid = VM_PAGE_BITS_ALL;
			m->dirty = 0;
		}
	}
	vm_page_wire(m);
	vm_page_wakeup(m);
	return (m);
}
void
ttm_bo_release_mmap(struct ttm_buffer_object *bo)
{
	vm_object_t vm_obj;
	vm_page_t m;
	int i;

	vm_obj = cdev_pager_lookup(bo);
	if (vm_obj == NULL)
		return;

	VM_OBJECT_WLOCK(vm_obj);
retry:
	for (i = 0; i < bo->num_pages; i++) {
		m = vm_page_lookup(vm_obj, i);
		if (m == NULL)
			continue;
		if (vm_page_sleep_if_busy(m, "ttm_unm"))
			goto retry;
		cdev_pager_free_page(vm_obj, m);
	}
	VM_OBJECT_WUNLOCK(vm_obj);

	vm_object_deallocate(vm_obj);
}
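The loop above is an instance of a pattern that recurs throughout this collection (privcmd_pg_dtor below uses it verbatim): because vm_page_sleep_if_busy() drops the object lock while it sleeps, any page looked up earlier in the scan may have changed by the time the lock is reacquired, so the whole walk is restarted from index 0. A minimal sketch of just that pattern, assuming the same FreeBSD object-lock and cdev-pager KPIs as the function above; example_pager_teardown is a hypothetical name, not part of any of the sources quoted here.

/*
 * Hypothetical helper: detach every resident page of a cdev pager
 * object, restarting the scan whenever a page is found busy.
 */
static void
example_pager_teardown(vm_object_t obj, vm_pindex_t npages)
{
	vm_page_t m;
	vm_pindex_t i;

	VM_OBJECT_WLOCK(obj);
retry:
	for (i = 0; i < npages; i++) {
		m = vm_page_lookup(obj, i);	/* NULL if never faulted in */
		if (m == NULL)
			continue;
		/*
		 * Sleeping drops and re-acquires the object lock, so the
		 * whole scan must be restarted from the beginning.
		 */
		if (vm_page_sleep_if_busy(m, "exdtor"))
			goto retry;
		cdev_pager_free_page(obj, m);
	}
	VM_OBJECT_WUNLOCK(obj);
}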
/*
 * kmem_unback:
 *
 * Unmap and free the physical pages underlying the specified virtual
 * address range.
 *
 * A physical page must exist within the specified object at each index
 * that is being unmapped.
 */
static int
_kmem_unback(vm_object_t object, vm_offset_t addr, vm_size_t size)
{
	vm_page_t m, next;
	vm_offset_t end, offset;
	int domain;

	KASSERT(object == kernel_object,
	    ("kmem_unback: only supports kernel object."));

	if (size == 0)
		return (0);
	pmap_remove(kernel_pmap, addr, addr + size);
	offset = addr - VM_MIN_KERNEL_ADDRESS;
	end = offset + size;
	VM_OBJECT_WLOCK(object);
	m = vm_page_lookup(object, atop(offset));
	domain = vm_phys_domain(m);
	for (; offset < end; offset += PAGE_SIZE, m = next) {
		next = vm_page_next(m);
		vm_page_unwire(m, PQ_NONE);
		vm_page_free(m);
	}
	VM_OBJECT_WUNLOCK(object);

	return (domain);
}
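_kmem_unback() performs a single vm_page_lookup() at the start of the range and then walks the run with vm_page_next(), relying on the stated invariant that every index in the range is resident. For comparison, the same loop can be written with one lookup per index; this is a hedged, illustrative sketch only (it covers just the freeing loop and assumes the pmap_remove() has already been done, as in the function above), and example_unback_by_lookup is a hypothetical name.

/*
 * Equivalent but slower formulation of the loop in _kmem_unback():
 * one vm_page_lookup() per page index instead of vm_page_next().
 */
static void
example_unback_by_lookup(vm_object_t object, vm_offset_t addr, vm_size_t size)
{
	vm_offset_t offset, end;
	vm_page_t m;

	offset = addr - VM_MIN_KERNEL_ADDRESS;	/* byte offset into kernel_object */
	end = offset + size;
	VM_OBJECT_WLOCK(object);
	for (; offset < end; offset += PAGE_SIZE) {
		m = vm_page_lookup(object, atop(offset));	/* must be resident */
		vm_page_unwire(m, PQ_NONE);
		vm_page_free(m);
	}
	VM_OBJECT_WUNLOCK(object);
}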
static int
rtR0MemObjFreeBSDPhysAllocHelper(vm_object_t pObject, u_long cPages,
    vm_paddr_t VmPhysAddrHigh, u_long uAlignment, bool fContiguous,
    bool fWire, int rcNoMem)
{
	if (fContiguous) {
		if (rtR0MemObjFreeBSDContigPhysAllocHelper(pObject, 0, cPages,
		    VmPhysAddrHigh, uAlignment, fWire) != NULL)
			return VINF_SUCCESS;
		return rcNoMem;
	}

	for (vm_pindex_t iPage = 0; iPage < cPages; iPage++) {
		vm_page_t pPage = rtR0MemObjFreeBSDContigPhysAllocHelper(pObject,
		    iPage, 1, VmPhysAddrHigh, uAlignment, fWire);
		if (!pPage) {
			/* Free all allocated pages */
			VM_OBJECT_LOCK(pObject);
			while (iPage-- > 0) {
				pPage = vm_page_lookup(pObject, iPage);
				vm_page_lock_queues();
				if (fWire)
					vm_page_unwire(pPage, 0);
				vm_page_free(pPage);
				vm_page_unlock_queues();
			}
			VM_OBJECT_UNLOCK(pObject);
			return rcNoMem;
		}
	}
	return VINF_SUCCESS;
}
/* * vm_fault_prefault provides a quick way of clustering * pagefaults into a processes address space. It is a "cousin" * of vm_map_pmap_enter, except it runs at page fault time instead * of mmap time. */ static void vm_fault_prefault(pmap_t pmap, vm_offset_t addra, vm_map_entry_t entry) { int i; vm_offset_t addr, starta; vm_pindex_t pindex; vm_page_t m; vm_object_t object; if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) return; object = entry->object.vm_object; starta = addra - PFBAK * PAGE_SIZE; if (starta < entry->start) { starta = entry->start; } else if (starta > addra) { starta = 0; } for (i = 0; i < PAGEORDER_SIZE; i++) { vm_object_t backing_object, lobject; addr = addra + prefault_pageorder[i]; if (addr > addra + (PFFOR * PAGE_SIZE)) addr = 0; if (addr < starta || addr >= entry->end) continue; if (!pmap_is_prefaultable(pmap, addr)) continue; pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT; lobject = object; VM_OBJECT_WLOCK(lobject); while ((m = vm_page_lookup(lobject, pindex)) == NULL && lobject->type == OBJT_DEFAULT && (backing_object = lobject->backing_object) != NULL) { KASSERT((lobject->backing_object_offset & PAGE_MASK) == 0, ("vm_fault_prefault: unaligned object offset")); pindex += lobject->backing_object_offset >> PAGE_SHIFT; VM_OBJECT_WLOCK(backing_object); VM_OBJECT_WUNLOCK(lobject); lobject = backing_object; } /* * give-up when a page is not in memory */ if (m == NULL) { VM_OBJECT_WUNLOCK(lobject); break; } if (m->valid == VM_PAGE_BITS_ALL && (m->flags & PG_FICTITIOUS) == 0) pmap_enter_quick(pmap, addr, m, entry->protection); VM_OBJECT_WUNLOCK(lobject); } }
/* * Identify the physical page mapped at the given kernel virtual * address. Insert this physical page into the given address space at * the given virtual address, replacing the physical page, if any, * that already exists there. */ static int vm_pgmoveco(vm_map_t mapa, vm_offset_t kaddr, vm_offset_t uaddr) { vm_map_t map = mapa; vm_page_t kern_pg, user_pg; vm_object_t uobject; vm_map_entry_t entry; vm_pindex_t upindex; vm_prot_t prot; boolean_t wired; KASSERT((uaddr & PAGE_MASK) == 0, ("vm_pgmoveco: uaddr is not page aligned")); /* * Herein the physical page is validated and dirtied. It is * unwired in sf_buf_mext(). */ kern_pg = PHYS_TO_VM_PAGE(vtophys(kaddr)); kern_pg->valid = VM_PAGE_BITS_ALL; KASSERT(kern_pg->queue == PQ_NONE && kern_pg->wire_count == 1, ("vm_pgmoveco: kern_pg is not correctly wired")); if ((vm_map_lookup(&map, uaddr, VM_PROT_WRITE, &entry, &uobject, &upindex, &prot, &wired)) != KERN_SUCCESS) { return(EFAULT); } VM_OBJECT_LOCK(uobject); retry: if ((user_pg = vm_page_lookup(uobject, upindex)) != NULL) { if (vm_page_sleep_if_busy(user_pg, TRUE, "vm_pgmoveco")) goto retry; vm_page_lock_queues(); pmap_remove_all(user_pg); vm_page_free(user_pg); } else { /* * Even if a physical page does not exist in the * object chain's first object, a physical page from a * backing object may be mapped read only. */ if (uobject->backing_object != NULL) pmap_remove(map->pmap, uaddr, uaddr + PAGE_SIZE); vm_page_lock_queues(); } vm_page_insert(kern_pg, uobject, upindex); vm_page_dirty(kern_pg); vm_page_unlock_queues(); VM_OBJECT_UNLOCK(uobject); vm_map_lookup_done(map, entry); return(KERN_SUCCESS); }
static void
os_kmem_free(vm_page_t page) // IN
{
	os_state *state = &global_state;
	os_pmap *pmap = &state->pmap;

	if (!vm_page_lookup(state->vmobject, page->pindex)) {
		return;
	}

	os_pmap_putindex(pmap, page->pindex);
	vm_page_free(page);
}
/*
 *	Remap wired pages in an object into a new region.
 *	The object is assumed to be mapped into the kernel map or
 *	a submap.
 */
void
kmem_remap_pages(
	vm_object_t	object,
	vm_offset_t	offset,
	vm_offset_t	start,
	vm_offset_t	end,
	vm_prot_t	protection)
{
	/*
	 *	Mark the pmap region as not pageable.
	 */
	pmap_pageable(kernel_pmap, start, end, FALSE);

	while (start < end) {
		vm_page_t	mem;

		vm_object_lock(object);

		/*
		 *	Find a page
		 */
		if ((mem = vm_page_lookup(object, offset)) == VM_PAGE_NULL)
			panic("kmem_remap_pages");

		/*
		 *	Wire it down (again)
		 */
		vm_page_lock_queues();
		vm_page_wire(mem);
		vm_page_unlock_queues();
		vm_object_unlock(object);

		/*
		 *	Enter it in the kernel pmap.  The page isn't busy,
		 *	but this shouldn't be a problem because it is wired.
		 */
		PMAP_ENTER(kernel_pmap, start, mem, protection, TRUE);

		start += PAGE_SIZE;
		offset += PAGE_SIZE;
	}
}
static void
pscnv_gem_pager_dtor(void *handle)
{
	struct drm_gem_object *gem_obj = handle;
	struct pscnv_bo *bo = gem_obj->driver_private;
	struct drm_device *dev = gem_obj->dev;
	vm_object_t devobj;

	DRM_LOCK(dev);
	devobj = cdev_pager_lookup(handle);

	if (devobj != NULL) {
		vm_size_t page_count = OFF_TO_IDX(bo->size);
		vm_page_t m;
		int i;

		VM_OBJECT_LOCK(devobj);
		for (i = 0; i < page_count; i++) {
			m = vm_page_lookup(devobj, i);
			if (!m)
				continue;
			if (pscnv_mem_debug > 0)
				NV_WARN(dev, "Freeing %010llx + %08llx (%p)\n",
				    bo->start, i * PAGE_SIZE, m);
			cdev_pager_free_page(devobj, m);
		}
		VM_OBJECT_UNLOCK(devobj);
		vm_object_deallocate(devobj);
	} else {
		DRM_UNLOCK(dev);
		NV_ERROR(dev, "Could not find handle %p bo %p\n", handle, bo);
		return;
	}

	if (pscnv_mem_debug > 0)
		NV_WARN(dev, "Freed %010llx (%p)\n", bo->start, bo);

	//kfree(bo->fake_pages);
	if (bo->chan)
		pscnv_chan_unref(bo->chan);
	else
		drm_gem_object_unreference_unlocked(gem_obj);
	DRM_UNLOCK(dev);
}
static void
privcmd_pg_dtor(void *handle)
{
	struct xen_remove_from_physmap rm = { .domid = DOMID_SELF };
	struct privcmd_map *map = handle;
	int error;
	vm_size_t i;
	vm_page_t m;

	/*
	 * Remove the mappings from the used pages. This will remove the
	 * underlying p2m bindings in Xen second stage translation.
	 */
	if (map->mapped == true) {
		VM_OBJECT_WLOCK(map->mem);
retry:
		for (i = 0; i < map->size; i++) {
			m = vm_page_lookup(map->mem, i);
			if (m == NULL)
				continue;
			if (vm_page_sleep_if_busy(m, "pcmdum"))
				goto retry;
			cdev_pager_free_page(map->mem, m);
		}
		VM_OBJECT_WUNLOCK(map->mem);

		for (i = 0; i < map->size; i++) {
			rm.gpfn = atop(map->phys_base_addr) + i;
			HYPERVISOR_memory_op(XENMEM_remove_from_physmap, &rm);
		}
		free(map->err, M_PRIVCMD);
	}

	error = xenmem_free(privcmd_dev, map->pseudo_phys_res_id,
	    map->pseudo_phys_res);
	KASSERT(error == 0, ("Unable to release memory resource: %d", error));

	free(map, M_PRIVCMD);
}
static int
uiomove_object_page(vm_object_t obj, size_t len, struct uio *uio)
{
	vm_page_t m;
	vm_pindex_t idx;
	size_t tlen;
	int error, offset, rv;

	idx = OFF_TO_IDX(uio->uio_offset);
	offset = uio->uio_offset & PAGE_MASK;
	tlen = MIN(PAGE_SIZE - offset, len);

	VM_OBJECT_WLOCK(obj);

	/*
	 * Read I/O without either a corresponding resident page or swap
	 * page: use zero_region. This is intended to avoid instantiating
	 * pages on read from a sparse region.
	 */
	if (uio->uio_rw == UIO_READ && vm_page_lookup(obj, idx) == NULL &&
	    !vm_pager_has_page(obj, idx, NULL, NULL)) {
		VM_OBJECT_WUNLOCK(obj);
		return (uiomove(__DECONST(void *, zero_region), tlen, uio));
	}
int memory_object_control_uiomove( memory_object_control_t control, memory_object_offset_t offset, void * uio, int start_offset, int io_requested, int mark_dirty, int take_reference) { vm_object_t object; vm_page_t dst_page; int xsize; int retval = 0; int cur_run; int cur_needed; int i; int orig_offset; vm_page_t page_run[MAX_RUN]; object = memory_object_control_to_vm_object(control); if (object == VM_OBJECT_NULL) { return (0); } assert(!object->internal); vm_object_lock(object); if (mark_dirty && object->copy != VM_OBJECT_NULL) { /* * We can't modify the pages without honoring * copy-on-write obligations first, so fall off * this optimized path and fall back to the regular * path. */ vm_object_unlock(object); return 0; } orig_offset = start_offset; while (io_requested && retval == 0) { cur_needed = (start_offset + io_requested + (PAGE_SIZE - 1)) / PAGE_SIZE; if (cur_needed > MAX_RUN) cur_needed = MAX_RUN; for (cur_run = 0; cur_run < cur_needed; ) { if ((dst_page = vm_page_lookup(object, offset)) == VM_PAGE_NULL) break; /* * if we're in this routine, we are inside a filesystem's * locking model, so we don't ever want to wait for pages that have * list_req_pending == TRUE since it means that the * page is a candidate for some type of I/O operation, * but that it has not yet been gathered into a UPL... * this implies that it is still outside the domain * of the filesystem and that whoever is responsible for * grabbing it into a UPL may be stuck behind the filesystem * lock this thread owns, or trying to take a lock exclusively * and waiting for the readers to drain from a rw lock... * if we block in those cases, we will deadlock */ if (dst_page->list_req_pending) { if (dst_page->absent) { /* * this is the list_req_pending | absent | busy case * which originates from vm_fault_page... we want * to fall out of the fast path and go back * to the caller which will gather this page * into a UPL and issue the I/O if no one * else beats us to it */ break; } if (dst_page->pageout || dst_page->cleaning) { /* * this is the list_req_pending | pageout | busy case * or the list_req_pending | cleaning case... * which originate from the pageout_scan and * msync worlds for the pageout case and the hibernate * pre-cleaning world for the cleaning case... * we need to reset the state of this page to indicate * it should stay in the cache marked dirty... nothing else we * can do at this point... we can't block on it, we can't busy * it and we can't clean it from this routine. */ vm_page_lockspin_queues(); vm_pageout_queue_steal(dst_page, TRUE); vm_page_deactivate(dst_page); vm_page_unlock_queues(); } /* * this is the list_req_pending | cleaning case... * we can go ahead and deal with this page since * its ok for us to mark this page busy... if a UPL * tries to gather this page, it will block until the * busy is cleared, thus allowing us safe use of the page * when we're done with it, we will clear busy and wake * up anyone waiting on it, thus allowing the UPL creation * to finish */ } else if (dst_page->busy || dst_page->cleaning) { /* * someone else is playing with the page... if we've * already collected pages into this run, go ahead * and process now, we can't block on this * page while holding other pages in the BUSY state * otherwise we will wait */ if (cur_run) break; PAGE_SLEEP(object, dst_page, THREAD_UNINT); continue; } /* * this routine is only called when copying * to/from real files... 
no need to consider * encrypted swap pages */ assert(!dst_page->encrypted); if (mark_dirty) { dst_page->dirty = TRUE; if (dst_page->cs_validated && !dst_page->cs_tainted) { /* * CODE SIGNING: * We're modifying a code-signed * page: force revalidate */ dst_page->cs_validated = FALSE; #if DEVELOPMENT || DEBUG vm_cs_validated_resets++; #endif pmap_disconnect(dst_page->phys_page); } } dst_page->busy = TRUE; page_run[cur_run++] = dst_page; offset += PAGE_SIZE_64; } if (cur_run == 0) /* * we hit a 'hole' in the cache or * a page we don't want to try to handle, * so bail at this point * we'll unlock the object below */ break; vm_object_unlock(object); for (i = 0; i < cur_run; i++) { dst_page = page_run[i]; if ((xsize = PAGE_SIZE - start_offset) > io_requested) xsize = io_requested; if ( (retval = uiomove64((addr64_t)(((addr64_t)(dst_page->phys_page) << 12) + start_offset), xsize, uio)) ) break; io_requested -= xsize; start_offset = 0; } vm_object_lock(object); /* * if we have more than 1 page to work on * in the current run, or the original request * started at offset 0 of the page, or we're * processing multiple batches, we will move * the pages to the tail of the inactive queue * to implement an LRU for read/write accesses * * the check for orig_offset == 0 is there to * mitigate the cost of small (< page_size) requests * to the same page (this way we only move it once) */ if (take_reference && (cur_run > 1 || orig_offset == 0)) { vm_page_lockspin_queues(); for (i = 0; i < cur_run; i++) vm_page_lru(page_run[i]); vm_page_unlock_queues(); } for (i = 0; i < cur_run; i++) { dst_page = page_run[i]; /* * someone is explicitly referencing this page... * update clustered and speculative state * */ VM_PAGE_CONSUME_CLUSTERED(dst_page); PAGE_WAKEUP_DONE(dst_page); } orig_offset = 0; } vm_object_unlock(object); return (retval); }
int vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type, int fault_flags, vm_page_t *m_hold) { vm_prot_t prot; long ahead, behind; int alloc_req, era, faultcount, nera, reqpage, result; boolean_t growstack, is_first_object_locked, wired; int map_generation; vm_object_t next_object; vm_page_t marray[VM_FAULT_READ_MAX]; int hardfault; struct faultstate fs; struct vnode *vp; int locked, error; hardfault = 0; growstack = TRUE; PCPU_INC(cnt.v_vm_faults); fs.vp = NULL; faultcount = reqpage = 0; RetryFault:; /* * Find the backing store object and offset into it to begin the * search. */ fs.map = map; result = vm_map_lookup(&fs.map, vaddr, fault_type, &fs.entry, &fs.first_object, &fs.first_pindex, &prot, &wired); if (result != KERN_SUCCESS) { if (growstack && result == KERN_INVALID_ADDRESS && map != kernel_map) { result = vm_map_growstack(curproc, vaddr); if (result != KERN_SUCCESS) return (KERN_FAILURE); growstack = FALSE; goto RetryFault; } return (result); } map_generation = fs.map->timestamp; if (fs.entry->eflags & MAP_ENTRY_NOFAULT) { panic("vm_fault: fault on nofault entry, addr: %lx", (u_long)vaddr); } /* * Make a reference to this object to prevent its disposal while we * are messing with it. Once we have the reference, the map is free * to be diddled. Since objects reference their shadows (and copies), * they will stay around as well. * * Bump the paging-in-progress count to prevent size changes (e.g. * truncation operations) during I/O. This must be done after * obtaining the vnode lock in order to avoid possible deadlocks. */ VM_OBJECT_WLOCK(fs.first_object); vm_object_reference_locked(fs.first_object); vm_object_pip_add(fs.first_object, 1); fs.lookup_still_valid = TRUE; if (wired) fault_type = prot | (fault_type & VM_PROT_COPY); fs.first_m = NULL; /* * Search for the page at object/offset. */ fs.object = fs.first_object; fs.pindex = fs.first_pindex; while (TRUE) { /* * If the object is dead, we stop here */ if (fs.object->flags & OBJ_DEAD) { unlock_and_deallocate(&fs); return (KERN_PROTECTION_FAILURE); } /* * See if page is resident */ fs.m = vm_page_lookup(fs.object, fs.pindex); if (fs.m != NULL) { /* * check for page-based copy on write. * We check fs.object == fs.first_object so * as to ensure the legacy COW mechanism is * used when the page in question is part of * a shadow object. Otherwise, vm_page_cowfault() * removes the page from the backing object, * which is not what we want. */ vm_page_lock(fs.m); if ((fs.m->cow) && (fault_type & VM_PROT_WRITE) && (fs.object == fs.first_object)) { vm_page_cowfault(fs.m); unlock_and_deallocate(&fs); goto RetryFault; } /* * Wait/Retry if the page is busy. We have to do this * if the page is busy via either VPO_BUSY or * vm_page_t->busy because the vm_pager may be using * vm_page_t->busy for pageouts ( and even pageins if * it is the vnode pager ), and we could end up trying * to pagein and pageout the same page simultaneously. * * We can theoretically allow the busy case on a read * fault if the page is marked valid, but since such * pages are typically already pmap'd, putting that * special case in might be more effort then it is * worth. We cannot under any circumstances mess * around with a vm_page_t->busy page except, perhaps, * to pmap it. */ if ((fs.m->oflags & VPO_BUSY) || fs.m->busy) { /* * Reference the page before unlocking and * sleeping so that the page daemon is less * likely to reclaim it. 
*/ vm_page_aflag_set(fs.m, PGA_REFERENCED); vm_page_unlock(fs.m); if (fs.object != fs.first_object) { if (!VM_OBJECT_TRYWLOCK( fs.first_object)) { VM_OBJECT_WUNLOCK(fs.object); VM_OBJECT_WLOCK(fs.first_object); VM_OBJECT_WLOCK(fs.object); } vm_page_lock(fs.first_m); vm_page_free(fs.first_m); vm_page_unlock(fs.first_m); vm_object_pip_wakeup(fs.first_object); VM_OBJECT_WUNLOCK(fs.first_object); fs.first_m = NULL; } unlock_map(&fs); if (fs.m == vm_page_lookup(fs.object, fs.pindex)) { vm_page_sleep_if_busy(fs.m, TRUE, "vmpfw"); } vm_object_pip_wakeup(fs.object); VM_OBJECT_WUNLOCK(fs.object); PCPU_INC(cnt.v_intrans); vm_object_deallocate(fs.first_object); goto RetryFault; } vm_page_remque(fs.m); vm_page_unlock(fs.m); /* * Mark page busy for other processes, and the * pagedaemon. If it still isn't completely valid * (readable), jump to readrest, else break-out ( we * found the page ). */ vm_page_busy(fs.m); if (fs.m->valid != VM_PAGE_BITS_ALL) goto readrest; break; } /* * Page is not resident, If this is the search termination * or the pager might contain the page, allocate a new page. */ if (TRYPAGER || fs.object == fs.first_object) { if (fs.pindex >= fs.object->size) { unlock_and_deallocate(&fs); return (KERN_PROTECTION_FAILURE); } /* * Allocate a new page for this object/offset pair. * * Unlocked read of the p_flag is harmless. At * worst, the P_KILLED might be not observed * there, and allocation can fail, causing * restart and new reading of the p_flag. */ fs.m = NULL; if (!vm_page_count_severe() || P_KILLED(curproc)) { #if VM_NRESERVLEVEL > 0 if ((fs.object->flags & OBJ_COLORED) == 0) { fs.object->flags |= OBJ_COLORED; fs.object->pg_color = atop(vaddr) - fs.pindex; } #endif alloc_req = P_KILLED(curproc) ? VM_ALLOC_SYSTEM : VM_ALLOC_NORMAL; if (fs.object->type != OBJT_VNODE && fs.object->backing_object == NULL) alloc_req |= VM_ALLOC_ZERO; fs.m = vm_page_alloc(fs.object, fs.pindex, alloc_req); } if (fs.m == NULL) { unlock_and_deallocate(&fs); VM_WAITPFAULT; goto RetryFault; } else if (fs.m->valid == VM_PAGE_BITS_ALL) break; } readrest: /* * We have found a valid page or we have allocated a new page. * The page thus may not be valid or may not be entirely * valid. * * Attempt to fault-in the page if there is a chance that the * pager has it, and potentially fault in additional pages * at the same time. */ if (TRYPAGER) { int rv; u_char behavior = vm_map_entry_behavior(fs.entry); if (behavior == MAP_ENTRY_BEHAV_RANDOM || P_KILLED(curproc)) { behind = 0; ahead = 0; } else if (behavior == MAP_ENTRY_BEHAV_SEQUENTIAL) { behind = 0; ahead = atop(fs.entry->end - vaddr) - 1; if (ahead > VM_FAULT_READ_AHEAD_MAX) ahead = VM_FAULT_READ_AHEAD_MAX; if (fs.pindex == fs.entry->next_read) vm_fault_cache_behind(&fs, VM_FAULT_READ_MAX); } else { /* * If this is a sequential page fault, then * arithmetically increase the number of pages * in the read-ahead window. Otherwise, reset * the read-ahead window to its smallest size. 
*/ behind = atop(vaddr - fs.entry->start); if (behind > VM_FAULT_READ_BEHIND) behind = VM_FAULT_READ_BEHIND; ahead = atop(fs.entry->end - vaddr) - 1; era = fs.entry->read_ahead; if (fs.pindex == fs.entry->next_read) { nera = era + behind; if (nera > VM_FAULT_READ_AHEAD_MAX) nera = VM_FAULT_READ_AHEAD_MAX; behind = 0; if (ahead > nera) ahead = nera; if (era == VM_FAULT_READ_AHEAD_MAX) vm_fault_cache_behind(&fs, VM_FAULT_CACHE_BEHIND); } else if (ahead > VM_FAULT_READ_AHEAD_MIN) ahead = VM_FAULT_READ_AHEAD_MIN; if (era != ahead) fs.entry->read_ahead = ahead; } /* * Call the pager to retrieve the data, if any, after * releasing the lock on the map. We hold a ref on * fs.object and the pages are VPO_BUSY'd. */ unlock_map(&fs); if (fs.object->type == OBJT_VNODE) { vp = fs.object->handle; if (vp == fs.vp) goto vnode_locked; else if (fs.vp != NULL) { vput(fs.vp); fs.vp = NULL; } locked = VOP_ISLOCKED(vp); if (locked != LK_EXCLUSIVE) locked = LK_SHARED; /* Do not sleep for vnode lock while fs.m is busy */ error = vget(vp, locked | LK_CANRECURSE | LK_NOWAIT, curthread); if (error != 0) { vhold(vp); release_page(&fs); unlock_and_deallocate(&fs); error = vget(vp, locked | LK_RETRY | LK_CANRECURSE, curthread); vdrop(vp); fs.vp = vp; KASSERT(error == 0, ("vm_fault: vget failed")); goto RetryFault; } fs.vp = vp; } vnode_locked: KASSERT(fs.vp == NULL || !fs.map->system_map, ("vm_fault: vnode-backed object mapped by system map")); /* * now we find out if any other pages should be paged * in at this time this routine checks to see if the * pages surrounding this fault reside in the same * object as the page for this fault. If they do, * then they are faulted in also into the object. The * array "marray" returned contains an array of * vm_page_t structs where one of them is the * vm_page_t passed to the routine. The reqpage * return value is the index into the marray for the * vm_page_t passed to the routine. * * fs.m plus the additional pages are VPO_BUSY'd. */ faultcount = vm_fault_additional_pages( fs.m, behind, ahead, marray, &reqpage); rv = faultcount ? vm_pager_get_pages(fs.object, marray, faultcount, reqpage) : VM_PAGER_FAIL; if (rv == VM_PAGER_OK) { /* * Found the page. Leave it busy while we play * with it. */ /* * Relookup in case pager changed page. Pager * is responsible for disposition of old page * if moved. */ fs.m = vm_page_lookup(fs.object, fs.pindex); if (!fs.m) { unlock_and_deallocate(&fs); goto RetryFault; } hardfault++; break; /* break to PAGE HAS BEEN FOUND */ } /* * Remove the bogus page (which does not exist at this * object/offset); before doing so, we must get back * our object lock to preserve our invariant. * * Also wake up any other process that may want to bring * in this page. * * If this is the top-level object, we must leave the * busy page to prevent another process from rushing * past us, and inserting the page in that object at * the same time that we are. */ if (rv == VM_PAGER_ERROR) printf("vm_fault: pager read error, pid %d (%s)\n", curproc->p_pid, curproc->p_comm); /* * Data outside the range of the pager or an I/O error */ /* * XXX - the check for kernel_map is a kludge to work * around having the machine panic on a kernel space * fault w/ I/O error. */ if (((fs.map != kernel_map) && (rv == VM_PAGER_ERROR)) || (rv == VM_PAGER_BAD)) { vm_page_lock(fs.m); vm_page_free(fs.m); vm_page_unlock(fs.m); fs.m = NULL; unlock_and_deallocate(&fs); return ((rv == VM_PAGER_ERROR) ? 
KERN_FAILURE : KERN_PROTECTION_FAILURE); } if (fs.object != fs.first_object) { vm_page_lock(fs.m); vm_page_free(fs.m); vm_page_unlock(fs.m); fs.m = NULL; /* * XXX - we cannot just fall out at this * point, m has been freed and is invalid! */ } } /* * We get here if the object has default pager (or unwiring) * or the pager doesn't have the page. */ if (fs.object == fs.first_object) fs.first_m = fs.m; /* * Move on to the next object. Lock the next object before * unlocking the current one. */ fs.pindex += OFF_TO_IDX(fs.object->backing_object_offset); next_object = fs.object->backing_object; if (next_object == NULL) { /* * If there's no object left, fill the page in the top * object with zeros. */ if (fs.object != fs.first_object) { vm_object_pip_wakeup(fs.object); VM_OBJECT_WUNLOCK(fs.object); fs.object = fs.first_object; fs.pindex = fs.first_pindex; fs.m = fs.first_m; VM_OBJECT_WLOCK(fs.object); } fs.first_m = NULL; /* * Zero the page if necessary and mark it valid. */ if ((fs.m->flags & PG_ZERO) == 0) { pmap_zero_page(fs.m); } else { PCPU_INC(cnt.v_ozfod); } PCPU_INC(cnt.v_zfod); fs.m->valid = VM_PAGE_BITS_ALL; break; /* break to PAGE HAS BEEN FOUND */ } else { KASSERT(fs.object != next_object, ("object loop %p", next_object)); VM_OBJECT_WLOCK(next_object); vm_object_pip_add(next_object, 1); if (fs.object != fs.first_object) vm_object_pip_wakeup(fs.object); VM_OBJECT_WUNLOCK(fs.object); fs.object = next_object; } } KASSERT((fs.m->oflags & VPO_BUSY) != 0, ("vm_fault: not busy after main loop")); /* * PAGE HAS BEEN FOUND. [Loop invariant still holds -- the object lock * is held.] */ /* * If the page is being written, but isn't already owned by the * top-level object, we have to copy it into a new page owned by the * top-level object. */ if (fs.object != fs.first_object) { /* * We only really need to copy if we want to write it. */ if ((fault_type & (VM_PROT_COPY | VM_PROT_WRITE)) != 0) { /* * This allows pages to be virtually copied from a * backing_object into the first_object, where the * backing object has no other refs to it, and cannot * gain any more refs. Instead of a bcopy, we just * move the page from the backing object to the * first object. Note that we must mark the page * dirty in the first object so that it will go out * to swap when needed. */ is_first_object_locked = FALSE; if ( /* * Only one shadow object */ (fs.object->shadow_count == 1) && /* * No COW refs, except us */ (fs.object->ref_count == 1) && /* * No one else can look this object up */ (fs.object->handle == NULL) && /* * No other ways to look the object up */ ((fs.object->type == OBJT_DEFAULT) || (fs.object->type == OBJT_SWAP)) && (is_first_object_locked = VM_OBJECT_TRYWLOCK(fs.first_object)) && /* * We don't chase down the shadow chain */ fs.object == fs.first_object->backing_object) { /* * get rid of the unnecessary page */ vm_page_lock(fs.first_m); vm_page_free(fs.first_m); vm_page_unlock(fs.first_m); /* * grab the page and put it into the * process'es object. The page is * automatically made dirty. */ vm_page_lock(fs.m); vm_page_rename(fs.m, fs.first_object, fs.first_pindex); vm_page_unlock(fs.m); vm_page_busy(fs.m); fs.first_m = fs.m; fs.m = NULL; PCPU_INC(cnt.v_cow_optim); } else { /* * Oh, well, lets copy it. 
*/ pmap_copy_page(fs.m, fs.first_m); fs.first_m->valid = VM_PAGE_BITS_ALL; if (wired && (fault_flags & VM_FAULT_CHANGE_WIRING) == 0) { vm_page_lock(fs.first_m); vm_page_wire(fs.first_m); vm_page_unlock(fs.first_m); vm_page_lock(fs.m); vm_page_unwire(fs.m, FALSE); vm_page_unlock(fs.m); } /* * We no longer need the old page or object. */ release_page(&fs); } /* * fs.object != fs.first_object due to above * conditional */ vm_object_pip_wakeup(fs.object); VM_OBJECT_WUNLOCK(fs.object); /* * Only use the new page below... */ fs.object = fs.first_object; fs.pindex = fs.first_pindex; fs.m = fs.first_m; if (!is_first_object_locked) VM_OBJECT_WLOCK(fs.object); PCPU_INC(cnt.v_cow_faults); curthread->td_cow++; } else { prot &= ~VM_PROT_WRITE; } } /* * We must verify that the maps have not changed since our last * lookup. */ if (!fs.lookup_still_valid) { vm_object_t retry_object; vm_pindex_t retry_pindex; vm_prot_t retry_prot; if (!vm_map_trylock_read(fs.map)) { release_page(&fs); unlock_and_deallocate(&fs); goto RetryFault; } fs.lookup_still_valid = TRUE; if (fs.map->timestamp != map_generation) { result = vm_map_lookup_locked(&fs.map, vaddr, fault_type, &fs.entry, &retry_object, &retry_pindex, &retry_prot, &wired); /* * If we don't need the page any longer, put it on the inactive * list (the easiest thing to do here). If no one needs it, * pageout will grab it eventually. */ if (result != KERN_SUCCESS) { release_page(&fs); unlock_and_deallocate(&fs); /* * If retry of map lookup would have blocked then * retry fault from start. */ if (result == KERN_FAILURE) goto RetryFault; return (result); } if ((retry_object != fs.first_object) || (retry_pindex != fs.first_pindex)) { release_page(&fs); unlock_and_deallocate(&fs); goto RetryFault; } /* * Check whether the protection has changed or the object has * been copied while we left the map unlocked. Changing from * read to write permission is OK - we leave the page * write-protected, and catch the write fault. Changing from * write to read permission means that we can't mark the page * write-enabled after all. */ prot &= retry_prot; } } /* * If the page was filled by a pager, update the map entry's * last read offset. Since the pager does not return the * actual set of pages that it read, this update is based on * the requested set. Typically, the requested and actual * sets are the same. * * XXX The following assignment modifies the map * without holding a write lock on it. */ if (hardfault) fs.entry->next_read = fs.pindex + faultcount - reqpage; if ((prot & VM_PROT_WRITE) != 0 || (fault_flags & VM_FAULT_DIRTY) != 0) { vm_object_set_writeable_dirty(fs.object); /* * If this is a NOSYNC mmap we do not want to set VPO_NOSYNC * if the page is already dirty to prevent data written with * the expectation of being synced from not being synced. * Likewise if this entry does not request NOSYNC then make * sure the page isn't marked NOSYNC. Applications sharing * data should use the same flags to avoid ping ponging. */ if (fs.entry->eflags & MAP_ENTRY_NOSYNC) { if (fs.m->dirty == 0) fs.m->oflags |= VPO_NOSYNC; } else { fs.m->oflags &= ~VPO_NOSYNC; } /* * If the fault is a write, we know that this page is being * written NOW so dirty it explicitly to save on * pmap_is_modified() calls later. * * Also tell the backing pager, if any, that it should remove * any swap backing since the page is now dirty. 
*/ if (((fault_type & VM_PROT_WRITE) != 0 && (fault_flags & VM_FAULT_CHANGE_WIRING) == 0) || (fault_flags & VM_FAULT_DIRTY) != 0) { vm_page_dirty(fs.m); vm_pager_page_unswapped(fs.m); } } /* * Page had better still be busy */ KASSERT(fs.m->oflags & VPO_BUSY, ("vm_fault: page %p not busy!", fs.m)); /* * Page must be completely valid or it is not fit to * map into user space. vm_pager_get_pages() ensures this. */ KASSERT(fs.m->valid == VM_PAGE_BITS_ALL, ("vm_fault: page %p partially invalid", fs.m)); VM_OBJECT_WUNLOCK(fs.object); /* * Put this page into the physical map. We had to do the unlock above * because pmap_enter() may sleep. We don't put the page * back on the active queue until later so that the pageout daemon * won't find it (yet). */ pmap_enter(fs.map->pmap, vaddr, fault_type, fs.m, prot, wired); if ((fault_flags & VM_FAULT_CHANGE_WIRING) == 0 && wired == 0) vm_fault_prefault(fs.map->pmap, vaddr, fs.entry); VM_OBJECT_WLOCK(fs.object); vm_page_lock(fs.m); /* * If the page is not wired down, then put it where the pageout daemon * can find it. */ if (fault_flags & VM_FAULT_CHANGE_WIRING) { if (wired) vm_page_wire(fs.m); else vm_page_unwire(fs.m, 1); } else vm_page_activate(fs.m); if (m_hold != NULL) { *m_hold = fs.m; vm_page_hold(fs.m); } vm_page_unlock(fs.m); vm_page_wakeup(fs.m); /* * Unlock everything, and return */ unlock_and_deallocate(&fs); if (hardfault) { PCPU_INC(cnt.v_io_faults); curthread->td_ru.ru_majflt++; } else curthread->td_ru.ru_minflt++; return (KERN_SUCCESS); }
/* * Routine: * vm_fault_copy_entry * Function: * Create new shadow object backing dst_entry with private copy of * all underlying pages. When src_entry is equal to dst_entry, * function implements COW for wired-down map entry. Otherwise, * it forks wired entry into dst_map. * * In/out conditions: * The source and destination maps must be locked for write. * The source map entry must be wired down (or be a sharing map * entry corresponding to a main map entry that is wired down). */ void vm_fault_copy_entry(vm_map_t dst_map, vm_map_t src_map, vm_map_entry_t dst_entry, vm_map_entry_t src_entry, vm_ooffset_t *fork_charge) { vm_object_t backing_object, dst_object, object, src_object; vm_pindex_t dst_pindex, pindex, src_pindex; vm_prot_t access, prot; vm_offset_t vaddr; vm_page_t dst_m; vm_page_t src_m; boolean_t src_readonly, upgrade; #ifdef lint src_map++; #endif /* lint */ upgrade = src_entry == dst_entry; src_object = src_entry->object.vm_object; src_pindex = OFF_TO_IDX(src_entry->offset); src_readonly = (src_entry->protection & VM_PROT_WRITE) == 0; /* * Create the top-level object for the destination entry. (Doesn't * actually shadow anything - we copy the pages directly.) */ dst_object = vm_object_allocate(OBJT_DEFAULT, OFF_TO_IDX(dst_entry->end - dst_entry->start)); #if VM_NRESERVLEVEL > 0 dst_object->flags |= OBJ_COLORED; dst_object->pg_color = atop(dst_entry->start); #endif VM_OBJECT_WLOCK(dst_object); KASSERT(upgrade || dst_entry->object.vm_object == NULL, ("vm_fault_copy_entry: vm_object not NULL")); dst_entry->object.vm_object = dst_object; dst_entry->offset = 0; dst_object->charge = dst_entry->end - dst_entry->start; if (fork_charge != NULL) { KASSERT(dst_entry->cred == NULL, ("vm_fault_copy_entry: leaked swp charge")); dst_object->cred = curthread->td_ucred; crhold(dst_object->cred); *fork_charge += dst_object->charge; } else { dst_object->cred = dst_entry->cred; dst_entry->cred = NULL; } access = prot = dst_entry->protection; /* * If not an upgrade, then enter the mappings in the pmap as * read and/or execute accesses. Otherwise, enter them as * write accesses. * * A writeable large page mapping is only created if all of * the constituent small page mappings are modified. Marking * PTEs as modified on inception allows promotion to happen * without taking potentially large number of soft faults. */ if (!upgrade) access &= ~VM_PROT_WRITE; /* * Loop through all of the virtual pages within the entry's * range, copying each page from the source object to the * destination object. Since the source is wired, those pages * must exist. In contrast, the destination is pageable. * Since the destination object does share any backing storage * with the source object, all of its pages must be dirtied, * regardless of whether they can be written. */ for (vaddr = dst_entry->start, dst_pindex = 0; vaddr < dst_entry->end; vaddr += PAGE_SIZE, dst_pindex++) { /* * Allocate a page in the destination object. */ do { dst_m = vm_page_alloc(dst_object, dst_pindex, VM_ALLOC_NORMAL); if (dst_m == NULL) { VM_OBJECT_WUNLOCK(dst_object); VM_WAIT; VM_OBJECT_WLOCK(dst_object); } } while (dst_m == NULL); /* * Find the page in the source object, and copy it in. * (Because the source is wired down, the page will be in * memory.) */ VM_OBJECT_WLOCK(src_object); object = src_object; pindex = src_pindex + dst_pindex; while ((src_m = vm_page_lookup(object, pindex)) == NULL && src_readonly && (backing_object = object->backing_object) != NULL) { /* * Allow fallback to backing objects if we are reading. 
*/ VM_OBJECT_WLOCK(backing_object); pindex += OFF_TO_IDX(object->backing_object_offset); VM_OBJECT_WUNLOCK(object); object = backing_object; } if (src_m == NULL) panic("vm_fault_copy_wired: page missing"); pmap_copy_page(src_m, dst_m); VM_OBJECT_WUNLOCK(object); dst_m->valid = VM_PAGE_BITS_ALL; dst_m->dirty = VM_PAGE_BITS_ALL; VM_OBJECT_WUNLOCK(dst_object); /* * Enter it in the pmap. If a wired, copy-on-write * mapping is being replaced by a write-enabled * mapping, then wire that new mapping. */ pmap_enter(dst_map->pmap, vaddr, access, dst_m, prot, upgrade); /* * Mark it no longer busy, and put it on the active list. */ VM_OBJECT_WLOCK(dst_object); if (upgrade) { vm_page_lock(src_m); vm_page_unwire(src_m, 0); vm_page_unlock(src_m); vm_page_lock(dst_m); vm_page_wire(dst_m); vm_page_unlock(dst_m); } else { vm_page_lock(dst_m); vm_page_activate(dst_m); vm_page_unlock(dst_m); } vm_page_wakeup(dst_m); } VM_OBJECT_WUNLOCK(dst_object); if (upgrade) { dst_entry->eflags &= ~(MAP_ENTRY_COW | MAP_ENTRY_NEEDS_COPY); vm_object_deallocate(src_object); } }
/* * Lets the VM system know about a change in size for a file. * We adjust our own internal size and flush any cached pages in * the associated object that are affected by the size change. * * Note: this routine may be invoked as a result of a pager put * operation (possibly at object termination time), so we must be careful. */ void vnode_pager_setsize(struct vnode *vp, vm_ooffset_t nsize) { vm_object_t object; vm_page_t m; vm_pindex_t nobjsize; if ((object = vp->v_object) == NULL) return; /* ASSERT_VOP_ELOCKED(vp, "vnode_pager_setsize and not locked vnode"); */ VM_OBJECT_WLOCK(object); if (object->type == OBJT_DEAD) { VM_OBJECT_WUNLOCK(object); return; } KASSERT(object->type == OBJT_VNODE, ("not vnode-backed object %p", object)); if (nsize == object->un_pager.vnp.vnp_size) { /* * Hasn't changed size */ VM_OBJECT_WUNLOCK(object); return; } nobjsize = OFF_TO_IDX(nsize + PAGE_MASK); if (nsize < object->un_pager.vnp.vnp_size) { /* * File has shrunk. Toss any cached pages beyond the new EOF. */ if (nobjsize < object->size) vm_object_page_remove(object, nobjsize, object->size, 0); /* * this gets rid of garbage at the end of a page that is now * only partially backed by the vnode. * * XXX for some reason (I don't know yet), if we take a * completely invalid page and mark it partially valid * it can screw up NFS reads, so we don't allow the case. */ if ((nsize & PAGE_MASK) && (m = vm_page_lookup(object, OFF_TO_IDX(nsize))) != NULL && m->valid != 0) { int base = (int)nsize & PAGE_MASK; int size = PAGE_SIZE - base; /* * Clear out partial-page garbage in case * the page has been mapped. */ pmap_zero_page_area(m, base, size); /* * Update the valid bits to reflect the blocks that * have been zeroed. Some of these valid bits may * have already been set. */ vm_page_set_valid_range(m, base, size); /* * Round "base" to the next block boundary so that the * dirty bit for a partially zeroed block is not * cleared. */ base = roundup2(base, DEV_BSIZE); /* * Clear out partial-page dirty bits. * * note that we do not clear out the valid * bits. This would prevent bogus_page * replacement from working properly. */ vm_page_clear_dirty(m, base, PAGE_SIZE - base); } } object->un_pager.vnp.vnp_size = nsize; object->size = nobjsize; VM_OBJECT_WUNLOCK(object); }
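The partial-page handling in vnode_pager_setsize() (zero the tail of the last page, mark the zeroed blocks valid, then round up to a DEV_BSIZE boundary before clearing dirty bits) reappears in the shm_dotruncate() variants later in this section. A condensed sketch of just those steps, assuming a resident page m has already been looked up at OFF_TO_IDX(nsize) with the object lock held, exactly as above; example_zero_partial_page is a hypothetical name.

/* Hypothetical helper isolating the tail-zeroing steps shown above. */
static void
example_zero_partial_page(vm_page_t m, vm_ooffset_t nsize)
{
	int base, size;

	base = (int)nsize & PAGE_MASK;		/* first stale byte in the page */
	size = PAGE_SIZE - base;

	pmap_zero_page_area(m, base, size);	/* clear garbage past the new EOF */
	vm_page_set_valid_range(m, base, size);	/* zeroed bytes are now valid */

	/*
	 * Do not clear the dirty bit of a block that was only partially
	 * zeroed: round up to the next block boundary first.
	 */
	base = roundup2(base, DEV_BSIZE);
	vm_page_clear_dirty(m, base, PAGE_SIZE - base);
}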
int proc_rwmem(struct proc *p, struct uio *uio) { struct vmspace *vm; vm_map_t map; vm_object_t object = NULL; vm_offset_t pageno = 0; /* page number */ vm_prot_t reqprot; vm_offset_t kva; int error, writing; GIANT_REQUIRED; /* * if the vmspace is in the midst of being deallocated or the * process is exiting, don't try to grab anything. The page table * usage in that process can be messed up. */ vm = p->p_vmspace; if ((p->p_flag & P_WEXIT)) return (EFAULT); if (vm->vm_refcnt < 1) return (EFAULT); ++vm->vm_refcnt; /* * The map we want... */ map = &vm->vm_map; writing = uio->uio_rw == UIO_WRITE; reqprot = writing ? (VM_PROT_WRITE | VM_PROT_OVERRIDE_WRITE) : VM_PROT_READ; kva = kmem_alloc_pageable(kernel_map, PAGE_SIZE); /* * Only map in one page at a time. We don't have to, but it * makes things easier. This way is trivial - right? */ do { vm_map_t tmap; vm_offset_t uva; int page_offset; /* offset into page */ vm_map_entry_t out_entry; vm_prot_t out_prot; boolean_t wired; vm_pindex_t pindex; u_int len; vm_page_t m; object = NULL; uva = (vm_offset_t)uio->uio_offset; /* * Get the page number of this segment. */ pageno = trunc_page(uva); page_offset = uva - pageno; /* * How many bytes to copy */ len = min(PAGE_SIZE - page_offset, uio->uio_resid); /* * Fault the page on behalf of the process */ error = vm_fault(map, pageno, reqprot, VM_FAULT_NORMAL); if (error) { error = EFAULT; break; } /* * Now we need to get the page. out_entry, out_prot, wired, * and single_use aren't used. One would think the vm code * would be a *bit* nicer... We use tmap because * vm_map_lookup() can change the map argument. */ tmap = map; error = vm_map_lookup(&tmap, pageno, reqprot, &out_entry, &object, &pindex, &out_prot, &wired); if (error) { error = EFAULT; /* * Make sure that there is no residue in 'object' from * an error return on vm_map_lookup. */ object = NULL; break; } m = vm_page_lookup(object, pindex); /* Allow fallback to backing objects if we are reading */ while (m == NULL && !writing && object->backing_object) { pindex += OFF_TO_IDX(object->backing_object_offset); object = object->backing_object; m = vm_page_lookup(object, pindex); } if (m == NULL) { error = EFAULT; /* * Make sure that there is no residue in 'object' from * an error return on vm_map_lookup. */ object = NULL; vm_map_lookup_done(tmap, out_entry); break; } /* * Wire the page into memory */ vm_page_lock_queues(); vm_page_wire(m); vm_page_unlock_queues(); /* * We're done with tmap now. * But reference the object first, so that we won't loose * it. */ vm_object_reference(object); vm_map_lookup_done(tmap, out_entry); pmap_qenter(kva, &m, 1); /* * Now do the i/o move. */ error = uiomove((caddr_t)(kva + page_offset), len, uio); pmap_qremove(kva, 1); /* * release the page and the object */ vm_page_lock_queues(); vm_page_unwire(m, 1); vm_page_unlock_queues(); vm_object_deallocate(object); object = NULL; } while (error == 0 && uio->uio_resid > 0); if (object) vm_object_deallocate(object); kmem_free(kernel_map, kva, PAGE_SIZE); vmspace_free(vm); return (error); }
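proc_rwmem() walks the backing-object chain by hand when a read misses in the top-level object; the same idiom appears in vm_fault_prefault() and vm_fault_copy_entry() above. A minimal sketch of the walk alone, using only the fields and calls those functions already rely on; example_lookup_readonly is a hypothetical name, and the per-object locking done by the real code is omitted for brevity.

/*
 * Hypothetical sketch of the read-only backing-object walk used in
 * proc_rwmem() above (object locking elided).
 */
static vm_page_t
example_lookup_readonly(vm_object_t object, vm_pindex_t pindex)
{
	vm_page_t m;

	m = vm_page_lookup(object, pindex);
	while (m == NULL && object->backing_object != NULL) {
		/* Translate the index into the backing object's space. */
		pindex += OFF_TO_IDX(object->backing_object_offset);
		object = object->backing_object;
		m = vm_page_lookup(object, pindex);
	}
	return (m);
}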
static int uiomove_object_page(vm_object_t obj, size_t len, struct uio *uio) { vm_page_t m; vm_pindex_t idx; size_t tlen; int error, offset, rv; idx = OFF_TO_IDX(uio->uio_offset); offset = uio->uio_offset & PAGE_MASK; tlen = MIN(PAGE_SIZE - offset, len); VM_OBJECT_WLOCK(obj); /* * Parallel reads of the page content from disk are prevented * by exclusive busy. * * Although the tmpfs vnode lock is held here, it is * nonetheless safe to sleep waiting for a free page. The * pageout daemon does not need to acquire the tmpfs vnode * lock to page out tobj's pages because tobj is a OBJT_SWAP * type object. */ m = vm_page_grab(obj, idx, VM_ALLOC_NORMAL); if (m->valid != VM_PAGE_BITS_ALL) { if (vm_pager_has_page(obj, idx, NULL, NULL)) { rv = vm_pager_get_pages(obj, &m, 1, 0); m = vm_page_lookup(obj, idx); if (m == NULL) { printf( "uiomove_object: vm_obj %p idx %jd null lookup rv %d\n", obj, idx, rv); VM_OBJECT_WUNLOCK(obj); return (EIO); } if (rv != VM_PAGER_OK) { printf( "uiomove_object: vm_obj %p idx %jd valid %x pager error %d\n", obj, idx, m->valid, rv); vm_page_lock(m); vm_page_free(m); vm_page_unlock(m); VM_OBJECT_WUNLOCK(obj); return (EIO); } } else vm_page_zero_invalid(m, TRUE); } vm_page_xunbusy(m); vm_page_lock(m); vm_page_hold(m); vm_page_unlock(m); VM_OBJECT_WUNLOCK(obj); error = uiomove_fromphys(&m, offset, tlen, uio); if (uio->uio_rw == UIO_WRITE && error == 0) { VM_OBJECT_WLOCK(obj); vm_page_dirty(m); VM_OBJECT_WUNLOCK(obj); } vm_page_lock(m); vm_page_unhold(m); if (m->queue == PQ_NONE) { vm_page_deactivate(m); } else { /* Requeue to maintain LRU ordering. */ vm_page_requeue(m); } vm_page_unlock(m); return (error); }
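Note the re-lookup immediately after vm_pager_get_pages() in the function above: the pager is allowed to replace the page that was passed in, so the original vm_page_t must not be trusted once the call returns. shmem_read_mapping_page(), vm_fault_hold(), and shm_dotruncate() in this section follow the same rule. Below is a stripped-down sketch of the sequence, assuming the object is write-locked as in uiomove_object_page() and with error handling reduced to returning NULL; example_get_valid_page is a hypothetical name, and the returned page is left exclusive-busied by vm_page_grab(), so a real caller would unbusy it as the function above does.

/*
 * Hypothetical helper: fetch a fully valid page at idx, refetching the
 * vm_page_t after the pager call as the functions above do.
 */
static vm_page_t
example_get_valid_page(vm_object_t obj, vm_pindex_t idx)
{
	vm_page_t m;
	int rv;

	m = vm_page_grab(obj, idx, VM_ALLOC_NORMAL);
	if (m->valid != VM_PAGE_BITS_ALL &&
	    vm_pager_has_page(obj, idx, NULL, NULL)) {
		rv = vm_pager_get_pages(obj, &m, 1, 0);
		/* The pager may have substituted a different vm_page_t. */
		m = vm_page_lookup(obj, idx);
		if (m == NULL)
			return (NULL);
		if (rv != VM_PAGER_OK) {
			vm_page_lock(m);
			vm_page_free(m);
			vm_page_unlock(m);
			return (NULL);
		}
	}
	return (m);
}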
int memory_object_control_uiomove( memory_object_control_t control, memory_object_offset_t offset, void * uio, int start_offset, int io_requested, int mark_dirty, int take_reference) { vm_object_t object; vm_page_t dst_page; int xsize; int retval = 0; int cur_run; int cur_needed; int i; int orig_offset; boolean_t make_lru = FALSE; vm_page_t page_run[MAX_RUN]; object = memory_object_control_to_vm_object(control); if (object == VM_OBJECT_NULL) { return (0); } assert(!object->internal); vm_object_lock(object); if (mark_dirty && object->copy != VM_OBJECT_NULL) { /* * We can't modify the pages without honoring * copy-on-write obligations first, so fall off * this optimized path and fall back to the regular * path. */ vm_object_unlock(object); return 0; } orig_offset = start_offset; while (io_requested && retval == 0) { cur_needed = (start_offset + io_requested + (PAGE_SIZE - 1)) / PAGE_SIZE; if (cur_needed > MAX_RUN) cur_needed = MAX_RUN; for (cur_run = 0; cur_run < cur_needed; ) { if ((dst_page = vm_page_lookup(object, offset)) == VM_PAGE_NULL) break; /* * Sync up on getting the busy bit */ if ((dst_page->busy || dst_page->cleaning)) { /* * someone else is playing with the page... if we've * already collected pages into this run, go ahead * and process now, we can't block on this * page while holding other pages in the BUSY state * otherwise we will wait */ if (cur_run) break; PAGE_SLEEP(object, dst_page, THREAD_UNINT); continue; } /* * this routine is only called when copying * to/from real files... no need to consider * encrypted swap pages */ assert(!dst_page->encrypted); if (mark_dirty) { dst_page->dirty = TRUE; if (dst_page->cs_validated) { /* * CODE SIGNING: * We're modifying a code-signed * page: assume that it is now tainted. */ dst_page->cs_tainted = TRUE; vm_cs_tainted_forces++; } } dst_page->busy = TRUE; page_run[cur_run++] = dst_page; offset += PAGE_SIZE_64; } if (cur_run == 0) /* * we hit a 'hole' in the cache * we bail at this point * we'll unlock the object below */ break; vm_object_unlock(object); for (i = 0; i < cur_run; i++) { dst_page = page_run[i]; if ((xsize = PAGE_SIZE - start_offset) > io_requested) xsize = io_requested; if ( (retval = uiomove64((addr64_t)(((addr64_t)(dst_page->phys_page) << 12) + start_offset), xsize, uio)) ) break; io_requested -= xsize; start_offset = 0; } vm_object_lock(object); /* * if we have more than 1 page to work on * in the current run, or the original request * started at offset 0 of the page, or we're * processing multiple batches, we will move * the pages to the tail of the inactive queue * to implement an LRU for read/write accesses * * the check for orig_offset == 0 is there to * mitigate the cost of small (< page_size) requests * to the same page (this way we only move it once) */ if (take_reference && (cur_run > 1 || orig_offset == 0)) { vm_page_lockspin_queues(); make_lru = TRUE; } for (i = 0; i < cur_run; i++) { dst_page = page_run[i]; /* * someone is explicitly referencing this page... * update clustered and speculative state * */ VM_PAGE_CONSUME_CLUSTERED(dst_page); if (make_lru == TRUE) vm_page_lru(dst_page); PAGE_WAKEUP_DONE(dst_page); } if (make_lru == TRUE) { vm_page_unlock_queues(); make_lru = FALSE; } orig_offset = 0; } vm_object_unlock(object); return (retval); }
static int shm_dotruncate(struct shmfd *shmfd, off_t length) { vm_object_t object; vm_page_t m, ma[1]; vm_pindex_t idx, nobjsize; vm_ooffset_t delta; int base, rv; object = shmfd->shm_object; VM_OBJECT_LOCK(object); if (length == shmfd->shm_size) { VM_OBJECT_UNLOCK(object); return (0); } nobjsize = OFF_TO_IDX(length + PAGE_MASK); /* Are we shrinking? If so, trim the end. */ if (length < shmfd->shm_size) { /* * Disallow any requests to shrink the size if this * object is mapped into the kernel. */ if (shmfd->shm_kmappings > 0) { VM_OBJECT_UNLOCK(object); return (EBUSY); } /* * Zero the truncated part of the last page. */ base = length & PAGE_MASK; if (base != 0) { idx = OFF_TO_IDX(length); retry: m = vm_page_lookup(object, idx); if (m != NULL) { if ((m->oflags & VPO_BUSY) != 0 || m->busy != 0) { vm_page_sleep(m, "shmtrc"); goto retry; } } else if (vm_pager_has_page(object, idx, NULL, NULL)) { m = vm_page_alloc(object, idx, VM_ALLOC_NORMAL); if (m == NULL) { VM_OBJECT_UNLOCK(object); VM_WAIT; VM_OBJECT_LOCK(object); goto retry; } else if (m->valid != VM_PAGE_BITS_ALL) { ma[0] = m; rv = vm_pager_get_pages(object, ma, 1, 0); m = vm_page_lookup(object, idx); } else /* A cached page was reactivated. */ rv = VM_PAGER_OK; vm_page_lock(m); if (rv == VM_PAGER_OK) { vm_page_deactivate(m); vm_page_unlock(m); vm_page_wakeup(m); } else { vm_page_free(m); vm_page_unlock(m); VM_OBJECT_UNLOCK(object); return (EIO); } } if (m != NULL) { pmap_zero_page_area(m, base, PAGE_SIZE - base); KASSERT(m->valid == VM_PAGE_BITS_ALL, ("shm_dotruncate: page %p is invalid", m)); vm_page_dirty(m); vm_pager_page_unswapped(m); } } delta = ptoa(object->size - nobjsize); /* Toss in memory pages. */ if (nobjsize < object->size) vm_object_page_remove(object, nobjsize, object->size, 0); /* Toss pages from swap. */ if (object->type == OBJT_SWAP) swap_pager_freespace(object, nobjsize, delta); /* Free the swap accounted for shm */ swap_release_by_cred(delta, object->cred); object->charge -= delta; } else { /* Attempt to reserve the swap */ delta = ptoa(nobjsize - object->size); if (!swap_reserve_by_cred(delta, object->cred)) { VM_OBJECT_UNLOCK(object); return (ENOMEM); } object->charge += delta; } shmfd->shm_size = length; mtx_lock(&shm_timestamp_lock); vfs_timestamp(&shmfd->shm_ctime); shmfd->shm_mtime = shmfd->shm_ctime; mtx_unlock(&shm_timestamp_lock); object->size = nobjsize; VM_OBJECT_UNLOCK(object); return (0); }
/* software page fault handler */ static status_t vm_soft_page_fault(addr_t addr, bool is_write, bool is_exec, bool is_user) { vm_address_space_t *aspace; vm_mapping_t *mapping; vm_upage_t *upage; vm_page_t *page; unsigned long irqstate; status_t err; /* get faulted address space */ if(is_kernel_address(addr)) { aspace = vm_get_kernel_aspace(); } else { aspace = vm_get_current_user_aspace(); if(!aspace) panic("vm_soft_page_fault: no user address space!\n"); } /* increment address space faults counter */ atomic_inc((atomic_t*)&aspace->faults_count); /** TODO: spinlocks are temporary solution here. **/ /* acquire lock before touching address space */ irqstate = spin_lock_irqsave(&aspace->lock); /* get faulted mapping */ err = vm_aspace_get_mapping(aspace, addr, &mapping); if(err != NO_ERROR) panic("vm_soft_page_fault: can't get mapping at address %x, err = %x!\n", addr, err); /* this page fault handler deals only with mapped objects */ if(mapping->type != VM_MAPPING_TYPE_OBJECT) panic("vm_soft_page_fault: wrong mapping type!\n"); /* lock mapped object */ spin_lock(&mapping->object->lock); /* get universal page for mapping its data */ err = vm_object_get_or_add_upage(mapping->object, addr - mapping->start + mapping->offset, &upage); if(err != NO_ERROR) panic("vm_soft_page_fault: can't get upage, err = %x!\n", err); /* allocate new physical page or just map existing page */ if(upage->state == VM_UPAGE_STATE_UNWIRED) { /* upage is not wired with physical page. * so... allocate new one. */ page = vm_page_alloc(VM_PAGE_STATE_CLEAR); if(page == NULL) panic("vm_soft_page_fault: out of physical memory!\n"); /* stick physical page into upage */ upage->state = VM_UPAGE_STATE_RESIDENT; upage->ppn = page->ppn; } else if(upage->state == VM_UPAGE_STATE_RESIDENT) { /* upage has resident physical page. * just get it for further mapping. */ page = vm_page_lookup(upage->ppn); if(page == NULL) panic("vm_soft_page_fault: wrong physical page number!\n"); } else { /* all other upage states is not supported for now */ panic("vm_soft_page_fault: invalid universal page state!\n"); page = NULL; /* keep compiler happy */ } /* now unlock object */ spin_unlock(&mapping->object->lock); /* ... and lock translation map */ aspace->tmap.ops->lock(&aspace->tmap); /* map page into address space */ aspace->tmap.ops->map(&aspace->tmap, addr, PAGE_ADDRESS(page->ppn), mapping->protect); /* unlock translation map */ aspace->tmap.ops->unlock(&aspace->tmap); /* .. and finally unlock address space */ spin_unlock_irqrstor(&aspace->lock, irqstate); /* return address space to the kernel */ vm_put_aspace(aspace); /* page fault handled successfully */ return NO_ERROR; }
static int shm_dotruncate(struct shmfd *shmfd, off_t length) { vm_object_t object; vm_page_t m; vm_pindex_t nobjsize; vm_ooffset_t delta; object = shmfd->shm_object; VM_OBJECT_LOCK(object); if (length == shmfd->shm_size) { VM_OBJECT_UNLOCK(object); return (0); } nobjsize = OFF_TO_IDX(length + PAGE_MASK); /* Are we shrinking? If so, trim the end. */ if (length < shmfd->shm_size) { delta = ptoa(object->size - nobjsize); /* Toss in memory pages. */ if (nobjsize < object->size) vm_object_page_remove(object, nobjsize, object->size, FALSE); /* Toss pages from swap. */ if (object->type == OBJT_SWAP) swap_pager_freespace(object, nobjsize, delta); /* Free the swap accounted for shm */ swap_release_by_uid(delta, object->uip); object->charge -= delta; /* * If the last page is partially mapped, then zero out * the garbage at the end of the page. See comments * in vnode_pager_setsize() for more details. * * XXXJHB: This handles in memory pages, but what about * a page swapped out to disk? */ if ((length & PAGE_MASK) && (m = vm_page_lookup(object, OFF_TO_IDX(length))) != NULL && m->valid != 0) { int base = (int)length & PAGE_MASK; int size = PAGE_SIZE - base; pmap_zero_page_area(m, base, size); /* * Update the valid bits to reflect the blocks that * have been zeroed. Some of these valid bits may * have already been set. */ vm_page_set_valid(m, base, size); /* * Round "base" to the next block boundary so that the * dirty bit for a partially zeroed block is not * cleared. */ base = roundup2(base, DEV_BSIZE); vm_page_lock_queues(); vm_page_clear_dirty(m, base, PAGE_SIZE - base); vm_page_unlock_queues(); } else if ((length & PAGE_MASK) && __predict_false(object->cache != NULL)) { vm_page_cache_free(object, OFF_TO_IDX(length), nobjsize); } } else { /* Attempt to reserve the swap */ delta = ptoa(nobjsize - object->size); if (!swap_reserve_by_uid(delta, object->uip)) { VM_OBJECT_UNLOCK(object); return (ENOMEM); } object->charge += delta; } shmfd->shm_size = length; mtx_lock(&shm_timestamp_lock); vfs_timestamp(&shmfd->shm_ctime); shmfd->shm_mtime = shmfd->shm_ctime; mtx_unlock(&shm_timestamp_lock); object->size = nobjsize; VM_OBJECT_UNLOCK(object); return (0); }
static int
ttm_bo_vm_fault(vm_object_t vm_obj, vm_ooffset_t offset, int prot,
    vm_page_t *mres)
{
	struct ttm_buffer_object *bo = vm_obj->handle;
	struct ttm_bo_device *bdev = bo->bdev;
	struct ttm_tt *ttm = NULL;
	vm_page_t m, m1, oldm;
	int ret;
	int retval = VM_PAGER_OK;
	struct ttm_mem_type_manager *man = &bdev->man[bo->mem.mem_type];

	vm_object_pip_add(vm_obj, 1);
	oldm = *mres;
	if (oldm != NULL) {
		vm_page_lock(oldm);
		vm_page_remove(oldm);
		vm_page_unlock(oldm);
		*mres = NULL;
	} else
		oldm = NULL;
retry:
	VM_OBJECT_WUNLOCK(vm_obj);
	m = NULL;

reserve:
	ret = ttm_bo_reserve(bo, false, false, false, 0);
	if (unlikely(ret != 0)) {
		if (ret == -EBUSY) {
			kern_yield(0);
			goto reserve;
		}
	}

	if (bdev->driver->fault_reserve_notify) {
		ret = bdev->driver->fault_reserve_notify(bo);
		switch (ret) {
		case 0:
			break;
		case -EBUSY:
		case -ERESTART:
		case -EINTR:
			kern_yield(0);
			goto reserve;
		default:
			retval = VM_PAGER_ERROR;
			goto out_unlock;
		}
	}

	/*
	 * Wait for buffer data in transit, due to a pipelined
	 * move.
	 */
	mtx_lock(&bdev->fence_lock);
	if (test_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags)) {
		/*
		 * Here, the behavior differs between Linux and FreeBSD.
		 *
		 * On Linux, the wait is interruptible (3rd argument to
		 * ttm_bo_wait).  There must be some mechanism to resume
		 * page fault handling, once the signal is processed.
		 *
		 * On FreeBSD, the wait is uninterruptible.  This is not a
		 * problem as we can't end up with an unkillable process
		 * here, because the wait will eventually time out.
		 *
		 * An example of why an interruptible wait would be a
		 * problem is the Xorg process, which uses SIGALRM
		 * internally.  The signal could interrupt the wait,
		 * causing the page fault to fail and the process to
		 * receive SIGSEGV.
		 */
		ret = ttm_bo_wait(bo, false, false, false);
		mtx_unlock(&bdev->fence_lock);
		if (unlikely(ret != 0)) {
			retval = VM_PAGER_ERROR;
			goto out_unlock;
		}
	} else
		mtx_unlock(&bdev->fence_lock);

	ret = ttm_mem_io_lock(man, true);
	if (unlikely(ret != 0)) {
		retval = VM_PAGER_ERROR;
		goto out_unlock;
	}
	ret = ttm_mem_io_reserve_vm(bo);
	if (unlikely(ret != 0)) {
		retval = VM_PAGER_ERROR;
		goto out_io_unlock;
	}

	/*
	 * Strictly, we're not allowed to modify vma->vm_page_prot here,
	 * since the mmap_sem is only held in read mode.  However, we
	 * modify only the caching bits of vma->vm_page_prot and
	 * consider those bits protected by the bo->mutex, as we should
	 * be the only writers.  There shouldn't really be any readers
	 * of these bits except within vm_insert_mixed()?  fork?
	 *
	 * TODO: Add a list of vmas to the bo, and change the
	 * vma->vm_page_prot when the object changes caching policy, with
	 * the correct locks held.
	 */
	if (!bo->mem.bus.is_iomem) {
		/* Allocate all pages at once, the most common usage. */
		ttm = bo->ttm;
		if (ttm->bdev->driver->ttm_tt_populate(ttm)) {
			retval = VM_PAGER_ERROR;
			goto out_io_unlock;
		}
	}

	if (bo->mem.bus.is_iomem) {
		m = PHYS_TO_VM_PAGE(bo->mem.bus.base + bo->mem.bus.offset +
		    offset);
		KASSERT((m->flags & PG_FICTITIOUS) != 0,
		    ("physical address %#jx not fictitious",
		    (uintmax_t)(bo->mem.bus.base + bo->mem.bus.offset +
		    offset)));
		pmap_page_set_memattr(m, ttm_io_prot(bo->mem.placement));
	} else {
		ttm = bo->ttm;
		m = ttm->pages[OFF_TO_IDX(offset)];
		if (unlikely(!m)) {
			retval = VM_PAGER_ERROR;
			goto out_io_unlock;
		}
		pmap_page_set_memattr(m,
		    (bo->mem.placement & TTM_PL_FLAG_CACHED) ?
		    VM_MEMATTR_WRITE_BACK : ttm_io_prot(bo->mem.placement));
	}

	VM_OBJECT_WLOCK(vm_obj);
	if (vm_page_busied(m)) {
		vm_page_lock(m);
		VM_OBJECT_WUNLOCK(vm_obj);
		vm_page_busy_sleep(m, "ttmpbs");
		VM_OBJECT_WLOCK(vm_obj);
		ttm_mem_io_unlock(man);
		ttm_bo_unreserve(bo);
		goto retry;
	}
	m1 = vm_page_lookup(vm_obj, OFF_TO_IDX(offset));
	if (m1 == NULL) {
		if (vm_page_insert(m, vm_obj, OFF_TO_IDX(offset))) {
			VM_OBJECT_WUNLOCK(vm_obj);
			VM_WAIT;
			VM_OBJECT_WLOCK(vm_obj);
			ttm_mem_io_unlock(man);
			ttm_bo_unreserve(bo);
			goto retry;
		}
	} else {
		KASSERT(m == m1,
		    ("inconsistent insert bo %p m %p m1 %p offset %jx",
		    bo, m, m1, (uintmax_t)offset));
	}
	m->valid = VM_PAGE_BITS_ALL;
	*mres = m;
	vm_page_xbusy(m);

	if (oldm != NULL) {
		vm_page_lock(oldm);
		vm_page_free(oldm);
		vm_page_unlock(oldm);
	}

out_io_unlock1:
	ttm_mem_io_unlock(man);
out_unlock1:
	ttm_bo_unreserve(bo);
	vm_object_pip_wakeup(vm_obj);
	return (retval);

out_io_unlock:
	VM_OBJECT_WLOCK(vm_obj);
	goto out_io_unlock1;

out_unlock:
	VM_OBJECT_WLOCK(vm_obj);
	goto out_unlock1;
}
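This fault handler only runs when user space touches a mapping of a TTM buffer object. Below is a hedged sketch of the triggering side; the device node /dev/dri/card0 and the zero map offset are placeholders, since a real client first obtains a buffer's fake mmap offset from a driver-specific ioctl that is outside the scope of the code above.

#include <sys/mman.h>
#include <fcntl.h>
#include <stdint.h>
#include <unistd.h>

int
main(void)
{
	/* Placeholder device node; a real client opens its render node. */
	int fd = open("/dev/dri/card0", O_RDWR);
	if (fd < 0)
		return (1);

	size_t len = 4096;
	off_t fake_offset = 0;	/* hypothetical map handle from an ioctl */

	uint8_t *bo = mmap(NULL, len, PROT_READ | PROT_WRITE,
	    MAP_SHARED, fd, fake_offset);
	if (bo == MAP_FAILED) {
		close(fd);
		return (1);
	}

	/*
	 * The first store faults; the cdev pager calls ttm_bo_vm_fault(),
	 * which reserves the buffer, waits out any pipelined move, and
	 * inserts either a fictitious (iomem) page or a ttm_tt page into
	 * the VM object before the faulting instruction is restarted.
	 */
	bo[0] = 0xff;

	munmap(bo, len);
	close(fd);
	return (0);
}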
/*
 * mincore system call handler
 *
 * mincore_args(const void *addr, size_t len, char *vec)
 *
 * No requirements
 */
int
sys_mincore(struct mincore_args *uap)
{
	struct proc *p = curproc;
	vm_offset_t addr, first_addr;
	vm_offset_t end, cend;
	pmap_t pmap;
	vm_map_t map;
	char *vec;
	int error;
	int vecindex, lastvecindex;
	vm_map_entry_t current;
	vm_map_entry_t entry;
	int mincoreinfo;
	unsigned int timestamp;

	/*
	 * Make sure that the addresses presented are valid for user
	 * mode.
	 */
	first_addr = addr = trunc_page((vm_offset_t)uap->addr);
	end = addr + (vm_size_t)round_page(uap->len);
	if (end < addr)
		return (EINVAL);
	if (VM_MAX_USER_ADDRESS > 0 && end > VM_MAX_USER_ADDRESS)
		return (EINVAL);

	/*
	 * Address of byte vector
	 */
	vec = uap->vec;

	map = &p->p_vmspace->vm_map;
	pmap = vmspace_pmap(p->p_vmspace);

	lwkt_gettoken(&map->token);
	vm_map_lock_read(map);
RestartScan:
	timestamp = map->timestamp;

	if (!vm_map_lookup_entry(map, addr, &entry))
		entry = entry->next;

	/*
	 * Do this on a map entry basis so that if the pages are not
	 * in the current processes address space, we can easily look
	 * up the pages elsewhere.
	 */
	lastvecindex = -1;
	for (current = entry;
	     (current != &map->header) && (current->start < end);
	     current = current->next) {
		/*
		 * ignore submaps (for now) or null objects
		 */
		if (current->maptype != VM_MAPTYPE_NORMAL &&
		    current->maptype != VM_MAPTYPE_VPAGETABLE) {
			continue;
		}
		if (current->object.vm_object == NULL)
			continue;

		/*
		 * limit this scan to the current map entry and the
		 * limits for the mincore call
		 */
		if (addr < current->start)
			addr = current->start;
		cend = current->end;
		if (cend > end)
			cend = end;

		/*
		 * scan this entry one page at a time
		 */
		while (addr < cend) {
			/*
			 * Check pmap first, it is likely faster, also
			 * it can provide info as to whether we are the
			 * one referencing or modifying the page.
			 *
			 * If we have to check the VM object, only mess
			 * around with normal maps.  Do not mess around
			 * with virtual page tables (XXX).
			 */
			mincoreinfo = pmap_mincore(pmap, addr);
			if (mincoreinfo == 0 &&
			    current->maptype == VM_MAPTYPE_NORMAL) {
				vm_pindex_t pindex;
				vm_ooffset_t offset;
				vm_page_t m;

				/*
				 * calculate the page index into the object
				 */
				offset = current->offset +
				    (addr - current->start);
				pindex = OFF_TO_IDX(offset);

				/*
				 * if the page is resident, then gather
				 * information about it.  spl protection is
				 * required to maintain the object
				 * association.  And XXX what if the page is
				 * busy?  What's the deal with that?
				 *
				 * XXX vm_token - legacy for pmap_ts_referenced
				 * in i386 and vkernel pmap code.
				 */
				lwkt_gettoken(&vm_token);
				vm_object_hold(current->object.vm_object);
				m = vm_page_lookup(current->object.vm_object,
				    pindex);
				if (m && m->valid) {
					mincoreinfo = MINCORE_INCORE;
					if (m->dirty || pmap_is_modified(m))
						mincoreinfo |=
						    MINCORE_MODIFIED_OTHER;
					if ((m->flags & PG_REFERENCED) ||
					    pmap_ts_referenced(m)) {
						vm_page_flag_set(m,
						    PG_REFERENCED);
						mincoreinfo |=
						    MINCORE_REFERENCED_OTHER;
					}
				}
				vm_object_drop(current->object.vm_object);
				lwkt_reltoken(&vm_token);
			}

			/*
			 * subyte may page fault.  In case it needs to modify
			 * the map, we release the lock.
			 */
			vm_map_unlock_read(map);

			/*
			 * calculate index into user supplied byte vector
			 */
			vecindex = OFF_TO_IDX(addr - first_addr);

			/*
			 * If we have skipped map entries, we need to make
			 * sure that the byte vector is zeroed for those
			 * skipped entries.
			 */
			while ((lastvecindex + 1) < vecindex) {
				++lastvecindex;
				error = subyte(vec + lastvecindex, 0);
				if (error) {
					error = EFAULT;
					goto done;
				}
			}

			/*
			 * Pass the page information to the user
			 */
			error = subyte(vec + vecindex, mincoreinfo);
			if (error) {
				error = EFAULT;
				goto done;
			}

			/*
			 * If the map has changed, due to the subyte,
			 * the previous output may be invalid.
			 */
			vm_map_lock_read(map);
			if (timestamp != map->timestamp)
				goto RestartScan;

			lastvecindex = vecindex;
			addr += PAGE_SIZE;
		}
	}

	/*
	 * subyte may page fault.  In case it needs to modify
	 * the map, we release the lock.
	 */
	vm_map_unlock_read(map);

	/*
	 * Zero the last entries in the byte vector.
	 */
	vecindex = OFF_TO_IDX(end - first_addr);
	while ((lastvecindex + 1) < vecindex) {
		++lastvecindex;
		error = subyte(vec + lastvecindex, 0);
		if (error) {
			error = EFAULT;
			goto done;
		}
	}

	/*
	 * If the map has changed, due to the subyte, the previous
	 * output may be invalid.
	 */
	vm_map_lock_read(map);
	if (timestamp != map->timestamp)
		goto RestartScan;
	vm_map_unlock_read(map);

	error = 0;
done:
	lwkt_reltoken(&map->token);
	return (error);
}
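From user space this path is reached through mincore(2). The small demonstration below maps anonymous memory, touches every other page, and then inspects the byte vector that sys_mincore() fills in; on FreeBSD/DragonFly the vector is a plain char array and MINCORE_INCORE is the bit set by the pmap_mincore()/vm_page_lookup() logic above.

#include <sys/mman.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int
main(void)
{
	size_t pgsz = (size_t)getpagesize();
	size_t npages = 8;
	size_t len = npages * pgsz;

	char *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_PRIVATE, -1, 0);
	if (buf == MAP_FAILED)
		return (1);

	/* Touch every other page so only those become resident. */
	for (size_t i = 0; i < npages; i += 2)
		buf[i * pgsz] = 1;

	char *vec = malloc(npages);
	if (vec == NULL || mincore(buf, len, vec) != 0)
		return (1);

	for (size_t i = 0; i < npages; i++)
		printf("page %zu: %s\n", i,
		    (vec[i] & MINCORE_INCORE) ? "resident" : "not resident");

	free(vec);
	munmap(buf, len);
	return (0);
}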
DECLHIDDEN(RTHCPHYS) rtR0MemObjNativeGetPagePhysAddr(PRTR0MEMOBJINTERNAL pMem, size_t iPage)
{
    PRTR0MEMOBJFREEBSD pMemFreeBSD = (PRTR0MEMOBJFREEBSD)pMem;

    switch (pMemFreeBSD->Core.enmType)
    {
        case RTR0MEMOBJTYPE_LOCK:
        {
            if (    pMemFreeBSD->Core.u.Lock.R0Process != NIL_RTR0PROCESS
                &&  pMemFreeBSD->Core.u.Lock.R0Process != (RTR0PROCESS)curproc)
            {
                /* later */
                return NIL_RTHCPHYS;
            }

            vm_offset_t pb = (vm_offset_t)pMemFreeBSD->Core.pv + ptoa(iPage);

            struct proc    *pProc     = (struct proc *)pMemFreeBSD->Core.u.Lock.R0Process;
            struct vm_map  *pProcMap  = &pProc->p_vmspace->vm_map;
            pmap_t pPhysicalMap       = vm_map_pmap(pProcMap);

            return pmap_extract(pPhysicalMap, pb);
        }

        case RTR0MEMOBJTYPE_MAPPING:
        {
            vm_offset_t pb = (vm_offset_t)pMemFreeBSD->Core.pv + ptoa(iPage);

            if (pMemFreeBSD->Core.u.Mapping.R0Process != NIL_RTR0PROCESS)
            {
                struct proc    *pProc     = (struct proc *)pMemFreeBSD->Core.u.Mapping.R0Process;
                struct vm_map  *pProcMap  = &pProc->p_vmspace->vm_map;
                pmap_t pPhysicalMap       = vm_map_pmap(pProcMap);

                return pmap_extract(pPhysicalMap, pb);
            }
            return vtophys(pb);
        }

        case RTR0MEMOBJTYPE_PAGE:
        case RTR0MEMOBJTYPE_LOW:
        case RTR0MEMOBJTYPE_PHYS_NC:
        {
            RTHCPHYS addr;
#if __FreeBSD_version >= 1000030
            VM_OBJECT_WLOCK(pMemFreeBSD->pObject);
#else
            VM_OBJECT_LOCK(pMemFreeBSD->pObject);
#endif
            addr = VM_PAGE_TO_PHYS(vm_page_lookup(pMemFreeBSD->pObject, iPage));
#if __FreeBSD_version >= 1000030
            VM_OBJECT_WUNLOCK(pMemFreeBSD->pObject);
#else
            VM_OBJECT_UNLOCK(pMemFreeBSD->pObject);
#endif
            return addr;
        }

        case RTR0MEMOBJTYPE_PHYS:
            return pMemFreeBSD->Core.u.Phys.PhysBase + ptoa(iPage);

        case RTR0MEMOBJTYPE_CONT:
            return pMemFreeBSD->Core.u.Cont.Phys + ptoa(iPage);

        case RTR0MEMOBJTYPE_RES_VIRT:
        default:
            return NIL_RTHCPHYS;
    }
}
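Callers normally reach this code through the public IPRT wrappers rather than the native function. A hedged ring-0 sketch follows; it assumes an already allocated RTR0MEMOBJ handle and uses RTR0MemObjSize() and RTR0MemObjGetPagePhysAddr(), which dispatch to rtR0MemObjNativeGetPagePhysAddr() on FreeBSD. The helper name demoDumpPhysPages is invented for illustration.

#include <iprt/memobj.h>
#include <iprt/param.h>
#include <iprt/log.h>

/*
 * Hedged sketch: dump the host-physical address of every page backing a
 * ring-0 memory object, e.g. when building a scatter/gather list.
 */
static void demoDumpPhysPages(RTR0MEMOBJ hMemObj)
{
    size_t cPages = RTR0MemObjSize(hMemObj) >> PAGE_SHIFT;
    for (size_t iPage = 0; iPage < cPages; iPage++)
    {
        RTHCPHYS HCPhys = RTR0MemObjGetPagePhysAddr(hMemObj, iPage);
        if (HCPhys == NIL_RTHCPHYS)
            RTLogPrintf("page %zu: physical address not available\n", iPage);
        else
            RTLogPrintf("page %zu: %RHp\n", iPage, HCPhys);
    }
}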
static int
tmpfs_mappedwrite(vm_object_t vobj, vm_object_t tobj, size_t len,
    struct uio *uio)
{
	vm_pindex_t idx;
	vm_page_t vpg, tpg;
	vm_offset_t offset;
	off_t addr;
	size_t tlen;
	int error, rv;

	error = 0;

	addr = uio->uio_offset;
	idx = OFF_TO_IDX(addr);
	offset = addr & PAGE_MASK;
	tlen = MIN(PAGE_SIZE - offset, len);

	if ((vobj == NULL) ||
	    (vobj->resident_page_count == 0 && vobj->cache == NULL)) {
		vpg = NULL;
		goto nocache;
	}
	VM_OBJECT_LOCK(vobj);
lookupvpg:
	if (((vpg = vm_page_lookup(vobj, idx)) != NULL) &&
	    vm_page_is_valid(vpg, offset, tlen)) {
		if ((vpg->oflags & VPO_BUSY) != 0) {
			/*
			 * Reference the page before unlocking and sleeping so
			 * that the page daemon is less likely to reclaim it.
			 */
			vm_page_reference(vpg);
			vm_page_sleep(vpg, "tmfsmw");
			goto lookupvpg;
		}
		vm_page_busy(vpg);
		vm_page_undirty(vpg);
		VM_OBJECT_UNLOCK(vobj);
		error = uiomove_fromphys(&vpg, offset, tlen, uio);
	} else {
		if (__predict_false(vobj->cache != NULL))
			vm_page_cache_free(vobj, idx, idx + 1);
		VM_OBJECT_UNLOCK(vobj);
		vpg = NULL;
	}
nocache:
	VM_OBJECT_LOCK(tobj);
	tpg = vm_page_grab(tobj, idx, VM_ALLOC_WIRED |
	    VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
	if (tpg->valid != VM_PAGE_BITS_ALL) {
		if (vm_pager_has_page(tobj, idx, NULL, NULL)) {
			rv = vm_pager_get_pages(tobj, &tpg, 1, 0);
			if (rv != VM_PAGER_OK) {
				vm_page_lock(tpg);
				vm_page_free(tpg);
				vm_page_unlock(tpg);
				error = EIO;
				goto out;
			}
		} else
			vm_page_zero_invalid(tpg, TRUE);
	}
	VM_OBJECT_UNLOCK(tobj);
	if (vpg == NULL)
		error = uiomove_fromphys(&tpg, offset, tlen, uio);
	else {
		KASSERT(vpg->valid == VM_PAGE_BITS_ALL,
		    ("parts of vpg invalid"));
		pmap_copy_page(vpg, tpg);
	}
	VM_OBJECT_LOCK(tobj);
	if (error == 0) {
		KASSERT(tpg->valid == VM_PAGE_BITS_ALL,
		    ("parts of tpg invalid"));
		vm_page_dirty(tpg);
	}
	vm_page_lock(tpg);
	vm_page_unwire(tpg, TRUE);
	vm_page_unlock(tpg);
	vm_page_wakeup(tpg);
out:
	VM_OBJECT_UNLOCK(tobj);
	if (vpg != NULL) {
		VM_OBJECT_LOCK(vobj);
		vm_page_wakeup(vpg);
		VM_OBJECT_UNLOCK(vobj);
	}

	return (error);
}
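The effect of this write path is ordinary page-cache coherence: bytes written through a file descriptor become visible in an existing shared mapping of the same tmpfs file without any remap or msync(2). A userland sketch follows; the path /tmp/mappedwrite.demo is illustrative and the program assumes /tmp is a tmpfs mount.

#include <sys/mman.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	long pgsz = getpagesize();
	const char *path = "/tmp/mappedwrite.demo";	/* assumed tmpfs */
	int fd = open(path, O_RDWR | O_CREAT | O_TRUNC, 0600);
	if (fd < 0)
		return (1);
	if (ftruncate(fd, pgsz) != 0)
		return (1);

	char *map = mmap(NULL, pgsz, PROT_READ, MAP_SHARED, fd, 0);
	if (map == MAP_FAILED)
		return (1);

	/* Fault the page in through the mapping first... */
	volatile char before = map[0];
	(void)before;

	/*
	 * ...then write through the descriptor.  tmpfs_mappedwrite()
	 * copies the new bytes into the already-resident page, so the
	 * mapping observes them immediately.
	 */
	const char msg[] = "hello from write(2)";
	if (pwrite(fd, msg, sizeof(msg), 0) != (ssize_t)sizeof(msg))
		return (1);

	printf("mapping now reads: %s\n", map);

	munmap(map, pgsz);
	close(fd);
	unlink(path);
	return (0);
}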
static int
tmpfs_mappedread(vm_object_t vobj, vm_object_t tobj, size_t len,
    struct uio *uio)
{
	struct sf_buf *sf;
	vm_pindex_t idx;
	vm_page_t m;
	vm_offset_t offset;
	off_t addr;
	size_t tlen;
	char *ma;
	int error;

	addr = uio->uio_offset;
	idx = OFF_TO_IDX(addr);
	offset = addr & PAGE_MASK;
	tlen = MIN(PAGE_SIZE - offset, len);

	if ((vobj == NULL) ||
	    (vobj->resident_page_count == 0 && vobj->cache == NULL))
		goto nocache;

	VM_OBJECT_LOCK(vobj);
lookupvpg:
	if (((m = vm_page_lookup(vobj, idx)) != NULL) &&
	    vm_page_is_valid(m, offset, tlen)) {
		if ((m->oflags & VPO_BUSY) != 0) {
			/*
			 * Reference the page before unlocking and sleeping so
			 * that the page daemon is less likely to reclaim it.
			 */
			vm_page_reference(m);
			vm_page_sleep(m, "tmfsmr");
			goto lookupvpg;
		}
		vm_page_busy(m);
		VM_OBJECT_UNLOCK(vobj);
		error = uiomove_fromphys(&m, offset, tlen, uio);
		VM_OBJECT_LOCK(vobj);
		vm_page_wakeup(m);
		VM_OBJECT_UNLOCK(vobj);
		return (error);
	} else if (m != NULL && uio->uio_segflg == UIO_NOCOPY) {
		KASSERT(offset == 0,
		    ("unexpected offset in tmpfs_mappedread for sendfile"));
		if ((m->oflags & VPO_BUSY) != 0) {
			/*
			 * Reference the page before unlocking and sleeping so
			 * that the page daemon is less likely to reclaim it.
			 */
			vm_page_reference(m);
			vm_page_sleep(m, "tmfsmr");
			goto lookupvpg;
		}
		vm_page_busy(m);
		VM_OBJECT_UNLOCK(vobj);
		sched_pin();
		sf = sf_buf_alloc(m, SFB_CPUPRIVATE);
		ma = (char *)sf_buf_kva(sf);
		error = tmpfs_nocacheread_buf(tobj, idx, 0, tlen, ma);
		if (error == 0) {
			if (tlen != PAGE_SIZE)
				bzero(ma + tlen, PAGE_SIZE - tlen);
			uio->uio_offset += tlen;
			uio->uio_resid -= tlen;
		}
		sf_buf_free(sf);
		sched_unpin();
		VM_OBJECT_LOCK(vobj);
		if (error == 0)
			m->valid = VM_PAGE_BITS_ALL;
		vm_page_wakeup(m);
		VM_OBJECT_UNLOCK(vobj);
		return (error);
	}
	VM_OBJECT_UNLOCK(vobj);
nocache:
	error = tmpfs_nocacheread(tobj, idx, offset, tlen, uio);

	return (error);
}
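The read side is symmetric: a store made only through a shared mapping is immediately visible to read(2) on the same tmpfs file, because tmpfs_mappedread() copies straight out of the resident, valid page with uiomove_fromphys(). Another illustrative sketch, again assuming /tmp is a tmpfs mount and using an invented file name.

#include <sys/mman.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	long pgsz = getpagesize();
	const char *path = "/tmp/mappedread.demo";	/* assumed tmpfs */
	int fd = open(path, O_RDWR | O_CREAT | O_TRUNC, 0600);
	if (fd < 0)
		return (1);
	if (ftruncate(fd, pgsz) != 0)
		return (1);

	char *map = mmap(NULL, pgsz, PROT_READ | PROT_WRITE,
	    MAP_SHARED, fd, 0);
	if (map == MAP_FAILED)
		return (1);

	/* Dirty the page through the mapping only. */
	strcpy(map, "written through the mapping");

	/*
	 * tmpfs_mappedread() finds the valid resident page in the vnode's
	 * VM object and copies out of it, so read(2) sees the store
	 * without an intervening msync(2).
	 */
	char buf[64] = { 0 };
	if (pread(fd, buf, sizeof(buf) - 1, 0) < 0)
		return (1);
	printf("read(2) returned: %s\n", buf);

	munmap(map, pgsz);
	close(fd);
	unlink(path);
	return (0);
}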