/*
 * Tear down a pagelist once its bulk transfer is over.
 *
 * For fragment-carrying read pagelists (type >= PAGELIST_READ_WITH_FRAGMENTS)
 * the head/tail fragment buffers are copied back into the pages when
 * actual >= 0, and the FRAGMENTS_T slot is pushed back on the
 * g_free_fragments list.  Every page is then dirtied (unless the pagelist
 * type is PAGELIST_WRITE), unheld, and the pagelist itself is freed.
 *
 * pagelist - pagelist to release; freed with M_VCPAGELIST on return.
 * actual   - presumably the number of bytes actually transferred; a
 *            negative value skips the fragment copy-back (confirm with
 *            callers).
 */
static void
free_pagelist(PAGELIST_T *pagelist, int actual)
{
	vm_page_t *pages;
	unsigned int num_pages, i;

	vcos_log_trace("free_pagelist - %x, %d", (unsigned int)pagelist, actual);

	num_pages = (pagelist->length + pagelist->offset + PAGE_SIZE - 1) /
	    PAGE_SIZE;

	/* The vm_page_t array sits right after the num_pages addrs[] slots. */
	pages = (vm_page_t *)(pagelist->addrs + num_pages);

	/* Deal with any partial cache lines (fragments) */
	if (pagelist->type >= PAGELIST_READ_WITH_FRAGMENTS) {
		FRAGMENTS_T *fragments = g_fragments_base +
		    (pagelist->type - PAGELIST_READ_WITH_FRAGMENTS);
		int head_bytes, tail_bytes;

		if (actual >= 0) {
			/*
			 * XXXBSD: might be inefficient
			 *
			 * NOTE(review): this maps num_pages pages starting at
			 * the physical address of pages[0], i.e. it relies on
			 * the held pages being physically contiguous --
			 * confirm that the pagelist builder guarantees this.
			 */
			void *page_address = pmap_mapdev(VM_PAGE_TO_PHYS(pages[0]),
			    PAGE_SIZE*num_pages);

			/* Leading bytes up to the first cache-line boundary. */
			if ((head_bytes = (CACHE_LINE_SIZE - pagelist->offset) &
			    (CACHE_LINE_SIZE - 1)) != 0) {
				if (head_bytes > actual)
					head_bytes = actual;

				memcpy((char *)page_address + pagelist->offset,
				    fragments->headbuf, head_bytes);
			}

			/* Trailing bytes after the last full cache line. */
			if ((head_bytes < actual) &&
			    (tail_bytes = (pagelist->offset + actual) &
			    (CACHE_LINE_SIZE - 1)) != 0) {
				memcpy((char *)page_address +
				    PAGE_SIZE*(num_pages - 1) +
				    ((pagelist->offset + actual) &
				    (PAGE_SIZE - 1) & ~(CACHE_LINE_SIZE - 1)),
				    fragments->tailbuf, tail_bytes);
			}

			/*
			 * pmap_qremove() takes a count of pages, not bytes.
			 * Passing PAGE_SIZE*num_pages here would tear down
			 * unrelated kernel mappings beyond this window.
			 *
			 * NOTE(review): the KVA obtained by pmap_mapdev() is
			 * still not returned here; pmap_unmapdev() would be
			 * the matching call where the platform provides it.
			 */
			pmap_qremove((vm_offset_t)page_address, num_pages);
		}

		/* Push the fragment slot back on the free list and signal it. */
		mtx_lock(&g_free_fragments_mutex);
		*(FRAGMENTS_T **) fragments = g_free_fragments;
		g_free_fragments = fragments;
		mtx_unlock(&g_free_fragments_mutex);
		sema_post(&g_free_fragments_sema);
	}

	/* Anything other than a PAGELIST_WRITE may have modified the pages. */
	for (i = 0; i < num_pages; i++) {
		if (pagelist->type != PAGELIST_WRITE)
			vm_page_dirty(pages[i]);
	}

	vm_page_unhold_pages(pages, num_pages);

	free(pagelist, M_VCPAGELIST);
}
/*
 * Copy len bytes from the kernel buffer kaddr to the user address udaddr
 * in the current process' address space, one page-sized piece at a time.
 *
 * Each destination page is faulted in for read/write, temporarily mapped
 * through an lwbuf, filled, marked dirty and released again.
 *
 * Returns 0 on success; otherwise the error reported by vm_fault_page()
 * (described by the original author as EFAULT).
 */
int
copyout(const void *kaddr, void *udaddr, size_t len)
{
	struct vmspace *vm = curproc->p_vmspace;
	struct lwbuf lwb_cache;
	struct lwbuf *lwb;
	const char *src = kaddr;
	char *dst = udaddr;
	vm_offset_t pgoff;
	vm_page_t m;
	size_t chunk;
	int error = 0;

	for (; len != 0; len -= chunk, dst += chunk, src += chunk) {
		m = vm_fault_page(&vm->vm_map, trunc_page((vm_offset_t)dst),
				  VM_PROT_READ|VM_PROT_WRITE,
				  VM_FAULT_NORMAL, &error);
		if (error)
			break;

		/* Never cross a page boundary within a single copy. */
		pgoff = (vm_offset_t)dst & PAGE_MASK;
		chunk = PAGE_SIZE - pgoff;
		if (chunk > len)
			chunk = len;

		lwb = lwbuf_alloc(m, &lwb_cache);
		bcopy(src, (char *)lwbuf_kva(lwb) + pgoff, chunk);
		vm_page_dirty(m);
		lwbuf_free(lwb);
		vm_page_unhold(m);
	}
	return (error);
}
/*
 * Take the physical page backing kernel virtual address kaddr and install
 * it in the address space described by mapa at user address uaddr.  Any
 * page already present at that object/index is unmapped everywhere and
 * freed first.
 *
 * Returns KERN_SUCCESS on success, or EFAULT when the user address cannot
 * be looked up for writing.
 */
static int
vm_pgmoveco(vm_map_t mapa, vm_offset_t kaddr, vm_offset_t uaddr)
{
	vm_map_t map = mapa;
	vm_page_t kern_pg, user_pg;
	vm_object_t uobject;
	vm_map_entry_t entry;
	vm_pindex_t upindex;
	vm_prot_t prot;
	boolean_t wired;
	int rv;

	KASSERT((uaddr & PAGE_MASK) == 0,
	    ("vm_pgmoveco: uaddr is not page aligned"));

	/*
	 * The kernel page is made fully valid here and dirtied further
	 * down.  It is unwired in sf_buf_mext().
	 */
	kern_pg = PHYS_TO_VM_PAGE(vtophys(kaddr));
	kern_pg->valid = VM_PAGE_BITS_ALL;
	KASSERT(kern_pg->queue == PQ_NONE && kern_pg->wire_count == 1,
	    ("vm_pgmoveco: kern_pg is not correctly wired"));

	rv = vm_map_lookup(&map, uaddr, VM_PROT_WRITE, &entry, &uobject,
	    &upindex, &prot, &wired);
	if (rv != KERN_SUCCESS)
		return (EFAULT);

	VM_OBJECT_LOCK(uobject);

	/* Look the user page up again every time we had to sleep on it. */
	do {
		user_pg = vm_page_lookup(uobject, upindex);
	} while (user_pg != NULL &&
	    vm_page_sleep_if_busy(user_pg, TRUE, "vm_pgmoveco"));

	if (user_pg == NULL) {
		/*
		 * Nothing resident in the first object, but a page from a
		 * backing object may still be mapped read only at uaddr.
		 */
		if (uobject->backing_object != NULL)
			pmap_remove(map->pmap, uaddr, uaddr + PAGE_SIZE);
		vm_page_lock_queues();
	} else {
		vm_page_lock_queues();
		pmap_remove_all(user_pg);
		vm_page_free(user_pg);
	}

	/* Both branches above leave the page queues locked. */
	vm_page_insert(kern_pg, uobject, upindex);
	vm_page_dirty(kern_pg);
	vm_page_unlock_queues();
	VM_OBJECT_UNLOCK(uobject);
	vm_map_lookup_done(map, entry);

	return (KERN_SUCCESS);
}
/*
 * Release every page referenced by an XIO and reset the XIO so that it
 * can be destroyed.  Pages of an XIOF_WRITE XIO are marked dirty before
 * their hold is dropped.
 */
void
xio_release(xio_t xio)
{
	int idx = 0;

	while (idx < xio->xio_npages) {
		vm_page_t pg = xio->xio_pages[idx++];

		if ((xio->xio_flags & XIOF_WRITE) != 0)
			vm_page_dirty(pg);
		vm_page_unhold(pg);
	}

	/* Leave the XIO empty and flagged as unusable. */
	xio->xio_error = ENOBUFS;
	xio->xio_bytes = 0;
	xio->xio_npages = 0;
	xio->xio_offset = 0;
}
/*
 * For every page of the object, compare bit 17 of the page's current
 * physical address with the bit recorded in obj->bit_17.  Pages whose
 * bit differs are swizzled and marked dirty.  Does nothing when no
 * bit_17 bitmap has been recorded.
 */
void
i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj)
{
	int npages, idx;

	if (obj->bit_17 == NULL)
		return;

	npages = obj->base.size >> PAGE_SHIFT;
	for (idx = 0; idx < npages; idx++) {
		char phys_bit_17 = VM_PAGE_TO_PHYS(obj->pages[idx]) >> 17;
		int saved_bit_17 = (test_bit(idx, obj->bit_17) != 0);

		if ((phys_bit_17 & 0x1) == saved_bit_17)
			continue;

		i915_gem_swizzle_page(obj->pages[idx]);
		vm_page_dirty(obj->pages[idx]);
	}
}
/*
 * Copy the pages of a TTM into a swap-backed VM object and unpopulate
 * the TTM.
 *
 * When persistent_swap_storage is supplied it is used as the target
 * object and TTM_PAGE_FLAG_PERSISTENT_SWAP is set; otherwise a fresh
 * object is obtained from swap_pager_alloc().
 *
 * Returns 0 on success or -ENOMEM if the swap object cannot be allocated.
 */
int
ttm_tt_swapout(struct ttm_tt *ttm, vm_object_t persistent_swap_storage)
{
	vm_object_t swap_obj;
	vm_page_t src, dst;
	int idx;

	BUG_ON(ttm->state != tt_unbound && ttm->state != tt_unpopulated);
	BUG_ON(ttm->caching_state != tt_cached);

	if (persistent_swap_storage) {
		swap_obj = persistent_swap_storage;
	} else {
		swap_obj = swap_pager_alloc(NULL,
		    IDX_TO_OFF(ttm->num_pages), VM_PROT_DEFAULT, 0);
		if (swap_obj == NULL) {
			pr_err("Failed allocating swap storage\n");
			return (-ENOMEM);
		}
	}

	VM_OBJECT_LOCK(swap_obj);
	vm_object_pip_add(swap_obj, 1);
	for (idx = 0; idx < ttm->num_pages; idx++) {
		src = ttm->pages[idx];
		if (unlikely(src == NULL))
			continue;

		/* Copy into the swap object's page, then validate and dirty it. */
		dst = vm_page_grab(swap_obj, idx,
		    VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
		pmap_copy_page(VM_PAGE_TO_PHYS(src), VM_PAGE_TO_PHYS(dst));
		dst->valid = VM_PAGE_BITS_ALL;
		vm_page_dirty(dst);
		vm_page_wakeup(dst);
	}
	vm_object_pip_wakeup(swap_obj);
	VM_OBJECT_UNLOCK(swap_obj);

	ttm->bdev->driver->ttm_tt_unpopulate(ttm);
	ttm->swap_storage = swap_obj;
	ttm->page_flags |= TTM_PAGE_FLAG_SWAPPED;
	if (persistent_swap_storage)
		ttm->page_flags |= TTM_PAGE_FLAG_PERSISTENT_SWAP;

	return (0);
}
/*
 * Move the contents of a TTM's pages into a swap object and unpopulate
 * the TTM.
 *
 * The target is persistent_swap_storage when non-NULL (in which case
 * TTM_PAGE_FLAG_PERSISTENT_SWAP is also set), otherwise a new OBJT_SWAP
 * object sized for ttm->num_pages pages.
 *
 * Returns 0 on success or -ENOMEM when no swap object could be allocated.
 */
int
ttm_tt_swapout(struct ttm_tt *ttm, vm_object_t persistent_swap_storage)
{
	vm_object_t swap_obj;
	vm_page_t src, dst;
	int idx;

	MPASS(ttm->state == tt_unbound || ttm->state == tt_unpopulated);
	MPASS(ttm->caching_state == tt_cached);

	swap_obj = persistent_swap_storage;
	if (swap_obj == NULL) {
		swap_obj = vm_pager_allocate(OBJT_SWAP, NULL,
		    IDX_TO_OFF(ttm->num_pages), VM_PROT_DEFAULT, 0,
		    curthread->td_ucred);
		if (swap_obj == NULL) {
			printf("[TTM] Failed allocating swap storage\n");
			return (-ENOMEM);
		}
	}

	VM_OBJECT_WLOCK(swap_obj);
	vm_object_pip_add(swap_obj, 1);
	idx = 0;
	while (idx < ttm->num_pages) {
		src = ttm->pages[idx];
		if (unlikely(src == NULL)) {
			++idx;
			continue;
		}

		/* Fill the swap object's page and hand it back un-busied. */
		dst = vm_page_grab(swap_obj, idx, VM_ALLOC_NORMAL);
		pmap_copy_page(src, dst);
		dst->valid = VM_PAGE_BITS_ALL;
		vm_page_dirty(dst);
		vm_page_xunbusy(dst);
		++idx;
	}
	vm_object_pip_wakeup(swap_obj);
	VM_OBJECT_WUNLOCK(swap_obj);

	ttm->bdev->driver->ttm_tt_unpopulate(ttm);
	ttm->swap_storage = swap_obj;
	ttm->page_flags |= TTM_PAGE_FLAG_SWAPPED;
	if (persistent_swap_storage != NULL)
		ttm->page_flags |= TTM_PAGE_FLAG_PERSISTENT_SWAP;

	return (0);
}
/*
 * Drop the page holds of an XIO and reset it so that it can be
 * destroyed.  The pages are walked with vm_token held inside a critical
 * section; pages of an XIOF_WRITE XIO are dirtied before being unheld.
 */
void
xio_release(xio_t xio)
{
	vm_page_t pg;
	int idx;

	lwkt_gettoken(&vm_token);
	crit_enter();
	idx = 0;
	while (idx < xio->xio_npages) {
		pg = xio->xio_pages[idx];
		if ((xio->xio_flags & XIOF_WRITE) != 0)
			vm_page_dirty(pg);
		vm_page_unhold(pg);
		++idx;
	}
	crit_exit();
	lwkt_reltoken(&vm_token);

	/* Leave the XIO empty and flagged as unusable. */
	xio->xio_error = ENOBUFS;
	xio->xio_bytes = 0;
	xio->xio_npages = 0;
	xio->xio_offset = 0;
}
/*
 * Resolve a page fault at "vaddr" in "map" and, optionally, return the
 * resulting page held.
 *
 * The routine looks up the map entry, walks the chain of backing objects
 * starting at the entry's first object until it finds (or allocates and
 * fills) the page, performs a copy into the first object when a write or
 * copy fault hits a page owned by a backing object, re-validates the map
 * lookup if the map lock was dropped, and finally enters the page into
 * the map's pmap.
 *
 *	map		address space in which the fault occurred
 *	vaddr		faulting virtual address
 *	fault_type	requested access (VM_PROT_* bits)
 *	fault_flags	VM_FAULT_* flags; VM_FAULT_CHANGE_WIRING and
 *			VM_FAULT_DIRTY are examined here
 *	m_hold		if not NULL, receives the faulted page after
 *			vm_page_hold() has been applied to it
 *
 * Returns KERN_SUCCESS when the page was mapped.  Otherwise returns the
 * failure code of the map lookup, KERN_FAILURE (stack growth failed or
 * the pager reported an error), or KERN_PROTECTION_FAILURE (dead object,
 * index beyond the object's size, or VM_PAGER_BAD).
 */
int
vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
    int fault_flags, vm_page_t *m_hold)
{
	vm_prot_t prot;
	long ahead, behind;
	int alloc_req, era, faultcount, nera, reqpage, result;
	boolean_t growstack, is_first_object_locked, wired;
	int map_generation;
	vm_object_t next_object;
	vm_page_t marray[VM_FAULT_READ_MAX];
	int hardfault;
	struct faultstate fs;
	struct vnode *vp;
	int locked, error;

	hardfault = 0;
	growstack = TRUE;
	PCPU_INC(cnt.v_vm_faults);
	fs.vp = NULL;
	faultcount = reqpage = 0;

	/* Every "start over" path in this function jumps back to here. */
RetryFault:;

	/*
	 * Find the backing store object and offset into it to begin the
	 * search.
	 */
	fs.map = map;
	result = vm_map_lookup(&fs.map, vaddr, fault_type, &fs.entry,
	    &fs.first_object, &fs.first_pindex, &prot, &wired);
	if (result != KERN_SUCCESS) {
		/* Try growing the stack once, for non-kernel maps only. */
		if (growstack && result == KERN_INVALID_ADDRESS &&
		    map != kernel_map) {
			result = vm_map_growstack(curproc, vaddr);
			if (result != KERN_SUCCESS)
				return (KERN_FAILURE);
			growstack = FALSE;
			goto RetryFault;
		}
		return (result);
	}

	/* Remembered so a later re-lock can tell whether the map changed. */
	map_generation = fs.map->timestamp;

	if (fs.entry->eflags & MAP_ENTRY_NOFAULT) {
		panic("vm_fault: fault on nofault entry, addr: %lx",
		    (u_long)vaddr);
	}

	/*
	 * Make a reference to this object to prevent its disposal while we
	 * are messing with it.  Once we have the reference, the map is free
	 * to be diddled.  Since objects reference their shadows (and copies),
	 * they will stay around as well.
	 *
	 * Bump the paging-in-progress count to prevent size changes (e.g.
	 * truncation operations) during I/O.  This must be done after
	 * obtaining the vnode lock in order to avoid possible deadlocks.
	 */
	VM_OBJECT_WLOCK(fs.first_object);
	vm_object_reference_locked(fs.first_object);
	vm_object_pip_add(fs.first_object, 1);

	fs.lookup_still_valid = TRUE;

	/* For a wired entry, fault with the entry's full protection. */
	if (wired)
		fault_type = prot | (fault_type & VM_PROT_COPY);

	fs.first_m = NULL;

	/*
	 * Search for the page at object/offset.
	 */
	fs.object = fs.first_object;
	fs.pindex = fs.first_pindex;
	while (TRUE) {
		/*
		 * If the object is dead, we stop here
		 */
		if (fs.object->flags & OBJ_DEAD) {
			unlock_and_deallocate(&fs);
			return (KERN_PROTECTION_FAILURE);
		}

		/*
		 * See if page is resident
		 */
		fs.m = vm_page_lookup(fs.object, fs.pindex);
		if (fs.m != NULL) {
			/*
			 * check for page-based copy on write.
			 * We check fs.object == fs.first_object so
			 * as to ensure the legacy COW mechanism is
			 * used when the page in question is part of
			 * a shadow object.  Otherwise, vm_page_cowfault()
			 * removes the page from the backing object,
			 * which is not what we want.
			 */
			vm_page_lock(fs.m);
			if ((fs.m->cow) &&
			    (fault_type & VM_PROT_WRITE) &&
			    (fs.object == fs.first_object)) {
				vm_page_cowfault(fs.m);
				unlock_and_deallocate(&fs);
				goto RetryFault;
			}

			/*
			 * Wait/Retry if the page is busy.  We have to do this
			 * if the page is busy via either VPO_BUSY or
			 * vm_page_t->busy because the vm_pager may be using
			 * vm_page_t->busy for pageouts ( and even pageins if
			 * it is the vnode pager ), and we could end up trying
			 * to pagein and pageout the same page simultaneously.
			 *
			 * We can theoretically allow the busy case on a read
			 * fault if the page is marked valid, but since such
			 * pages are typically already pmap'd, putting that
			 * special case in might be more effort then it is
			 * worth.  We cannot under any circumstances mess
			 * around with a vm_page_t->busy page except, perhaps,
			 * to pmap it.
			 */
			if ((fs.m->oflags & VPO_BUSY) || fs.m->busy) {
				/*
				 * Reference the page before unlocking and
				 * sleeping so that the page daemon is less
				 * likely to reclaim it.
				 */
				vm_page_aflag_set(fs.m, PGA_REFERENCED);
				vm_page_unlock(fs.m);
				if (fs.object != fs.first_object) {
					/*
					 * Take the first object's lock; if it
					 * cannot be had immediately, drop the
					 * current object's lock and take both
					 * in first-object-then-object order.
					 */
					if (!VM_OBJECT_TRYWLOCK(
					    fs.first_object)) {
						VM_OBJECT_WUNLOCK(fs.object);
						VM_OBJECT_WLOCK(fs.first_object);
						VM_OBJECT_WLOCK(fs.object);
					}
					vm_page_lock(fs.first_m);
					vm_page_free(fs.first_m);
					vm_page_unlock(fs.first_m);
					vm_object_pip_wakeup(fs.first_object);
					VM_OBJECT_WUNLOCK(fs.first_object);
					fs.first_m = NULL;
				}
				unlock_map(&fs);
				/* Only sleep if the page is still the one at this index. */
				if (fs.m == vm_page_lookup(fs.object,
				    fs.pindex)) {
					vm_page_sleep_if_busy(fs.m, TRUE,
					    "vmpfw");
				}
				vm_object_pip_wakeup(fs.object);
				VM_OBJECT_WUNLOCK(fs.object);
				PCPU_INC(cnt.v_intrans);
				vm_object_deallocate(fs.first_object);
				goto RetryFault;
			}
			vm_page_remque(fs.m);
			vm_page_unlock(fs.m);

			/*
			 * Mark page busy for other processes, and the
			 * pagedaemon.  If it still isn't completely valid
			 * (readable), jump to readrest, else break-out ( we
			 * found the page ).
			 */
			vm_page_busy(fs.m);
			if (fs.m->valid != VM_PAGE_BITS_ALL)
				goto readrest;
			break;
		}

		/*
		 * Page is not resident, If this is the search termination
		 * or the pager might contain the page, allocate a new page.
		 */
		if (TRYPAGER || fs.object == fs.first_object) {
			if (fs.pindex >= fs.object->size) {
				unlock_and_deallocate(&fs);
				return (KERN_PROTECTION_FAILURE);
			}

			/*
			 * Allocate a new page for this object/offset pair.
			 *
			 * Unlocked read of the p_flag is harmless. At
			 * worst, the P_KILLED might be not observed
			 * there, and allocation can fail, causing
			 * restart and new reading of the p_flag.
			 */
			fs.m = NULL;
			if (!vm_page_count_severe() || P_KILLED(curproc)) {
#if VM_NRESERVLEVEL > 0
				if ((fs.object->flags & OBJ_COLORED) == 0) {
					fs.object->flags |= OBJ_COLORED;
					fs.object->pg_color = atop(vaddr) -
					    fs.pindex;
				}
#endif
				alloc_req = P_KILLED(curproc) ?
				    VM_ALLOC_SYSTEM : VM_ALLOC_NORMAL;
				/* Only objects with no pager data or backing object want a zeroed page. */
				if (fs.object->type != OBJT_VNODE &&
				    fs.object->backing_object == NULL)
					alloc_req |= VM_ALLOC_ZERO;
				fs.m = vm_page_alloc(fs.object, fs.pindex,
				    alloc_req);
			}
			if (fs.m == NULL) {
				/* No page available: wait for memory and start over. */
				unlock_and_deallocate(&fs);
				VM_WAITPFAULT;
				goto RetryFault;
			} else if (fs.m->valid == VM_PAGE_BITS_ALL)
				break;
		}

readrest:
		/*
		 * We have found a valid page or we have allocated a new page.
		 * The page thus may not be valid or may not be entirely
		 * valid.
		 *
		 * Attempt to fault-in the page if there is a chance that the
		 * pager has it, and potentially fault in additional pages
		 * at the same time.
		 */
		if (TRYPAGER) {
			int rv;
			u_char behavior = vm_map_entry_behavior(fs.entry);

			if (behavior == MAP_ENTRY_BEHAV_RANDOM ||
			    P_KILLED(curproc)) {
				behind = 0;
				ahead = 0;
			} else if (behavior == MAP_ENTRY_BEHAV_SEQUENTIAL) {
				behind = 0;
				ahead = atop(fs.entry->end - vaddr) - 1;
				if (ahead > VM_FAULT_READ_AHEAD_MAX)
					ahead = VM_FAULT_READ_AHEAD_MAX;
				if (fs.pindex == fs.entry->next_read)
					vm_fault_cache_behind(&fs,
					    VM_FAULT_READ_MAX);
			} else {
				/*
				 * If this is a sequential page fault, then
				 * arithmetically increase the number of pages
				 * in the read-ahead window.  Otherwise, reset
				 * the read-ahead window to its smallest size.
				 */
				behind = atop(vaddr - fs.entry->start);
				if (behind > VM_FAULT_READ_BEHIND)
					behind = VM_FAULT_READ_BEHIND;
				ahead = atop(fs.entry->end - vaddr) - 1;
				era = fs.entry->read_ahead;
				if (fs.pindex == fs.entry->next_read) {
					nera = era + behind;
					if (nera > VM_FAULT_READ_AHEAD_MAX)
						nera = VM_FAULT_READ_AHEAD_MAX;
					behind = 0;
					if (ahead > nera)
						ahead = nera;
					if (era == VM_FAULT_READ_AHEAD_MAX)
						vm_fault_cache_behind(&fs,
						    VM_FAULT_CACHE_BEHIND);
				} else if (ahead > VM_FAULT_READ_AHEAD_MIN)
					ahead = VM_FAULT_READ_AHEAD_MIN;
				if (era != ahead)
					fs.entry->read_ahead = ahead;
			}

			/*
			 * Call the pager to retrieve the data, if any, after
			 * releasing the lock on the map.  We hold a ref on
			 * fs.object and the pages are VPO_BUSY'd.
			 */
			unlock_map(&fs);

			if (fs.object->type == OBJT_VNODE) {
				vp = fs.object->handle;
				if (vp == fs.vp)
					goto vnode_locked;
				else if (fs.vp != NULL) {
					vput(fs.vp);
					fs.vp = NULL;
				}
				/* Request an exclusive lock only if one is already held; else shared. */
				locked = VOP_ISLOCKED(vp);

				if (locked != LK_EXCLUSIVE)
					locked = LK_SHARED;
				/* Do not sleep for vnode lock while fs.m is busy */
				error = vget(vp, locked | LK_CANRECURSE |
				    LK_NOWAIT, curthread);
				if (error != 0) {
					/*
					 * The non-blocking attempt failed:
					 * release the page and locks, then
					 * block for the vnode lock and retry
					 * the fault with it held in fs.vp.
					 */
					vhold(vp);
					release_page(&fs);
					unlock_and_deallocate(&fs);
					error = vget(vp, locked | LK_RETRY |
					    LK_CANRECURSE, curthread);
					vdrop(vp);
					fs.vp = vp;
					KASSERT(error == 0,
					    ("vm_fault: vget failed"));
					goto RetryFault;
				}
				fs.vp = vp;
			}
vnode_locked:
			KASSERT(fs.vp == NULL || !fs.map->system_map,
			    ("vm_fault: vnode-backed object mapped by system map"));

			/*
			 * now we find out if any other pages should be paged
			 * in at this time this routine checks to see if the
			 * pages surrounding this fault reside in the same
			 * object as the page for this fault.  If they do,
			 * then they are faulted in also into the object.  The
			 * array "marray" returned contains an array of
			 * vm_page_t structs where one of them is the
			 * vm_page_t passed to the routine.  The reqpage
			 * return value is the index into the marray for the
			 * vm_page_t passed to the routine.
			 *
			 * fs.m plus the additional pages are VPO_BUSY'd.
			 */
			faultcount = vm_fault_additional_pages(
			    fs.m, behind, ahead, marray, &reqpage);

			/* A zero faultcount is treated like a pager failure. */
			rv = faultcount ?
			    vm_pager_get_pages(fs.object, marray, faultcount,
				reqpage) : VM_PAGER_FAIL;

			if (rv == VM_PAGER_OK) {
				/*
				 * Found the page. Leave it busy while we play
				 * with it.
				 */

				/*
				 * Relookup in case pager changed page. Pager
				 * is responsible for disposition of old page
				 * if moved.
				 */
				fs.m = vm_page_lookup(fs.object, fs.pindex);
				if (!fs.m) {
					unlock_and_deallocate(&fs);
					goto RetryFault;
				}

				hardfault++;
				break; /* break to PAGE HAS BEEN FOUND */
			}
			/*
			 * Remove the bogus page (which does not exist at this
			 * object/offset); before doing so, we must get back
			 * our object lock to preserve our invariant.
			 *
			 * Also wake up any other process that may want to bring
			 * in this page.
			 *
			 * If this is the top-level object, we must leave the
			 * busy page to prevent another process from rushing
			 * past us, and inserting the page in that object at
			 * the same time that we are.
			 */
			if (rv == VM_PAGER_ERROR)
				printf("vm_fault: pager read error, pid %d (%s)\n",
				    curproc->p_pid, curproc->p_comm);
			/*
			 * Data outside the range of the pager or an I/O error
			 */
			/*
			 * XXX - the check for kernel_map is a kludge to work
			 * around having the machine panic on a kernel space
			 * fault w/ I/O error.
			 */
			if (((fs.map != kernel_map) && (rv == VM_PAGER_ERROR)) ||
				(rv == VM_PAGER_BAD)) {
				vm_page_lock(fs.m);
				vm_page_free(fs.m);
				vm_page_unlock(fs.m);
				fs.m = NULL;
				unlock_and_deallocate(&fs);
				return ((rv == VM_PAGER_ERROR) ? KERN_FAILURE : KERN_PROTECTION_FAILURE);
			}
			if (fs.object != fs.first_object) {
				vm_page_lock(fs.m);
				vm_page_free(fs.m);
				vm_page_unlock(fs.m);
				fs.m = NULL;
				/*
				 * XXX - we cannot just fall out at this
				 * point, m has been freed and is invalid!
				 */
			}
		}

		/*
		 * We get here if the object has default pager (or unwiring)
		 * or the pager doesn't have the page.
		 */
		if (fs.object == fs.first_object)
			fs.first_m = fs.m;

		/*
		 * Move on to the next object.  Lock the next object before
		 * unlocking the current one.
		 */
		fs.pindex += OFF_TO_IDX(fs.object->backing_object_offset);
		next_object = fs.object->backing_object;
		if (next_object == NULL) {
			/*
			 * If there's no object left, fill the page in the top
			 * object with zeros.
			 */
			if (fs.object != fs.first_object) {
				vm_object_pip_wakeup(fs.object);
				VM_OBJECT_WUNLOCK(fs.object);

				fs.object = fs.first_object;
				fs.pindex = fs.first_pindex;
				fs.m = fs.first_m;
				VM_OBJECT_WLOCK(fs.object);
			}
			fs.first_m = NULL;

			/*
			 * Zero the page if necessary and mark it valid.
			 */
			if ((fs.m->flags & PG_ZERO) == 0) {
				pmap_zero_page(fs.m);
			} else {
				PCPU_INC(cnt.v_ozfod);
			}
			PCPU_INC(cnt.v_zfod);
			fs.m->valid = VM_PAGE_BITS_ALL;
			break;	/* break to PAGE HAS BEEN FOUND */
		} else {
			KASSERT(fs.object != next_object,
			    ("object loop %p", next_object));
			VM_OBJECT_WLOCK(next_object);
			vm_object_pip_add(next_object, 1);
			/* The first object keeps its paging-in-progress count. */
			if (fs.object != fs.first_object)
				vm_object_pip_wakeup(fs.object);
			VM_OBJECT_WUNLOCK(fs.object);
			fs.object = next_object;
		}
	}

	KASSERT((fs.m->oflags & VPO_BUSY) != 0,
	    ("vm_fault: not busy after main loop"));

	/*
	 * PAGE HAS BEEN FOUND. [Loop invariant still holds -- the object lock
	 * is held.]
	 */

	/*
	 * If the page is being written, but isn't already owned by the
	 * top-level object, we have to copy it into a new page owned by the
	 * top-level object.
	 */
	if (fs.object != fs.first_object) {
		/*
		 * We only really need to copy if we want to write it.
		 */
		if ((fault_type & (VM_PROT_COPY | VM_PROT_WRITE)) != 0) {
			/*
			 * This allows pages to be virtually copied from a
			 * backing_object into the first_object, where the
			 * backing object has no other refs to it, and cannot
			 * gain any more refs.  Instead of a bcopy, we just
			 * move the page from the backing object to the
			 * first object.  Note that we must mark the page
			 * dirty in the first object so that it will go out
			 * to swap when needed.
			 */
			is_first_object_locked = FALSE;
			if (
				/*
				 * Only one shadow object
				 */
				(fs.object->shadow_count == 1) &&
				/*
				 * No COW refs, except us
				 */
				(fs.object->ref_count == 1) &&
				/*
				 * No one else can look this object up
				 */
				(fs.object->handle == NULL) &&
				/*
				 * No other ways to look the object up
				 */
				((fs.object->type == OBJT_DEFAULT) ||
				 (fs.object->type == OBJT_SWAP)) &&
			    (is_first_object_locked = VM_OBJECT_TRYWLOCK(fs.first_object)) &&
				/*
				 * We don't chase down the shadow chain
				 */
			    fs.object == fs.first_object->backing_object) {
				/*
				 * get rid of the unnecessary page
				 */
				vm_page_lock(fs.first_m);
				vm_page_free(fs.first_m);
				vm_page_unlock(fs.first_m);
				/*
				 * grab the page and put it into the
				 * process'es object.  The page is
				 * automatically made dirty.
				 */
				vm_page_lock(fs.m);
				vm_page_rename(fs.m, fs.first_object, fs.first_pindex);
				vm_page_unlock(fs.m);
				vm_page_busy(fs.m);
				fs.first_m = fs.m;
				fs.m = NULL;
				PCPU_INC(cnt.v_cow_optim);
			} else {
				/*
				 * Oh, well, lets copy it.
				 */
				pmap_copy_page(fs.m, fs.first_m);
				fs.first_m->valid = VM_PAGE_BITS_ALL;
				/* Move the wiring from the old page to the copy. */
				if (wired && (fault_flags &
				    VM_FAULT_CHANGE_WIRING) == 0) {
					vm_page_lock(fs.first_m);
					vm_page_wire(fs.first_m);
					vm_page_unlock(fs.first_m);

					vm_page_lock(fs.m);
					vm_page_unwire(fs.m, FALSE);
					vm_page_unlock(fs.m);
				}
				/*
				 * We no longer need the old page or object.
				 */
				release_page(&fs);
			}
			/*
			 * fs.object != fs.first_object due to above
			 * conditional
			 */
			vm_object_pip_wakeup(fs.object);
			VM_OBJECT_WUNLOCK(fs.object);
			/*
			 * Only use the new page below...
			 */
			fs.object = fs.first_object;
			fs.pindex = fs.first_pindex;
			fs.m = fs.first_m;
			/* The trylock in the condition above may already hold this lock. */
			if (!is_first_object_locked)
				VM_OBJECT_WLOCK(fs.object);
			PCPU_INC(cnt.v_cow_faults);
			curthread->td_cow++;
		} else {
			/* Read fault on a backing-object page: map it without write access. */
			prot &= ~VM_PROT_WRITE;
		}
	}

	/*
	 * We must verify that the maps have not changed since our last
	 * lookup.
	 */
	if (!fs.lookup_still_valid) {
		vm_object_t retry_object;
		vm_pindex_t retry_pindex;
		vm_prot_t retry_prot;

		if (!vm_map_trylock_read(fs.map)) {
			release_page(&fs);
			unlock_and_deallocate(&fs);
			goto RetryFault;
		}
		fs.lookup_still_valid = TRUE;
		if (fs.map->timestamp != map_generation) {
			result = vm_map_lookup_locked(&fs.map, vaddr, fault_type,
			    &fs.entry, &retry_object, &retry_pindex, &retry_prot, &wired);

			/*
			 * If we don't need the page any longer, put it on the inactive
			 * list (the easiest thing to do here).  If no one needs it,
			 * pageout will grab it eventually.
			 */
			if (result != KERN_SUCCESS) {
				release_page(&fs);
				unlock_and_deallocate(&fs);

				/*
				 * If retry of map lookup would have blocked then
				 * retry fault from start.
				 */
				if (result == KERN_FAILURE)
					goto RetryFault;
				return (result);
			}
			if ((retry_object != fs.first_object) ||
			    (retry_pindex != fs.first_pindex)) {
				release_page(&fs);
				unlock_and_deallocate(&fs);
				goto RetryFault;
			}

			/*
			 * Check whether the protection has changed or the object has
			 * been copied while we left the map unlocked. Changing from
			 * read to write permission is OK - we leave the page
			 * write-protected, and catch the write fault. Changing from
			 * write to read permission means that we can't mark the page
			 * write-enabled after all.
			 */
			prot &= retry_prot;
		}
	}
	/*
	 * If the page was filled by a pager, update the map entry's
	 * last read offset.  Since the pager does not return the
	 * actual set of pages that it read, this update is based on
	 * the requested set.  Typically, the requested and actual
	 * sets are the same.
	 *
	 * XXX The following assignment modifies the map
	 * without holding a write lock on it.
	 */
	if (hardfault)
		fs.entry->next_read = fs.pindex + faultcount - reqpage;

	if ((prot & VM_PROT_WRITE) != 0 ||
	    (fault_flags & VM_FAULT_DIRTY) != 0) {
		vm_object_set_writeable_dirty(fs.object);

		/*
		 * If this is a NOSYNC mmap we do not want to set VPO_NOSYNC
		 * if the page is already dirty to prevent data written with
		 * the expectation of being synced from not being synced.
		 * Likewise if this entry does not request NOSYNC then make
		 * sure the page isn't marked NOSYNC.  Applications sharing
		 * data should use the same flags to avoid ping ponging.
		 */
		if (fs.entry->eflags & MAP_ENTRY_NOSYNC) {
			if (fs.m->dirty == 0)
				fs.m->oflags |= VPO_NOSYNC;
		} else {
			fs.m->oflags &= ~VPO_NOSYNC;
		}

		/*
		 * If the fault is a write, we know that this page is being
		 * written NOW so dirty it explicitly to save on
		 * pmap_is_modified() calls later.
		 *
		 * Also tell the backing pager, if any, that it should remove
		 * any swap backing since the page is now dirty.
		 */
		if (((fault_type & VM_PROT_WRITE) != 0 &&
		    (fault_flags & VM_FAULT_CHANGE_WIRING) == 0) ||
		    (fault_flags & VM_FAULT_DIRTY) != 0) {
			vm_page_dirty(fs.m);
			vm_pager_page_unswapped(fs.m);
		}
	}

	/*
	 * Page had better still be busy
	 */
	KASSERT(fs.m->oflags & VPO_BUSY,
		("vm_fault: page %p not busy!", fs.m));
	/*
	 * Page must be completely valid or it is not fit to
	 * map into user space.  vm_pager_get_pages() ensures this.
	 */
	KASSERT(fs.m->valid == VM_PAGE_BITS_ALL,
	    ("vm_fault: page %p partially invalid", fs.m));
	VM_OBJECT_WUNLOCK(fs.object);

	/*
	 * Put this page into the physical map.  We had to do the unlock above
	 * because pmap_enter() may sleep.  We don't put the page
	 * back on the active queue until later so that the pageout daemon
	 * won't find it (yet).
	 */
	pmap_enter(fs.map->pmap, vaddr, fault_type, fs.m, prot, wired);
	/* Prefaulting is skipped for wired entries and wiring changes. */
	if ((fault_flags & VM_FAULT_CHANGE_WIRING) == 0 && wired == 0)
		vm_fault_prefault(fs.map->pmap, vaddr, fs.entry);
	VM_OBJECT_WLOCK(fs.object);
	vm_page_lock(fs.m);

	/*
	 * If the page is not wired down, then put it where the pageout daemon
	 * can find it.
	 */
	if (fault_flags & VM_FAULT_CHANGE_WIRING) {
		if (wired)
			vm_page_wire(fs.m);
		else
			vm_page_unwire(fs.m, 1);
	} else
		vm_page_activate(fs.m);
	/* Hand the page back held if the caller asked for it. */
	if (m_hold != NULL) {
		*m_hold = fs.m;
		vm_page_hold(fs.m);
	}
	vm_page_unlock(fs.m);
	vm_page_wakeup(fs.m);

	/*
	 * Unlock everything, and return
	 */
	unlock_and_deallocate(&fs);
	/* Faults satisfied by the pager count as major faults, all others as minor. */
	if (hardfault) {
		PCPU_INC(cnt.v_io_faults);
		curthread->td_ru.ru_majflt++;
	} else
		curthread->td_ru.ru_minflt++;

	return (KERN_SUCCESS);
}
/* * Hold each of the physical pages that are mapped by the specified range of * virtual addresses, ["addr", "addr" + "len"), if those mappings are valid * and allow the specified types of access, "prot". If all of the implied * pages are successfully held, then the number of held pages is returned * together with pointers to those pages in the array "ma". However, if any * of the pages cannot be held, -1 is returned. */ int vm_fault_quick_hold_pages(vm_map_t map, vm_offset_t addr, vm_size_t len, vm_prot_t prot, vm_page_t *ma, int max_count) { vm_offset_t end, va; vm_page_t *mp; int count; boolean_t pmap_failed; if (len == 0) return (0); end = round_page(addr + len); addr = trunc_page(addr); /* * Check for illegal addresses. */ if (addr < vm_map_min(map) || addr > end || end > vm_map_max(map)) return (-1); count = howmany(end - addr, PAGE_SIZE); if (count > max_count) panic("vm_fault_quick_hold_pages: count > max_count"); /* * Most likely, the physical pages are resident in the pmap, so it is * faster to try pmap_extract_and_hold() first. */ pmap_failed = FALSE; for (mp = ma, va = addr; va < end; mp++, va += PAGE_SIZE) { *mp = pmap_extract_and_hold(map->pmap, va, prot); if (*mp == NULL) pmap_failed = TRUE; else if ((prot & VM_PROT_WRITE) != 0 && (*mp)->dirty != VM_PAGE_BITS_ALL) { /* * Explicitly dirty the physical page. Otherwise, the * caller's changes may go unnoticed because they are * performed through an unmanaged mapping or by a DMA * operation. * * The object lock is not held here. * See vm_page_clear_dirty_mask(). */ vm_page_dirty(*mp); } } if (pmap_failed) { /* * One or more pages could not be held by the pmap. Either no * page was mapped at the specified virtual address or that * mapping had insufficient permissions. Attempt to fault in * and hold these pages. 
*/ for (mp = ma, va = addr; va < end; mp++, va += PAGE_SIZE) if (*mp == NULL && vm_fault_hold(map, va, prot, VM_FAULT_NORMAL, mp) != KERN_SUCCESS) goto error; } return (count); error: for (mp = ma; mp < ma + count; mp++) if (*mp != NULL) { vm_page_lock(*mp); vm_page_unhold(*mp); vm_page_unlock(*mp); } return (-1); }
/*
 * Hold "count" user pages starting at the page containing "addr" in
 * "map", storing the page pointers in "mp":
 *  - reject ranges that end above VM_MAXUSER_ADDRESS
 *  - first try to hold every page straight from the pmap, dirtying it
 *    when "prot" includes VM_PROT_WRITE
 *  - fault in whatever could not be held that way and hold it too
 *
 * Returns 0 on success.  On failure every hold taken is dropped, the
 * corresponding "mp" slots are cleared, and EFAULT is returned.
 */
int
vm_fault_hold_user_pages(vm_map_t map, vm_offset_t addr, vm_page_t *mp,
    int count, vm_prot_t prot)
{
	vm_offset_t end, va;
	int faults, rv, fault_mode;
	pmap_t pmap;
	vm_page_t m, *pages;

	pmap = vm_map_pmap(map);
	addr &= ~PAGE_MASK;

	/*
	 * Check that virtual address range is legal
	 * This check is somewhat bogus as on some architectures kernel
	 * and user do not share VA - however, it appears that all FreeBSD
	 * architectures define it
	 */
	end = addr + (count * PAGE_SIZE);
	if (end > VM_MAXUSER_ADDRESS) {
		log(LOG_WARNING, "bad address passed to vm_fault_hold_user_pages");
		return (EFAULT);
	}

	/*
	 * Optimistic pass: assume everything is resident (and writable if
	 * needed), so each page only has to be held, and dirtied for
	 * writes.  Count the pages for which that did not work.
	 */
	faults = 0;
	pages = mp;
	va = addr;
	vm_page_lock_queues();
	while (va < end) {
		/*
		 * page queue mutex is recursable so this is OK
		 * it would be really nice if we had an unlocked
		 * version of this so we were only acquiring the
		 * pmap lock 1 time as opposed to potentially
		 * many dozens of times
		 */
		m = pmap_extract_and_hold(pmap, va, prot);
		*pages = m;
		if (m == NULL) {
			faults++;
		} else if (prot & VM_PROT_WRITE) {
			/*
			 * Dirty up front: pages changed only via DMA never
			 * get their modified bit set.
			 */
			vm_page_dirty(m);
		}
		va += PAGE_SIZE;
		pages++;
	}
	vm_page_unlock_queues();

	if (faults == 0)
		return (0);

	/*
	 * Some pages are missing or lack the needed permissions; fault
	 * them in.  A slot is retried until it holds a page, to cover the
	 * narrow race where the page is taken away before it is held.
	 */
	fault_mode = (prot & VM_PROT_WRITE) ? VM_FAULT_DIRTY : VM_FAULT_NORMAL;
	rv = 0;
	pages = mp;
	va = addr;
	while (va < end) {
		if (*pages == NULL) {
			rv = vm_fault(map, va, prot, fault_mode);
			if (rv)
				break;
			*pages = pmap_extract_and_hold(pmap, va, prot);
			continue;
		}
		va += PAGE_SIZE;
		pages++;
	}
	if (rv == 0)
		return (0);

	/* A fault failed: report it and drop every hold we obtained. */
	log(LOG_WARNING,
	    "vm_fault bad return rv=%d va=0x%zx\n", rv, va);
	vm_page_lock_queues();
	for (pages = mp, va = addr; va < end; va += PAGE_SIZE, pages++) {
		if (*pages == NULL)
			continue;
		vm_page_unhold(*pages);
		*pages = NULL;
	}
	vm_page_unlock_queues();
	return (EFAULT);
}
/*
 * Change the size of a shared memory object to "length" bytes.
 *
 * When shrinking, the part of the new last page beyond "length" is
 * zeroed (paging the page in first if only the pager has it), pages and
 * swap space past the new end are discarded, and the swap charge is
 * released.  When growing, the additional swap is reserved.  In both
 * cases the size and the ctime/mtime stamps are updated.
 *
 * Returns 0 on success (including when the size is unchanged), EBUSY if
 * a shrink is requested while the object has kernel mappings, EIO if the
 * last page cannot be read from the pager, or ENOMEM if the swap
 * reservation for growth fails.
 */
static int
shm_dotruncate(struct shmfd *shmfd, off_t length)
{
	vm_object_t object;
	vm_page_t m, ma[1];
	vm_pindex_t idx, nobjsize;
	vm_ooffset_t delta;
	int base, rv;

	object = shmfd->shm_object;
	VM_OBJECT_LOCK(object);
	if (length == shmfd->shm_size) {
		VM_OBJECT_UNLOCK(object);
		return (0);
	}
	/* New object size in pages, rounding a partial page up. */
	nobjsize = OFF_TO_IDX(length + PAGE_MASK);

	/* Are we shrinking?  If so, trim the end. */
	if (length < shmfd->shm_size) {
		/*
		 * Disallow any requests to shrink the size if this
		 * object is mapped into the kernel.
		 */
		if (shmfd->shm_kmappings > 0) {
			VM_OBJECT_UNLOCK(object);
			return (EBUSY);
		}

		/*
		 * Zero the truncated part of the last page.
		 */
		base = length & PAGE_MASK;
		if (base != 0) {
			idx = OFF_TO_IDX(length);
retry:
			m = vm_page_lookup(object, idx);
			if (m != NULL) {
				/* Resident but busy: sleep, then look it up again. */
				if ((m->oflags & VPO_BUSY) != 0 ||
				    m->busy != 0) {
					vm_page_sleep(m, "shmtrc");
					goto retry;
				}
			} else if (vm_pager_has_page(object, idx, NULL, NULL)) {
				/* Not resident, but the pager has it: bring it in. */
				m = vm_page_alloc(object, idx, VM_ALLOC_NORMAL);
				if (m == NULL) {
					/* No free page: wait with the object unlocked and retry. */
					VM_OBJECT_UNLOCK(object);
					VM_WAIT;
					VM_OBJECT_LOCK(object);
					goto retry;
				} else if (m->valid != VM_PAGE_BITS_ALL) {
					ma[0] = m;
					rv = vm_pager_get_pages(object, ma, 1,
					    0);
					/* Look the page up again after the pager call. */
					m = vm_page_lookup(object, idx);
				} else
					/* A cached page was reactivated. */
					rv = VM_PAGER_OK;
				vm_page_lock(m);
				if (rv == VM_PAGER_OK) {
					vm_page_deactivate(m);
					vm_page_unlock(m);
					vm_page_wakeup(m);
				} else {
					vm_page_free(m);
					vm_page_unlock(m);
					VM_OBJECT_UNLOCK(object);
					return (EIO);
				}
			}
			/*
			 * m is NULL here only when the page is neither
			 * resident nor known to the pager; nothing needs
			 * zeroing in that case.
			 */
			if (m != NULL) {
				pmap_zero_page_area(m, base, PAGE_SIZE - base);
				KASSERT(m->valid == VM_PAGE_BITS_ALL,
				    ("shm_dotruncate: page %p is invalid", m));
				vm_page_dirty(m);
				vm_pager_page_unswapped(m);
			}
		}
		/* Bytes being given back, in whole pages. */
		delta = ptoa(object->size - nobjsize);

		/* Toss in memory pages. */
		if (nobjsize < object->size)
			vm_object_page_remove(object, nobjsize, object->size,
			    0);

		/* Toss pages from swap. */
		if (object->type == OBJT_SWAP)
			swap_pager_freespace(object, nobjsize, delta);

		/* Free the swap accounted for shm */
		swap_release_by_cred(delta, object->cred);
		object->charge -= delta;
	} else {
		/* Attempt to reserve the swap */
		delta = ptoa(nobjsize - object->size);
		if (!swap_reserve_by_cred(delta, object->cred)) {
			VM_OBJECT_UNLOCK(object);
			return (ENOMEM);
		}
		object->charge += delta;
	}
	shmfd->shm_size = length;
	/* ctime and mtime are both set to the current timestamp. */
	mtx_lock(&shm_timestamp_lock);
	vfs_timestamp(&shmfd->shm_ctime);
	shmfd->shm_mtime = shmfd->shm_ctime;
	mtx_unlock(&shm_timestamp_lock);
	object->size = nobjsize;
	VM_OBJECT_UNLOCK(object);
	return (0);
}
/*
 * Transfer at most one page of 'uio' data into the page of 'tobj' that
 * covers uio->uio_offset.
 *
 * If 'vobj' has a resident page at that index which is valid for the
 * affected range, the data is moved through that page with
 * uiomove_fromphys() and the whole page is then copied into tobj's page.
 * Otherwise the data is moved into tobj's page directly.  tobj's page is
 * paged in, or has its invalid parts zeroed, beforehand, and is marked
 * dirty only when no error occurred.
 *
 * 'len' bounds the transfer; it is additionally clipped to the end of the
 * page.  Returns 0 on success, EIO when the pager cannot supply tobj's
 * page, or the error reported by uiomove_fromphys().
 */
static int
tmpfs_mappedwrite(vm_object_t vobj, vm_object_t tobj, size_t len, struct uio *uio)
{
	vm_page_t vpg, tpg;
	vm_pindex_t pindex;
	vm_offset_t pgoff;
	size_t chunk;
	int error, pager_rv;

	error = 0;
	pindex = OFF_TO_IDX(uio->uio_offset);
	pgoff = uio->uio_offset & PAGE_MASK;
	chunk = MIN(PAGE_SIZE - pgoff, len);

	/*
	 * Only consult vobj when it exists and has either resident or
	 * cached pages.
	 */
	vpg = NULL;
	if (vobj != NULL &&
	    (vobj->resident_page_count != 0 || vobj->cache != NULL)) {
		VM_OBJECT_LOCK(vobj);
		for (;;) {
			vpg = vm_page_lookup(vobj, pindex);
			if (vpg == NULL ||
			    !vm_page_is_valid(vpg, pgoff, chunk)) {
				/* No usable page; drop any cached copy. */
				if (__predict_false(vobj->cache != NULL))
					vm_page_cache_free(vobj, pindex,
					    pindex + 1);
				VM_OBJECT_UNLOCK(vobj);
				vpg = NULL;
				break;
			}
			if ((vpg->oflags & VPO_BUSY) != 0) {
				/*
				 * Reference the page before unlocking and
				 * sleeping so that the page daemon is less
				 * likely to reclaim it, then look it up again.
				 */
				vm_page_reference(vpg);
				vm_page_sleep(vpg, "tmfsmw");
				continue;
			}
			vm_page_busy(vpg);
			vm_page_undirty(vpg);
			VM_OBJECT_UNLOCK(vobj);
			error = uiomove_fromphys(&vpg, pgoff, chunk, uio);
			break;
		}
	}

	/* Get tobj's page, wired, and make all of it valid. */
	VM_OBJECT_LOCK(tobj);
	tpg = vm_page_grab(tobj, pindex,
	    VM_ALLOC_WIRED | VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
	if (tpg->valid != VM_PAGE_BITS_ALL) {
		if (!vm_pager_has_page(tobj, pindex, NULL, NULL)) {
			vm_page_zero_invalid(tpg, TRUE);
		} else {
			pager_rv = vm_pager_get_pages(tobj, &tpg, 1, 0);
			if (pager_rv != VM_PAGER_OK) {
				/*
				 * NOTE(review): the page was grabbed with
				 * VM_ALLOC_WIRED and is freed here without a
				 * matching unwire -- confirm vm_page_free()
				 * tolerates that.
				 */
				vm_page_lock(tpg);
				vm_page_free(tpg);
				vm_page_unlock(tpg);
				error = EIO;
				goto unlock_tobj;
			}
		}
	}
	VM_OBJECT_UNLOCK(tobj);

	/* Fill tobj's page while the object lock is dropped. */
	if (vpg != NULL) {
		KASSERT(vpg->valid == VM_PAGE_BITS_ALL,
		    ("parts of vpg invalid"));
		pmap_copy_page(vpg, tpg);
	} else {
		error = uiomove_fromphys(&tpg, pgoff, chunk, uio);
	}

	VM_OBJECT_LOCK(tobj);
	if (error == 0) {
		KASSERT(tpg->valid == VM_PAGE_BITS_ALL,
		    ("parts of tpg invalid"));
		vm_page_dirty(tpg);
	}
	vm_page_lock(tpg);
	vm_page_unwire(tpg, TRUE);
	vm_page_unlock(tpg);
	vm_page_wakeup(tpg);

unlock_tobj:
	VM_OBJECT_UNLOCK(tobj);

	/* Release the busy state taken on vobj's page above. */
	if (vpg != NULL) {
		VM_OBJECT_LOCK(vobj);
		vm_page_wakeup(vpg);
		VM_OBJECT_UNLOCK(vobj);
	}
	return (error);
}
/*
 * Move at most one page of data between 'uio' and the page of 'obj' that
 * covers uio->uio_offset.
 *
 * The page is grabbed (and paged in, or has its invalid parts zeroed, when
 * it is not fully valid), held across the copy so the object lock can be
 * dropped, and dirtied afterwards when the uio is a write that succeeded.
 * On the way out the page is deactivated if it is on no queue, otherwise
 * requeued.
 *
 * 'len' bounds the transfer; it is additionally clipped to the end of the
 * page.  Returns 0 on success, EIO when the pager fails or the page cannot
 * be found again after the pager call, or the error reported by
 * uiomove_fromphys().
 */
static int
uiomove_object_page(vm_object_t obj, size_t len, struct uio *uio)
{
	vm_page_t m;
	vm_pindex_t idx;
	size_t tlen;
	int error, offset, rv;

	idx = OFF_TO_IDX(uio->uio_offset);
	offset = uio->uio_offset & PAGE_MASK;
	tlen = MIN(PAGE_SIZE - offset, len);

	VM_OBJECT_WLOCK(obj);

	/*
	 * Parallel reads of the page content from disk are prevented
	 * by exclusive busy.
	 *
	 * Although the tmpfs vnode lock is held here, it is
	 * nonetheless safe to sleep waiting for a free page.  The
	 * pageout daemon does not need to acquire the tmpfs vnode
	 * lock to page out tobj's pages because tobj is a OBJT_SWAP
	 * type object.
	 */
	m = vm_page_grab(obj, idx, VM_ALLOC_NORMAL);
	if (m->valid != VM_PAGE_BITS_ALL) {
		if (vm_pager_has_page(obj, idx, NULL, NULL)) {
			rv = vm_pager_get_pages(obj, &m, 1, 0);
			/* Look the page up again after the pager call. */
			m = vm_page_lookup(obj, idx);
			if (m == NULL) {
				/*
				 * idx is cast explicitly so the argument
				 * matches the %jd conversion whatever the
				 * underlying type of vm_pindex_t is.
				 */
				printf(
		    "uiomove_object: vm_obj %p idx %jd null lookup rv %d\n",
				    obj, (intmax_t)idx, rv);
				VM_OBJECT_WUNLOCK(obj);
				return (EIO);
			}
			if (rv != VM_PAGER_OK) {
				/*
				 * The valid mask is widened to uintmax_t and
				 * printed with %jx so the argument matches the
				 * conversion regardless of the mask's width.
				 */
				printf(
	    "uiomove_object: vm_obj %p idx %jd valid %jx pager error %d\n",
				    obj, (intmax_t)idx, (uintmax_t)m->valid,
				    rv);
				vm_page_lock(m);
				vm_page_free(m);
				vm_page_unlock(m);
				VM_OBJECT_WUNLOCK(obj);
				return (EIO);
			}
		} else
			vm_page_zero_invalid(m, TRUE);
	}

	/*
	 * Trade the busy state for a hold so the page stays put while the
	 * object lock is dropped for the copy.
	 */
	vm_page_xunbusy(m);
	vm_page_lock(m);
	vm_page_hold(m);
	vm_page_unlock(m);
	VM_OBJECT_WUNLOCK(obj);

	error = uiomove_fromphys(&m, offset, tlen, uio);

	/* Only a successful write leaves new data in the page. */
	if (uio->uio_rw == UIO_WRITE && error == 0) {
		VM_OBJECT_WLOCK(obj);
		vm_page_dirty(m);
		VM_OBJECT_WUNLOCK(obj);
	}

	vm_page_lock(m);
	vm_page_unhold(m);
	if (m->queue == PQ_NONE) {
		vm_page_deactivate(m);
	} else {
		/* Requeue to maintain LRU ordering. */
		vm_page_requeue(m);
	}
	vm_page_unlock(m);

	return (error);
}
/*
 * Finish a bulk transfer and release everything attached to 'bi'.
 *
 * For pagelists that carry fragments (type >= PAGELIST_READ_WITH_FRAGMENTS)
 * the partial cache lines at the head and tail of the transfer are copied
 * from the fragment buffers into bi->buf, and the fragment buffer is pushed
 * back onto the free list.  The pages are then marked dirty (unless the
 * pagelist type is PAGELIST_WRITE) and unheld, the pagelist DMA map, memory
 * and tag are torn down, and 'bi' itself is freed.
 *
 * 'actual' is the number of bytes transferred; when it is negative no
 * fragment data is copied back.
 */
static void
free_pagelist(BULKINFO_T *bi, int actual)
{
	vm_page_t *pages;
	unsigned int num_pages, i;
	PAGELIST_T *pagelist;

	pagelist = bi->pagelist;

	/* Print the pointer with %p so it is not truncated to 32 bits. */
	vchiq_log_trace(vchiq_arm_log_level,
	    "free_pagelist - %p, %d", (void *)pagelist, actual);

	num_pages = (pagelist->length + pagelist->offset + PAGE_SIZE - 1) /
	    PAGE_SIZE;

	/* The vm_page_t array is stored right after the num_pages addrs. */
	pages = (vm_page_t *)(pagelist->addrs + num_pages);

	/* Deal with any partial cache lines (fragments) */
	if (pagelist->type >= PAGELIST_READ_WITH_FRAGMENTS) {
		FRAGMENTS_T *fragments = g_fragments_base +
		    (pagelist->type - PAGELIST_READ_WITH_FRAGMENTS);
		int head_bytes, tail_bytes;

		/* Bytes from the buffer start up to the next cache line. */
		head_bytes = (CACHE_LINE_SIZE - pagelist->offset) &
		    (CACHE_LINE_SIZE - 1);
		/* Bytes past the last cache-line boundary of the transfer. */
		tail_bytes = (pagelist->offset + actual) &
		    (CACHE_LINE_SIZE - 1);

		if (actual >= 0) {
			if (head_bytes != 0) {
				/* Never copy more than was transferred. */
				if (head_bytes > actual)
					head_bytes = actual;

				memcpy((char *)bi->buf,
				    fragments->headbuf, head_bytes);
			}

			/* A tail exists only past the head fragment. */
			if ((head_bytes < actual) && (tail_bytes != 0)) {
				memcpy((char *)bi->buf + actual - tail_bytes,
				    fragments->tailbuf, tail_bytes);
			}
		}

		/*
		 * Push the fragment buffer onto the free list; the link to
		 * the next free entry is stored in the buffer itself.
		 */
		down(&g_free_fragments_mutex);
		*(FRAGMENTS_T **) fragments = g_free_fragments;
		g_free_fragments = fragments;
		up(&g_free_fragments_mutex);
		up(&g_free_fragments_sema);
	}

	/* The type does not change per page, so test it once. */
	if (pagelist->type != PAGELIST_WRITE) {
		for (i = 0; i < num_pages; i++)
			vm_page_dirty(pages[i]);
	}

	vm_page_unhold_pages(pages, num_pages);

	/* 'pages' points into the pagelist, so it is released last. */
	bus_dmamap_unload(bi->pagelist_dma_tag, bi->pagelist_dma_map);
	bus_dmamem_free(bi->pagelist_dma_tag, bi->pagelist,
	    bi->pagelist_dma_map);
	bus_dmamap_destroy(bi->pagelist_dma_tag, bi->pagelist_dma_map);
	bus_dma_tag_destroy(bi->pagelist_dma_tag);

	free(bi, M_VCPAGELIST);
}