/*
 * Return the requested word from the user-space address.
 * Returns 0 if it isn't mapped.  (For what we're using it
 * for, if the actual value is 0, that's equivalent to that.)
 */
static caddr_t
GET_WORD(pmap_t map, caddr_t virtual_addr)
{
        caddr_t retval = 0;
        vm_page_t page;
        int err;
        caddr_t old_fault;

        page = pmap_extract_and_hold(map, (vm_offset_t)virtual_addr, VM_PROT_READ);
        if (page == 0) {
                return 0;
        }

        // I do this because copyin/copyout aren't re-entrant.
        old_fault = curpcb->pcb_onfault;
        err = copyin(virtual_addr, &retval, sizeof(retval));
        curpcb->pcb_onfault = old_fault;

        if (err != 0) {
#if SAMPLE_DEBUG
                printf("%s(%d): copyin(%p, %p, %zd) failed: %d\n",
                    __FUNCTION__, __LINE__, (void*)virtual_addr, &retval,
                    sizeof(retval), err);
#endif
                retval = 0;
        }
        vm_page_lock(page);
        vm_page_unhold(page);
        vm_page_unlock(page);
        return retval;
}
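GET_WORD() only fetches a single word, so it is the building block for something larger. The sketch below is purely illustrative and not from the original code: it assumes a conventional frame-pointer chain on the user stack, and the function name sample_user_stack() and its parameters are hypothetical.

static int
sample_user_stack(struct thread *td, caddr_t fp, caddr_t *pcs, int max_depth)
{
        /* Hypothetical caller: collect a user-mode backtrace with GET_WORD(). */
        pmap_t map = vmspace_pmap(td->td_proc->p_vmspace);
        int depth = 0;

        while (depth < max_depth && fp != NULL) {
                /* Assume the return address sits one word above the saved frame pointer. */
                caddr_t ra = GET_WORD(map, fp + sizeof(caddr_t));
                if (ra == NULL)
                        break;
                pcs[depth++] = ra;
                /* Follow the saved frame pointer; GET_WORD() returns 0 for unmapped memory. */
                fp = GET_WORD(map, fp);
        }
        return (depth);
}

The max_depth bound also guards against a corrupted or self-referencing frame chain, since the walk never trusts more than that many links.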
/*
 * Hold each of the physical pages that are mapped by the specified range of
 * virtual addresses, ["addr", "addr" + "len"), if those mappings are valid
 * and allow the specified types of access, "prot".  If all of the implied
 * pages are successfully held, then the number of held pages is returned
 * together with pointers to those pages in the array "ma".  However, if any
 * of the pages cannot be held, -1 is returned.
 */
int
vm_fault_quick_hold_pages(vm_map_t map, vm_offset_t addr, vm_size_t len,
    vm_prot_t prot, vm_page_t *ma, int max_count)
{
        vm_offset_t end, va;
        vm_page_t *mp;
        int count;
        boolean_t pmap_failed;

        if (len == 0)
                return (0);
        end = round_page(addr + len);
        addr = trunc_page(addr);

        /*
         * Check for illegal addresses.
         */
        if (addr < vm_map_min(map) || addr > end || end > vm_map_max(map))
                return (-1);

        count = howmany(end - addr, PAGE_SIZE);
        if (count > max_count)
                panic("vm_fault_quick_hold_pages: count > max_count");

        /*
         * Most likely, the physical pages are resident in the pmap, so it is
         * faster to try pmap_extract_and_hold() first.
         */
        pmap_failed = FALSE;
        for (mp = ma, va = addr; va < end; mp++, va += PAGE_SIZE) {
                *mp = pmap_extract_and_hold(map->pmap, va, prot);
                if (*mp == NULL)
                        pmap_failed = TRUE;
                else if ((prot & VM_PROT_WRITE) != 0 &&
                    (*mp)->dirty != VM_PAGE_BITS_ALL) {
                        /*
                         * Explicitly dirty the physical page.  Otherwise, the
                         * caller's changes may go unnoticed because they are
                         * performed through an unmanaged mapping or by a DMA
                         * operation.
                         *
                         * The object lock is not held here.
                         * See vm_page_clear_dirty_mask().
                         */
                        vm_page_dirty(*mp);
                }
        }
        if (pmap_failed) {
                /*
                 * One or more pages could not be held by the pmap.  Either no
                 * page was mapped at the specified virtual address or that
                 * mapping had insufficient permissions.  Attempt to fault in
                 * and hold these pages.
                 */
                for (mp = ma, va = addr; va < end; mp++, va += PAGE_SIZE)
                        if (*mp == NULL && vm_fault_hold(map, va, prot,
                            VM_FAULT_NORMAL, mp) != KERN_SUCCESS)
                                goto error;
        }
        return (count);
error:
        for (mp = ma; mp < ma + count; mp++)
                if (*mp != NULL) {
                        vm_page_lock(*mp);
                        vm_page_unhold(*mp);
                        vm_page_unlock(*mp);
                }
        return (-1);
}
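A minimal caller sketch, not taken from the original text: it holds the pages backing a small user range for read and then releases them, with the clamp keeping the spanned page count within the on-stack array so the panic check above can never fire. The function name hold_user_range_example() is illustrative and the work performed while the pages are held is elided.

static int
hold_user_range_example(vm_map_t map, vm_offset_t uaddr, vm_size_t len)
{
        vm_page_t ma[8];
        int count, i;

        /* An unaligned range can span one more page than len / PAGE_SIZE,
         * so leave one slot of headroom in the array. */
        if (len > (nitems(ma) - 1) * PAGE_SIZE)
                len = (nitems(ma) - 1) * PAGE_SIZE;

        count = vm_fault_quick_hold_pages(map, uaddr, len, VM_PROT_READ,
            ma, nitems(ma));
        if (count == -1)
                return (EFAULT);

        /* ... access the held pages, e.g. via temporary kernel mappings ... */

        for (i = 0; i < count; i++) {
                vm_page_lock(ma[i]);
                vm_page_unhold(ma[i]);
                vm_page_unlock(ma[i]);
        }
        return (0);
}

The release loop mirrors the function's own error path: each page is unheld under its page lock once the caller is done with it.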
int
socow_setup(struct mbuf *m0, struct uio *uio)
{
        struct sf_buf *sf;
        vm_page_t pp;
        struct iovec *iov;
        struct vmspace *vmspace;
        struct vm_map *map;
        vm_offset_t offset, uva;

        socow_stats.attempted++;
        vmspace = curproc->p_vmspace;
        map = &vmspace->vm_map;
        uva = (vm_offset_t) uio->uio_iov->iov_base;
        offset = uva & PAGE_MASK;

        /*
         * Verify that access to the given address is allowed from user-space.
         */
        if (vm_fault_quick((caddr_t)uva, VM_PROT_READ) < 0)
                return (0);

        /*
         * verify page is mapped & not already wired for i/o
         */
        pp = pmap_extract_and_hold(map->pmap, uva, VM_PROT_READ);
        if (pp == NULL) {
                socow_stats.fail_not_mapped++;
                return (0);
        }

        /*
         * set up COW
         */
        vm_page_lock(pp);
        if (vm_page_cowsetup(pp) != 0) {
                vm_page_unhold(pp);
                vm_page_unlock(pp);
                return (0);
        }

        /*
         * wire the page for I/O
         */
        vm_page_wire(pp);
        vm_page_unhold(pp);
        vm_page_unlock(pp);

        /*
         * Allocate an sf buf
         */
        sf = sf_buf_alloc(pp, SFB_CATCH);
        if (sf == NULL) {
                vm_page_lock(pp);
                vm_page_cowclear(pp);
                vm_page_unwire(pp, 0);
                /*
                 * Check for the object going away on us. This can
                 * happen since we don't hold a reference to it.
                 * If so, we're responsible for freeing the page.
                 */
                if (pp->wire_count == 0 && pp->object == NULL)
                        vm_page_free(pp);
                vm_page_unlock(pp);
                socow_stats.fail_sf_buf++;
                return (0);
        }

        /*
         * attach to mbuf
         */
        MEXTADD(m0, sf_buf_kva(sf), PAGE_SIZE, socow_iodone,
            (void*)sf_buf_kva(sf), sf, M_RDONLY, EXT_SFBUF);
        m0->m_len = PAGE_SIZE - offset;
        m0->m_data = (caddr_t)sf_buf_kva(sf) + offset;
        socow_stats.success++;

        iov = uio->uio_iov;
        iov->iov_base = (char *)iov->iov_base + m0->m_len;
        iov->iov_len -= m0->m_len;
        uio->uio_resid -= m0->m_len;
        uio->uio_offset += m0->m_len;
        if (iov->iov_len == 0) {
                uio->uio_iov++;
                uio->uio_iovcnt--;
        }

        return (m0->m_len);
}
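socow_setup() returns the number of bytes it consumed from the uio, or 0 when the zero-copy path cannot be used, so any caller needs a copy fallback. The sketch below is an assumption about such a caller, not code from the original: the name zcopy_or_copy_chunk() is illustrative, the sysctl and buffer-space checks a real send path would perform are elided, and the fallback assumes a plain mbuf with MLEN bytes of internal storage.

static int
zcopy_or_copy_chunk(struct mbuf *m, struct uio *uio)
{
        int len;

        /* Zero-copy is only worth attempting when at least a full page remains. */
        if (uio->uio_iov->iov_len >= PAGE_SIZE &&
            (len = socow_setup(m, uio)) != 0)
                return (len);

        /* Fallback: copy the data into the mbuf's own data area. */
        len = (uio->uio_resid < MLEN) ? (int)uio->uio_resid : MLEN;
        if (uiomove(mtod(m, void *), len, uio) != 0)
                return (-1);
        m->m_len = len;
        return (len);
}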
/*
 * This routine takes a user's map, array of pages, number of pages, and flags
 * and then does the following:
 *  - validate that the user has access to those pages (flags indicates read
 *    or write) - if not fail
 *  - validate that count is enough to hold range number of pages - if not fail
 *  - fault in any non-resident pages
 *  - if the user is doing a read force a write fault for any COWed pages
 *  - if the user is doing a read mark all pages as dirty
 *  - hold all pages
 */
int
vm_fault_hold_user_pages(vm_map_t map, vm_offset_t addr, vm_page_t *mp,
    int count, vm_prot_t prot)
{
        vm_offset_t end, va;
        int faults, rv;
        pmap_t pmap;
        vm_page_t m, *pages;

        pmap = vm_map_pmap(map);
        pages = mp;
        addr &= ~PAGE_MASK;

        /*
         * Check that the virtual address range is legal.
         * This check is somewhat bogus as on some architectures kernel
         * and user do not share VA - however, it appears that all FreeBSD
         * architectures define it.
         */
        end = addr + (count * PAGE_SIZE);
        if (end > VM_MAXUSER_ADDRESS) {
                log(LOG_WARNING, "bad address passed to vm_fault_hold_user_pages");
                return (EFAULT);
        }

        /*
         * First optimistically assume that all pages are resident
         * (and R/W if for write); if so, just mark the pages as held (and
         * dirty if for write) and return.
         */
        vm_page_lock_queues();
        for (pages = mp, faults = 0, va = addr; va < end;
            va += PAGE_SIZE, pages++) {
                /*
                 * The page queue mutex is recursable, so this is OK.
                 * It would be really nice if we had an unlocked
                 * version of this so we were only acquiring the
                 * pmap lock one time as opposed to potentially
                 * many dozens of times.
                 */
                *pages = m = pmap_extract_and_hold(pmap, va, prot);
                if (m == NULL) {
                        faults++;
                        continue;
                }
                /*
                 * Preemptively mark dirty - the pages
                 * will never have the modified bit set if
                 * they are only changed via DMA.
                 */
                if (prot & VM_PROT_WRITE)
                        vm_page_dirty(m);
        }
        vm_page_unlock_queues();

        if (faults == 0)
                return (0);

        /*
         * Pages either have insufficient permissions or are not present;
         * trigger a fault where necessary.
         */
        rv = 0;
        for (pages = mp, va = addr; va < end; va += PAGE_SIZE, pages++) {
                /*
                 * Account for a very narrow race where the page may be
                 * taken away from us before it is held.
                 */
                while (*pages == NULL) {
                        rv = vm_fault(map, va, prot,
                            (prot & VM_PROT_WRITE) ? VM_FAULT_DIRTY : VM_FAULT_NORMAL);
                        if (rv)
                                goto error;
                        *pages = pmap_extract_and_hold(pmap, va, prot);
                }
        }
        return (0);
error:
        log(LOG_WARNING, "vm_fault bad return rv=%d va=0x%zx\n", rv, va);
        vm_page_lock_queues();
        for (pages = mp, va = addr; va < end; va += PAGE_SIZE, pages++)
                if (*pages) {
                        vm_page_unhold(*pages);
                        *pages = NULL;
                }
        vm_page_unlock_queues();
        return (EFAULT);
}
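A hedged sketch of how a driver might use vm_fault_hold_user_pages() to pin a user buffer around a synchronous DMA transfer and then drop the holds. The function name dma_user_buffer_example() and the fixed-size page array are illustrative, the device programming is elided, and the release loop mirrors the error path above; note that the routine holds the pages, it does not wire them.

static int
dma_user_buffer_example(vm_map_t map, vm_offset_t uaddr, int npages)
{
        vm_page_t pages[16];
        int error, i;

        /* Refuse requests larger than the on-stack page array. */
        if (npages > (int)(sizeof(pages) / sizeof(pages[0])))
                return (EINVAL);

        error = vm_fault_hold_user_pages(map, uaddr, pages, npages,
            VM_PROT_READ | VM_PROT_WRITE);
        if (error != 0)
                return (error);

        /* ... hand the physical addresses of pages[] to the device and
         * wait for the transfer to complete ... */

        vm_page_lock_queues();
        for (i = 0; i < npages; i++)
                vm_page_unhold(pages[i]);
        vm_page_unlock_queues();
        return (0);
}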