static int
ao_get(struct uvm_object *uobj, voff_t off, struct vm_page **pgs,
	int *npages, int centeridx, vm_prot_t access_type,
	int advice, int flags)
{
	struct vm_page *pg;
	int i;

	if (centeridx)
		panic("%s: centeridx != 0 not supported", __func__);

	/* loop over pages */
	off = trunc_page(off);
	for (i = 0; i < *npages; i++) {
 retrylookup:
		pg = uvm_pagelookup(uobj, off + (i << PAGE_SHIFT));
		if (pg) {
			if (pg->flags & PG_BUSY) {
				pg->flags |= PG_WANTED;
				UVM_UNLOCK_AND_WAIT(pg, &uobj->vmobjlock, 0,
				    "aogetpg", 0);
				/* re-take the object lock before retrying */
				mutex_enter(&uobj->vmobjlock);
				goto retrylookup;
			}
			pg->flags |= PG_BUSY;
			pgs[i] = pg;
		} else {
			pg = rumpvm_makepage(uobj, off + (i << PAGE_SHIFT));
			pgs[i] = pg;
		}
	}
	mutex_exit(&uobj->vmobjlock);

	return 0;
}
/*
 * uvm_km_pgremove: remove pages from a kernel uvm_object.
 *
 * => when you unmap a part of anonymous kernel memory you want to toss
 *    the pages right away.  (this gets called from uvm_unmap_...).
 */
void
uvm_km_pgremove(struct uvm_object *uobj, vaddr_t start, vaddr_t end)
{
	struct vm_page *pp;
	voff_t curoff;
	UVMHIST_FUNC("uvm_km_pgremove"); UVMHIST_CALLED(maphist);

	KASSERT(uobj->pgops == &aobj_pager);

	for (curoff = start; curoff < end; curoff += PAGE_SIZE) {
		pp = uvm_pagelookup(uobj, curoff);
		if (pp == NULL)
			continue;

		UVMHIST_LOG(maphist,"  page %p, busy=%ld", pp,
		    pp->pg_flags & PG_BUSY, 0, 0);

		if (pp->pg_flags & PG_BUSY) {
			/* owner must check for this when done */
			atomic_setbits_int(&pp->pg_flags, PG_RELEASED);
		} else {
			/* free the swap slot... */
			uao_dropswap(uobj, curoff >> PAGE_SHIFT);

			/*
			 * ...and free the page; note it may be on the
			 * active or inactive queues.
			 */
			uvm_lock_pageq();
			uvm_pagefree(pp);
			uvm_unlock_pageq();
		}
	}
}
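As the comment notes, this is reached from the unmap path for anonymous kernel
memory. The sketch below is only an illustration of how a kernel VA range maps
onto kernel_object offsets (the same vm_map_min(kernel_map) translation used by
uvm_km_alloc1() further down); the helper name is hypothetical, and whatever
locking the real unmap path holds around the call is omitted.

/*
 * Hypothetical sketch -- not part of the source: toss the pages backing
 * an anonymous kernel VA range.  kernel_object offsets are kernel VAs
 * relative to vm_map_min(kernel_map).
 */
static void
km_toss_pages_sketch(vaddr_t sva, vaddr_t eva)
{
	uvm_km_pgremove(uvm.kernel_object,
	    sva - vm_map_min(kernel_map),
	    eva - vm_map_min(kernel_map));
}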
void
buf_free_pages(struct buf *bp)
{
	struct uvm_object *uobj = bp->b_pobj;
	struct vm_page *pg;
	voff_t off, i;
	int s;

	KASSERT(bp->b_data == NULL);
	KASSERT(uobj != NULL);

	s = splbio();

	off = bp->b_poffs;
	bp->b_pobj = NULL;
	bp->b_poffs = 0;

	mtx_enter(&uobj->vmobjlock);
	for (i = 0; i < atop(bp->b_bufsize); i++) {
		pg = uvm_pagelookup(uobj, off + ptoa(i));
		KASSERT(pg != NULL);
		KASSERT(pg->wire_count == 1);
		pg->wire_count = 0;
		/* Never on a pageq, no pageqlock needed. */
		uvm_pagefree(pg);
		bcstats.numbufpages--;
	}
	mtx_leave(&uobj->vmobjlock);
	splx(s);
}
void
buf_free_pages(struct buf *bp)
{
	struct uvm_object *uobj = bp->b_pobj;
	struct vm_page *pg;
	voff_t off, i;
	int s;

	KASSERT(bp->b_data == NULL);
	KASSERT(uobj != NULL);

	s = splbio();

	off = bp->b_poffs;
	bp->b_pobj = NULL;
	bp->b_poffs = 0;

	for (i = 0; i < atop(bp->b_bufsize); i++) {
		pg = uvm_pagelookup(uobj, off + ptoa(i));
		KASSERT(pg != NULL);
		KASSERT(pg->wire_count == 1);
		pg->wire_count = 0;
		uvm_pagefree(pg);
		bcstats.numbufpages--;
	}
	splx(s);
}
void
buf_map(struct buf *bp)
{
	vaddr_t va;

	splassert(IPL_BIO);

	if (bp->b_data == NULL) {
		unsigned long i;

		/*
		 * First, just use the pre-allocated space until we run out.
		 */
		if (buf_kva_start < buf_kva_end) {
			va = buf_kva_start;
			buf_kva_start += MAXPHYS;
			bcstats.kvaslots_avail--;
		} else {
			struct buf *vbp;

			/*
			 * Find some buffer we can steal the space from.
			 */
			while ((vbp = TAILQ_FIRST(&buf_valist)) == NULL) {
				buf_needva++;
				buf_nkvmsleep++;
				tsleep(&buf_needva, PRIBIO, "buf_needva", 0);
			}
			va = buf_unmap(vbp);
		}

		mtx_enter(&bp->b_pobj->vmobjlock);
		for (i = 0; i < atop(bp->b_bufsize); i++) {
			struct vm_page *pg = uvm_pagelookup(bp->b_pobj,
			    bp->b_poffs + ptoa(i));

			KASSERT(pg != NULL);

			pmap_kenter_pa(va + ptoa(i), VM_PAGE_TO_PHYS(pg),
			    VM_PROT_READ|VM_PROT_WRITE);
		}
		mtx_leave(&bp->b_pobj->vmobjlock);
		pmap_update(pmap_kernel());
		bp->b_data = (caddr_t)va;
	} else {
		TAILQ_REMOVE(&buf_valist, bp, b_valist);
		bcstats.kvaslots_avail--;
	}

	bcstats.busymapped++;

	CLR(bp->b_flags, B_NOTMAPPED);
}
void
uvm_objunwire(struct uvm_object *uobj, voff_t start, voff_t end)
{
	struct vm_page *pg;
	off_t offset;

	uvm_lock_pageq();
	for (offset = start; offset < end; offset += PAGE_SIZE) {
		pg = uvm_pagelookup(uobj, offset);

		KASSERT(pg != NULL);
		KASSERT(!(pg->pg_flags & PG_RELEASED));

		uvm_pageunwire(pg);
	}
	uvm_unlock_pageq();
}
/* ARGSUSED */
int
sys_mincore(struct lwp *l, const struct sys_mincore_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(char *) vec;
	} */
	struct proc *p = l->l_proc;
	struct vm_page *pg;
	char *vec, pgi;
	struct uvm_object *uobj;
	struct vm_amap *amap;
	struct vm_anon *anon;
	struct vm_map_entry *entry;
	vaddr_t start, end, lim;
	struct vm_map *map;
	vsize_t len;
	int error = 0, npgs;

	map = &p->p_vmspace->vm_map;

	start = (vaddr_t)SCARG(uap, addr);
	len = SCARG(uap, len);
	vec = SCARG(uap, vec);

	if (start & PAGE_MASK)
		return EINVAL;
	len = round_page(len);
	end = start + len;
	if (end <= start)
		return EINVAL;

	/*
	 * Lock down vec, so our returned status isn't outdated by
	 * storing the status byte for a page.
	 */
	npgs = len >> PAGE_SHIFT;
	error = uvm_vslock(p->p_vmspace, vec, npgs, VM_PROT_WRITE);
	if (error) {
		return error;
	}
	vm_map_lock_read(map);

	if (uvm_map_lookup_entry(map, start, &entry) == false) {
		error = ENOMEM;
		goto out;
	}

	for (/* nothing */;
	     entry != &map->header && entry->start < end;
	     entry = entry->next) {
		KASSERT(!UVM_ET_ISSUBMAP(entry));
		KASSERT(start >= entry->start);

		/* Make sure there are no holes. */
		if (entry->end < end &&
		    (entry->next == &map->header ||
		     entry->next->start > entry->end)) {
			error = ENOMEM;
			goto out;
		}

		lim = end < entry->end ? end : entry->end;

		/*
		 * Special case for objects with no "real" pages.  Those
		 * are always considered resident (mapped devices).
		 */
		if (UVM_ET_ISOBJ(entry)) {
			KASSERT(!UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj));
			if (UVM_OBJ_IS_DEVICE(entry->object.uvm_obj)) {
				for (/* nothing */; start < lim;
				     start += PAGE_SIZE, vec++)
					subyte(vec, 1);
				continue;
			}
		}

		amap = entry->aref.ar_amap;	/* upper layer */
		uobj = entry->object.uvm_obj;	/* lower layer */

		if (amap != NULL)
			amap_lock(amap);
		if (uobj != NULL)
			mutex_enter(uobj->vmobjlock);

		for (/* nothing */; start < lim; start += PAGE_SIZE, vec++) {
			pgi = 0;
			if (amap != NULL) {
				/* Check the upper layer first. */
				anon = amap_lookup(&entry->aref,
				    start - entry->start);
				/* Don't need to lock anon here. */
				if (anon != NULL && anon->an_page != NULL) {
					/*
					 * Anon has the page for this entry
					 * offset.
					 */
					pgi = 1;
				}
			}
			if (uobj != NULL && pgi == 0) {
				/* Check the lower layer. */
				pg = uvm_pagelookup(uobj,
				    entry->offset + (start - entry->start));
				if (pg != NULL) {
					/*
					 * Object has the page for this entry
					 * offset.
					 */
					pgi = 1;
				}
			}
			(void) subyte(vec, pgi);
		}
		if (uobj != NULL)
			mutex_exit(uobj->vmobjlock);
		if (amap != NULL)
			amap_unlock(amap);
	}

 out:
	vm_map_unlock_read(map);
	uvm_vsunlock(p->p_vmspace, SCARG(uap, vec), npgs);
	return error;
}
/*
 * miscfs/genfs getpages routine.  This is a fair bit simpler than the
 * kernel counterpart since we're not being executed from a fault handler
 * and generally don't need to care about PGO_LOCKED or other cruft.
 * We do, however, need to care about page locking and we keep trying until
 * we get all the pages within the range.  The object locking protocol
 * is the same as for the kernel: enter with the object lock held,
 * return with it released.
 */
int
genfs_getpages(void *v)
{
	struct vop_getpages_args /* {
		struct vnode *a_vp;
		voff_t a_offset;
		struct vm_page **a_m;
		int *a_count;
		int a_centeridx;
		vm_prot_t a_access_type;
		int a_advice;
		int a_flags;
	} */ *ap = v;

	struct vnode *vp = ap->a_vp;
	struct uvm_object *uobj = (struct uvm_object *)vp;
	struct vm_page *pg;
	voff_t curoff, endoff;
	off_t diskeof;
	size_t bufsize, remain, bufoff, xfersize;
	uint8_t *tmpbuf;
	int bshift = vp->v_mount->mnt_fs_bshift;
	int bsize = 1<<bshift;
	int count = *ap->a_count;
	int async;
	int i, error;

	/*
	 * Ignore async for now, the structure of this routine
	 * doesn't exactly allow for it ...
	 */
	async = 0;

	if (ap->a_centeridx != 0)
		panic("%s: centeridx != 0 not supported", __func__);

	if (ap->a_access_type & VM_PROT_WRITE)
		vp->v_iflag |= VI_ONWORKLST;

	curoff = ap->a_offset & ~PAGE_MASK;
	for (i = 0; i < count; i++, curoff += PAGE_SIZE) {
 retrylookup:
		pg = uvm_pagelookup(uobj, curoff);
		if (pg == NULL)
			break;

		/* page is busy?  we need to wait until it's released */
		if (pg->flags & PG_BUSY) {
			pg->flags |= PG_WANTED;
			UVM_UNLOCK_AND_WAIT(pg, &uobj->vmobjlock, 0,
			    "getpg", 0);
			mutex_enter(&uobj->vmobjlock);
			goto retrylookup;
		}
		pg->flags |= PG_BUSY;
		if (pg->flags & PG_FAKE)
			break;
		ap->a_m[i] = pg;
	}

	/* got everything?  if so, just return */
	if (i == count) {
		mutex_exit(&uobj->vmobjlock);
		return 0;
	}

	/*
	 * didn't?  Ok, allocate backing pages.  Start from the first
	 * one we missed.
	 */
	for (; i < count; i++, curoff += PAGE_SIZE) {
 retrylookup2:
		pg = uvm_pagelookup(uobj, curoff);

		/* found?  busy it and be happy */
		if (pg) {
			if (pg->flags & PG_BUSY) {
				pg->flags |= PG_WANTED;
				UVM_UNLOCK_AND_WAIT(pg, &uobj->vmobjlock, 0,
				    "getpg2", 0);
				mutex_enter(&uobj->vmobjlock);
				goto retrylookup2;
			} else {
				pg->flags |= PG_BUSY;
			}

		/* not found?  make a new page */
		} else {
			pg = rumpvm_makepage(uobj, curoff);
		}
		ap->a_m[i] = pg;
	}

	/*
	 * We have done all the clerical work and have all pages busied.
	 * Release the vm object for other consumers.
	 */
	mutex_exit(&uobj->vmobjlock);

	/*
	 * Now, we have all the pages here & busy.  Transfer the range
	 * starting from the missing offset and transfer into the
	 * page buffers.
	 */
	GOP_SIZE(vp, vp->v_size, &diskeof, 0);

	/* align to boundaries */
	endoff = trunc_page(ap->a_offset) + (count << PAGE_SHIFT);
	endoff = MIN(endoff, ((vp->v_writesize+bsize-1) & ~(bsize-1)));
	curoff = ap->a_offset & ~(MAX(bsize,PAGE_SIZE)-1);
	remain = endoff - curoff;
	if (diskeof > curoff)
		remain = MIN(remain, diskeof - curoff);

	DPRINTF(("a_offset: %llx, startoff: 0x%llx, endoff 0x%llx\n",
	    (unsigned long long)ap->a_offset, (unsigned long long)curoff,
	    (unsigned long long)endoff));

	/* read everything into a buffer */
	bufsize = round_page(remain);
	tmpbuf = kmem_zalloc(bufsize, KM_SLEEP);
	for (bufoff = 0; remain; remain -= xfersize, bufoff+=xfersize) {
		struct buf *bp;
		struct vnode *devvp;
		daddr_t lbn, bn;
		int run;

		lbn = (curoff + bufoff) >> bshift;
		/* XXX: assume eof */
		error = VOP_BMAP(vp, lbn, &devvp, &bn, &run);
		if (error)
			panic("%s: VOP_BMAP & lazy bum: %d", __func__, error);
		DPRINTF(("lbn %d (off %d) -> bn %d run %d\n", (int)lbn,
		    (int)(curoff+bufoff), (int)bn, run));
		xfersize = MIN(((lbn+1+run)<<bshift)-(curoff+bufoff), remain);

		/* hole? */
		if (bn == -1) {
			memset(tmpbuf + bufoff, 0, xfersize);
			continue;
		}

		bp = getiobuf(vp, true);

		bp->b_data = tmpbuf + bufoff;
		bp->b_bcount = xfersize;
		bp->b_blkno = bn;
		bp->b_lblkno = 0;
		bp->b_flags = B_READ;
		bp->b_cflags = BC_BUSY;

		if (async) {
			bp->b_flags |= B_ASYNC;
			bp->b_iodone = uvm_aio_biodone;
		}

		VOP_STRATEGY(devvp, bp);
		if (bp->b_error)
			panic("%s: VOP_STRATEGY, lazy bum", __func__);

		if (!async)
			putiobuf(bp);
	}

	/* skip to beginning of pages we're interested in */
	bufoff = 0;
	while (round_page(curoff + bufoff) < trunc_page(ap->a_offset))
		bufoff += PAGE_SIZE;

	DPRINTF(("first page offset 0x%x\n", (int)(curoff + bufoff)));

	for (i = 0; i < count; i++, bufoff += PAGE_SIZE) {
		/* past our prime? */
		if (curoff + bufoff >= endoff)
			break;

		pg = uvm_pagelookup(&vp->v_uobj, curoff + bufoff);
		KASSERT(pg);
		DPRINTF(("got page %p (off 0x%x)\n", pg,
		    (int)(curoff+bufoff)));
		if (pg->flags & PG_FAKE) {
			memcpy((void *)pg->uanon, tmpbuf+bufoff, PAGE_SIZE);
			pg->flags &= ~PG_FAKE;
			pg->flags |= PG_CLEAN;
		}
		ap->a_m[i] = pg;
	}
	*ap->a_count = i;

	kmem_free(tmpbuf, bufsize);

	return 0;
}
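The header comment above spells out the locking protocol: enter with the object
lock held, return with it released, and the pages come back busied. A minimal
caller sketch under that protocol might look like the following; the helper
name, the VOP_GETPAGES dispatch and the uvm_page_unbusy() cleanup are
assumptions for illustration and are not taken from the code above.

/*
 * Hypothetical caller sketch -- not part of the source above.
 */
static int
read_range_sketch(struct vnode *vp, voff_t off, struct vm_page **pgs,
	int npages)
{
	struct uvm_object *uobj = &vp->v_uobj;
	int error;

	mutex_enter(&uobj->vmobjlock);		/* enter with the object locked */
	error = VOP_GETPAGES(vp, off, pgs, &npages, 0,
	    VM_PROT_READ, 0, 0);		/* lock is released on return */
	if (error)
		return error;

	/* use the pages; they are handed back busy */

	mutex_enter(&uobj->vmobjlock);
	uvm_page_unbusy(pgs, npages);		/* unbusy them when done */
	mutex_exit(&uobj->vmobjlock);
	return 0;
}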
/*
 * This is a slightly strangely structured routine.  It always puts
 * all the pages for a vnode.  It starts by releasing pages which
 * are clean and simultaneously looks up the smallest offset for a
 * dirty page belonging to the object.  If there is no smallest offset,
 * all pages have been cleaned.  Otherwise, it finds a contiguous range
 * of dirty pages starting from the smallest offset and writes them out.
 * After this the scan is restarted.
 */
int
genfs_do_putpages(struct vnode *vp, off_t startoff, off_t endoff, int flags,
	struct vm_page **busypg)
{
	char databuf[MAXPHYS];
	struct uvm_object *uobj = &vp->v_uobj;
	struct vm_page *pg, *pg_next;
	voff_t smallest;
	voff_t curoff, bufoff;
	off_t eof;
	size_t xfersize;
	int bshift = vp->v_mount->mnt_fs_bshift;
	int bsize = 1 << bshift;
#if 0
	int async = (flags & PGO_SYNCIO) == 0;
#else
	int async = 0;
#endif

 restart:
	/* check if all pages are clean */
	smallest = -1;
	for (pg = TAILQ_FIRST(&uobj->memq); pg; pg = pg_next) {
		pg_next = TAILQ_NEXT(pg, listq.queue);

		/*
		 * XXX: this is not correct at all.  But it's based on
		 * assumptions we can make when accessing the pages
		 * only through the file system and not through the
		 * virtual memory subsystem.  Well, at least I hope
		 * so ;)
		 */
		KASSERT((pg->flags & PG_BUSY) == 0);

		/* If we can just dump the page, do so */
		if (pg->flags & PG_CLEAN || flags & PGO_FREE) {
			uvm_pagefree(pg);
			continue;
		}

		if (pg->offset < smallest || smallest == -1)
			smallest = pg->offset;
	}

	/* all done? */
	if (TAILQ_EMPTY(&uobj->memq)) {
		vp->v_iflag &= ~VI_ONWORKLST;
		mutex_exit(&uobj->vmobjlock);
		return 0;
	}

	/* we need to flush */
	GOP_SIZE(vp, vp->v_writesize, &eof, 0);
	for (curoff = smallest; curoff < eof; curoff += PAGE_SIZE) {
		void *curva;

		if (curoff - smallest >= MAXPHYS)
			break;
		pg = uvm_pagelookup(uobj, curoff);
		if (pg == NULL)
			break;

		/* XXX: see comment about above KASSERT */
		KASSERT((pg->flags & PG_BUSY) == 0);

		curva = databuf + (curoff-smallest);
		memcpy(curva, (void *)pg->uanon, PAGE_SIZE);
		rumpvm_enterva((vaddr_t)curva, pg);

		pg->flags |= PG_CLEAN;
	}
	KASSERT(curoff > smallest);
	mutex_exit(&uobj->vmobjlock);

	/* then we write */
	for (bufoff = 0; bufoff < MIN(curoff-smallest,eof); bufoff+=xfersize) {
		struct buf *bp;
		struct vnode *devvp;
		daddr_t bn, lbn;
		int run, error;

		lbn = (smallest + bufoff) >> bshift;
		error = VOP_BMAP(vp, lbn, &devvp, &bn, &run);
		if (error)
			panic("%s: VOP_BMAP failed: %d", __func__, error);

		xfersize = MIN(((lbn+1+run) << bshift) - (smallest+bufoff),
		    curoff - (smallest+bufoff));

		/*
		 * We might run across blocks which aren't allocated yet.
		 * A reason might be e.g. the write operation being still
		 * in the kernel page cache while truncate has already
		 * enlarged the file.  So just ignore those ranges.
		 */
		if (bn == -1)
			continue;

		bp = getiobuf(vp, true);

		/* only write max what we are allowed to write */
		bp->b_bcount = xfersize;
		if (smallest + bufoff + xfersize > eof)
			bp->b_bcount -= (smallest+bufoff+xfersize) - eof;
		bp->b_bcount = (bp->b_bcount + DEV_BSIZE-1) & ~(DEV_BSIZE-1);

		KASSERT(bp->b_bcount > 0);
		KASSERT(smallest >= 0);

		DPRINTF(("putpages writing from %x to %x (vp size %x)\n",
		    (int)(smallest + bufoff),
		    (int)(smallest + bufoff + bp->b_bcount),
		    (int)eof));

		bp->b_bufsize = round_page(bp->b_bcount);
		bp->b_lblkno = 0;
		bp->b_blkno = bn + (((smallest+bufoff)&(bsize-1))>>DEV_BSHIFT);
		bp->b_data = databuf + bufoff;
		bp->b_flags = B_WRITE;
		bp->b_cflags |= BC_BUSY;

		if (async) {
			bp->b_flags |= B_ASYNC;
			bp->b_iodone = uvm_aio_biodone;
		}

		vp->v_numoutput++;
		VOP_STRATEGY(devvp, bp);
		if (bp->b_error)
			panic("%s: VOP_STRATEGY lazy bum %d",
			    __func__, bp->b_error);
		if (!async)
			putiobuf(bp);
	}
	rumpvm_flushva();

	mutex_enter(&uobj->vmobjlock);
	goto restart;
}
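This flush is normally reached through the vnode pager's putpages entry point
with the object lock held on entry; the routine above releases it before
returning. A rough caller sketch follows; the helper name and the particular
PGO_* flags chosen are assumptions for illustration, not something the code
above establishes.

/*
 * Hypothetical caller sketch -- not part of the source above.
 * Entered with the object lock held; the putpages routine drops it.
 */
static int
flush_vnode_sketch(struct vnode *vp)
{
	struct uvm_object *uobj = &vp->v_uobj;

	mutex_enter(&uobj->vmobjlock);
	/* PGO_ALLPAGES makes the offset range irrelevant */
	return VOP_PUTPAGES(vp, 0, 0, PGO_ALLPAGES | PGO_CLEANIT);
}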
vaddr_t
uvm_km_alloc1(struct vm_map *map, vsize_t size, vsize_t align, boolean_t zeroit)
{
	vaddr_t kva, loopva;
	voff_t offset;
	struct vm_page *pg;
	UVMHIST_FUNC("uvm_km_alloc1"); UVMHIST_CALLED(maphist);

	UVMHIST_LOG(maphist,"(map=%p, size=0x%lx)", map, size,0,0);
	KASSERT(vm_map_pmap(map) == pmap_kernel());

	size = round_page(size);
	kva = vm_map_min(map);		/* hint */

	/*
	 * allocate some virtual space
	 */
	if (__predict_false(uvm_map(map, &kva, size, uvm.kernel_object,
	    UVM_UNKNOWN_OFFSET, align,
	    UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_NONE,
	    UVM_ADV_RANDOM, 0)) != 0)) {
		UVMHIST_LOG(maphist,"<- done (no VM)",0,0,0,0);
		return(0);
	}

	/*
	 * recover object offset from virtual address
	 */
	offset = kva - vm_map_min(kernel_map);
	UVMHIST_LOG(maphist,"  kva=0x%lx, offset=0x%lx", kva, offset,0,0);

	/*
	 * now allocate the memory.  we must be careful about released pages.
	 */
	loopva = kva;
	while (size) {
		simple_lock(&uvm.kernel_object->vmobjlock);
		pg = uvm_pagelookup(uvm.kernel_object, offset);

		/*
		 * if we found a page in an unallocated region, it must be
		 * released
		 */
		if (pg) {
			if ((pg->pg_flags & PG_RELEASED) == 0)
				panic("uvm_km_alloc1: non-released page");
			atomic_setbits_int(&pg->pg_flags, PG_WANTED);
			UVM_UNLOCK_AND_WAIT(pg, &uvm.kernel_object->vmobjlock,
			    FALSE, "km_alloc", 0);
			continue;   /* retry */
		}

		/* allocate ram */
		pg = uvm_pagealloc(uvm.kernel_object, offset, NULL, 0);
		if (pg) {
			atomic_clearbits_int(&pg->pg_flags, PG_BUSY);
			UVM_PAGE_OWN(pg, NULL);
		}
		simple_unlock(&uvm.kernel_object->vmobjlock);
		if (__predict_false(pg == NULL)) {
			if (curproc == uvm.pagedaemon_proc) {
				/*
				 * It is unfeasible for the page daemon to
				 * sleep for memory, so free what we have
				 * allocated and fail.
				 */
				uvm_unmap(map, kva, loopva - kva);
				return (NULL);
			} else {
				uvm_wait("km_alloc1w");	/* wait for memory */
				continue;
			}
		}

		/*
		 * map it in; note we're never called with an intrsafe
		 * object, so we always use regular old pmap_enter().
		 */
		pmap_enter(map->pmap, loopva, VM_PAGE_TO_PHYS(pg),
		    UVM_PROT_ALL, PMAP_WIRED | VM_PROT_READ | VM_PROT_WRITE);

		loopva += PAGE_SIZE;
		offset += PAGE_SIZE;
		size -= PAGE_SIZE;
	}
	pmap_update(map->pmap);

	/*
	 * zero on request (note that "size" is now zero due to the above loop
	 * so we need to subtract kva from loopva to reconstruct the size).
	 */
	if (zeroit)
		memset((caddr_t)kva, 0, loopva - kva);

	UVMHIST_LOG(maphist,"<- done (kva=0x%lx)", kva,0,0,0);
	return(kva);
}
/* ARGSUSED */
int
sys_mincore(struct proc *p, void *v, register_t *retval)
{
	struct sys_mincore_args /* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(char *) vec;
	} */ *uap = v;
	vm_page_t m;
	char *vec, pgi;
	struct uvm_object *uobj;
	struct vm_amap *amap;
	struct vm_anon *anon;
	vm_map_entry_t entry;
	vaddr_t start, end, lim;
	vm_map_t map;
	vsize_t len, npgs;
	int error = 0;

	map = &p->p_vmspace->vm_map;

	start = (vaddr_t)SCARG(uap, addr);
	len = SCARG(uap, len);
	vec = SCARG(uap, vec);

	if (start & PAGE_MASK)
		return (EINVAL);
	len = round_page(len);
	end = start + len;
	if (end <= start)
		return (EINVAL);

	npgs = len >> PAGE_SHIFT;

	/*
	 * Lock down vec, so our returned status isn't outdated by
	 * storing the status byte for a page.
	 */
	if ((error = uvm_vslock(p, vec, npgs, VM_PROT_WRITE)) != 0)
		return (error);

	vm_map_lock_read(map);

	if (uvm_map_lookup_entry(map, start, &entry) == FALSE) {
		error = ENOMEM;
		goto out;
	}

	for (/* nothing */;
	     entry != &map->header && entry->start < end;
	     entry = entry->next) {
		KASSERT(!UVM_ET_ISSUBMAP(entry));
		KASSERT(start >= entry->start);

		/* Make sure there are no holes. */
		if (entry->end < end &&
		    (entry->next == &map->header ||
		     entry->next->start > entry->end)) {
			error = ENOMEM;
			goto out;
		}

		lim = end < entry->end ? end : entry->end;

		/*
		 * Special case for objects with no "real" pages.  Those
		 * are always considered resident (mapped devices).
		 */
		if (UVM_ET_ISOBJ(entry)) {
			KASSERT(!UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj));
			if (entry->object.uvm_obj->pgops->pgo_releasepg
			    == NULL) {
				pgi = 1;
				for (/* nothing */; start < lim;
				     start += PAGE_SIZE, vec++)
					copyout(&pgi, vec, sizeof(char));
				continue;
			}
		}

		amap = entry->aref.ar_amap;	/* top layer */
		uobj = entry->object.uvm_obj;	/* bottom layer */

		if (uobj != NULL)
			simple_lock(&uobj->vmobjlock);

		for (/* nothing */; start < lim; start += PAGE_SIZE, vec++) {
			pgi = 0;
			if (amap != NULL) {
				/* Check the top layer first. */
				anon = amap_lookup(&entry->aref,
				    start - entry->start);
				/* Don't need to lock anon here. */
				if (anon != NULL && anon->an_page != NULL) {
					/*
					 * Anon has the page for this entry
					 * offset.
					 */
					pgi = 1;
				}
			}

			if (uobj != NULL && pgi == 0) {
				/* Check the bottom layer. */
				m = uvm_pagelookup(uobj,
				    entry->offset + (start - entry->start));
				if (m != NULL) {
					/*
					 * Object has the page for this entry
					 * offset.
					 */
					pgi = 1;
				}
			}

			copyout(&pgi, vec, sizeof(char));
		}

		if (uobj != NULL)
			simple_unlock(&uobj->vmobjlock);
	}

 out:
	vm_map_unlock_read(map);
	uvm_vsunlock(p, SCARG(uap, vec), npgs);
	return (error);
}