static int
ao_get(struct uvm_object *uobj, voff_t off, struct vm_page **pgs,
	int *npages, int centeridx, vm_prot_t access_type, int advice,
	int flags)
{
	struct vm_page *pg;
	int i;

	if (centeridx)
		panic("%s: centeridx != 0 not supported", __func__);

	/* loop over pages */
	off = trunc_page(off);
	for (i = 0; i < *npages; i++) {
 retrylookup:
		pg = uvm_pagelookup(uobj, off + (i << PAGE_SHIFT));
		if (pg) {
			if (pg->flags & PG_BUSY) {
				pg->flags |= PG_WANTED;
				UVM_UNLOCK_AND_WAIT(pg, &uobj->vmobjlock,
				    0, "aogetpg", 0);
				/* relock before retrying the lookup */
				mutex_enter(&uobj->vmobjlock);
				goto retrylookup;
			}
			pg->flags |= PG_BUSY;
			pgs[i] = pg;
		} else {
			pg = rumpvm_makepage(uobj, off + (i << PAGE_SHIFT));
			pgs[i] = pg;
		}
	}
	mutex_exit(&uobj->vmobjlock);

	return 0;
}
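The PG_BUSY/PG_WANTED handshake in ao_get is the standard UVM idiom: if someone else owns the page, advertise interest, atomically drop the object lock while sleeping, then relock and retry the lookup. Below is a minimal userland sketch of that idiom using pthreads; page_t, page_busy, and page_unbusy are invented names for this illustration, not UVM interfaces.

#include <pthread.h>
#include <stdbool.h>

typedef struct {
	pthread_mutex_t lock;	/* stands in for uobj->vmobjlock */
	pthread_cond_t cv;	/* stands in for the sleep/wakeup channel */
	bool busy;		/* PG_BUSY: someone else owns the page */
	bool wanted;		/* PG_WANTED: a waiter wants a wakeup */
} page_t;

/* acquire exclusive ownership of the page, waiting out other owners */
static void
page_busy(page_t *pg)
{

	pthread_mutex_lock(&pg->lock);
	while (pg->busy) {
		pg->wanted = true;
		/*
		 * "unlock and wait": the lock is dropped while sleeping.
		 * Unlike UVM_UNLOCK_AND_WAIT, pthread_cond_wait relocks
		 * before returning; the kernel code must relock by hand.
		 */
		pthread_cond_wait(&pg->cv, &pg->lock);
	}
	pg->busy = true;
	pthread_mutex_unlock(&pg->lock);
}

/* release ownership and wake any waiters, mirroring the unbusy path */
static void
page_unbusy(page_t *pg)
{

	pthread_mutex_lock(&pg->lock);
	pg->busy = false;
	if (pg->wanted) {
		pg->wanted = false;
		pthread_cond_broadcast(&pg->cv);
	}
	pthread_mutex_unlock(&pg->lock);
}

int
main(void)
{
	page_t pg = { PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER,
	    false, false };

	page_busy(&pg);
	page_unbusy(&pg);
	return 0;
}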
static void
thread(void *arg)
{

	mutex_enter(uobj->vmobjlock);
	threadrun = true;
	cv_signal(&tcv);
	testpg->flags |= PG_WANTED;
	UVM_UNLOCK_AND_WAIT(testpg, uobj->vmobjlock, false, "tw", 0);
	kthread_exit(0);
}
int
uvmfault_anonget(struct uvm_faultinfo *ufi, struct vm_amap *amap,
    struct vm_anon *anon)
{
	boolean_t we_own;	/* we own anon's page? */
	boolean_t locked;	/* did we relock? */
	struct vm_page *pg;
	int result;

	result = 0;		/* XXX shut up gcc */
	uvmexp.fltanget++;
	/* bump rusage counters */
	if (anon->an_page)
		curproc->p_ru.ru_minflt++;
	else
		curproc->p_ru.ru_majflt++;

	/*
	 * loop until we get it, or fail.
	 */
	while (1) {
		we_own = FALSE;		/* TRUE if we set PG_BUSY on a page */
		pg = anon->an_page;

		/*
		 * if there is a resident page and it is loaned, then anon
		 * may not own it.  call out to uvm_anon_lockloanpg() to
		 * ensure the real owner of the page has been identified
		 * and locked.
		 */
		if (pg && pg->loan_count)
			pg = uvm_anon_lockloanpg(anon);

		/*
		 * page there?  make sure it is not busy/released.
		 */
		if (pg) {
			/*
			 * at this point, if the page has a uobject [meaning
			 * we have it on loan], then that uobject is locked
			 * by us!  if the page is busy, we drop all the
			 * locks (including uobject) and try again.
			 */
			if ((pg->pg_flags & (PG_BUSY|PG_RELEASED)) == 0)
				return (VM_PAGER_OK);
			atomic_setbits_int(&pg->pg_flags, PG_WANTED);
			uvmexp.fltpgwait++;

			/*
			 * the last unlock must be an atomic unlock+wait on
			 * the owner of page
			 */
			if (pg->uobject) {	/* owner is uobject ? */
				uvmfault_unlockall(ufi, amap, NULL, anon);
				UVM_UNLOCK_AND_WAIT(pg,
				    &pg->uobject->vmobjlock,
				    FALSE, "anonget1", 0);
			} else {
				/* anon owns page */
				uvmfault_unlockall(ufi, amap, NULL, NULL);
				UVM_UNLOCK_AND_WAIT(pg, &anon->an_lock, 0,
				    "anonget2", 0);
			}
			/* ready to relock and try again */
		} else {
			/*
			 * no page, we must try and bring it in.
			 */
			pg = uvm_pagealloc(NULL, 0, anon, 0);

			if (pg == NULL) {	/* out of RAM.  */
				uvmfault_unlockall(ufi, amap, NULL, anon);
				uvmexp.fltnoram++;
				uvm_wait("flt_noram1");
				/* ready to relock and try again */
			} else {
				/* we set the PG_BUSY bit */
				we_own = TRUE;
				uvmfault_unlockall(ufi, amap, NULL, anon);

				/*
				 * we are passing a PG_BUSY+PG_FAKE+PG_CLEAN
				 * page into the uvm_swap_get function with
				 * all data structures unlocked.  note that
				 * it is ok to read an_swslot here because
				 * we hold PG_BUSY on the page.
				 */
				uvmexp.pageins++;
				result = uvm_swap_get(pg, anon->an_swslot,
				    PGO_SYNCIO);

				/*
				 * we clean up after the i/o below in the
				 * "we_own" case
				 */
				/* ready to relock and try again */
			}
		}

		/*
		 * now relock and try again
		 */
		locked = uvmfault_relock(ufi);
		if (locked || we_own)
			simple_lock(&anon->an_lock);

		/*
		 * if we own the page (i.e. we set PG_BUSY), then we need
		 * to clean up after the I/O.  there are three cases to
		 * consider:
		 *   [1] page released during I/O: free anon and ReFault.
		 *   [2] I/O not OK.  free the page and cause the fault
		 *       to fail.
		 *   [3] I/O OK!  activate the page and sync with the
		 *       non-we_own case (i.e. drop anon lock if not locked).
		 */
		if (we_own) {
			if (pg->pg_flags & PG_WANTED) {
				/* still holding object lock */
				wakeup(pg);
			}

			/* un-busy! */
			atomic_clearbits_int(&pg->pg_flags,
			    PG_WANTED|PG_BUSY|PG_FAKE);
			UVM_PAGE_OWN(pg, NULL);

			/*
			 * if we were RELEASED during I/O, then our anon is
			 * no longer part of an amap.  we need to free the
			 * anon and try again.
			 */
			if (pg->pg_flags & PG_RELEASED) {
				pmap_page_protect(pg, VM_PROT_NONE);
				simple_unlock(&anon->an_lock);
				uvm_anfree(anon);	/* frees page for us */
				if (locked)
					uvmfault_unlockall(ufi, amap, NULL,
					    NULL);
				uvmexp.fltpgrele++;
				return (VM_PAGER_REFAULT);	/* refault! */
			}

			if (result != VM_PAGER_OK) {
				KASSERT(result != VM_PAGER_PEND);

				/* remove page from anon */
				anon->an_page = NULL;

				/*
				 * remove the swap slot from the anon
				 * and mark the anon as having no real slot.
				 * don't free the swap slot, thus preventing
				 * it from being used again.
				 */
				uvm_swap_markbad(anon->an_swslot, 1);
				anon->an_swslot = SWSLOT_BAD;

				/*
				 * note: page was never !PG_BUSY, so it
				 * can't be mapped and thus no need to
				 * pmap_page_protect it...
				 */
				uvm_lock_pageq();
				uvm_pagefree(pg);
				uvm_unlock_pageq();

				if (locked)
					uvmfault_unlockall(ufi, amap, NULL,
					    anon);
				else
					simple_unlock(&anon->an_lock);
				return (VM_PAGER_ERROR);
			}

			/*
			 * must be OK, clear modify (already PG_CLEAN)
			 * and activate
			 */
			pmap_clear_modify(pg);
			uvm_lock_pageq();
			uvm_pageactivate(pg);
			uvm_unlock_pageq();
			if (!locked)
				simple_unlock(&anon->an_lock);
		}

		/*
		 * we were not able to relock.  restart fault.
		 */
		if (!locked)
			return (VM_PAGER_REFAULT);

		/*
		 * verify no one has touched the amap and moved the anon on us.
		 */
		if (ufi != NULL &&
		    amap_lookup(&ufi->entry->aref,
		    ufi->orig_rvaddr - ufi->entry->start) != anon) {
			uvmfault_unlockall(ufi, amap, NULL, anon);
			return (VM_PAGER_REFAULT);
		}

		/*
		 * try it again!
		 */
		uvmexp.fltanretry++;
		continue;
	} /* while (1) */
	/*NOTREACHED*/
}
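The central discipline in uvmfault_anonget is that whenever every lock is dropped, to sleep on a busy page or to wait for swap I/O, the world may have changed; the code therefore relocks and revalidates with amap_lookup before trusting anything, returning VM_PAGER_REFAULT when the check fails. The following is a minimal userland sketch of that relock-and-revalidate pattern, with a generation counter standing in for the amap lookup; all names here (mapping_t, fetch_or_refault, do_io) are hypothetical.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

typedef struct {
	pthread_mutex_t lock;	/* stands in for the amap/anon locks */
	unsigned gen;		/* bumped whenever the "amap" changes */
	int value;		/* stands in for the anon's page */
} mapping_t;

static void
do_io(void)
{

	/* placeholder for uvm_swap_get(): all locks are dropped here */
}

/*
 * Fetch m->value, dropping all locks across the "I/O".  Returns true on
 * success; false means the mapping changed while we slept and the caller
 * must restart from scratch, just like VM_PAGER_REFAULT.
 */
static bool
fetch_or_refault(mapping_t *m, int *out)
{
	unsigned gen;

	pthread_mutex_lock(&m->lock);
	gen = m->gen;
	pthread_mutex_unlock(&m->lock);	/* drop everything for "I/O" */

	do_io();

	pthread_mutex_lock(&m->lock);
	if (m->gen != gen) {		/* amap_lookup() mismatch */
		pthread_mutex_unlock(&m->lock);
		return false;
	}
	*out = m->value;
	pthread_mutex_unlock(&m->lock);
	return true;
}

int
main(void)
{
	mapping_t m = { PTHREAD_MUTEX_INITIALIZER, 0, 42 };
	int v;

	while (!fetch_or_refault(&m, &v))
		;			/* refault: retry from the top */
	printf("got %d\n", v);
	return 0;
}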
/*
 * miscfs/genfs getpages routine.  This is a fair bit simpler than the
 * kernel counterpart since we're not being executed from a fault handler
 * and generally don't need to care about PGO_LOCKED or other cruft.
 * We do, however, need to care about page locking and we keep trying until
 * we get all the pages within the range.  The object locking protocol
 * is the same as for the kernel: enter with the object lock held,
 * return with it released.
 */
int
genfs_getpages(void *v)
{
	struct vop_getpages_args /* {
		struct vnode *a_vp;
		voff_t a_offset;
		struct vm_page **a_m;
		int *a_count;
		int a_centeridx;
		vm_prot_t a_access_type;
		int a_advice;
		int a_flags;
	} */ *ap = v;

	struct vnode *vp = ap->a_vp;
	struct uvm_object *uobj = (struct uvm_object *)vp;
	struct vm_page *pg;
	voff_t curoff, endoff;
	off_t diskeof;
	size_t bufsize, remain, bufoff, xfersize;
	uint8_t *tmpbuf;
	int bshift = vp->v_mount->mnt_fs_bshift;
	int bsize = 1 << bshift;
	int count = *ap->a_count;
	int async;
	int i, error;

	/*
	 * Ignore async for now, the structure of this routine
	 * doesn't exactly allow for it ...
	 */
	async = 0;

	if (ap->a_centeridx != 0)
		panic("%s: centeridx != 0 not supported", __func__);

	if (ap->a_access_type & VM_PROT_WRITE)
		vp->v_iflag |= VI_ONWORKLST;

	curoff = ap->a_offset & ~PAGE_MASK;
	for (i = 0; i < count; i++, curoff += PAGE_SIZE) {
 retrylookup:
		pg = uvm_pagelookup(uobj, curoff);
		if (pg == NULL)
			break;

		/* page is busy?  we need to wait until it's released */
		if (pg->flags & PG_BUSY) {
			pg->flags |= PG_WANTED;
			UVM_UNLOCK_AND_WAIT(pg, &uobj->vmobjlock, 0,
			    "getpg", 0);
			mutex_enter(&uobj->vmobjlock);
			goto retrylookup;
		}
		pg->flags |= PG_BUSY;
		if (pg->flags & PG_FAKE)
			break;
		ap->a_m[i] = pg;
	}

	/* got everything?  if so, just return */
	if (i == count) {
		mutex_exit(&uobj->vmobjlock);
		return 0;
	}

	/*
	 * didn't?  Ok, allocate backing pages.  Start from the first
	 * one we missed.
	 */
	for (; i < count; i++, curoff += PAGE_SIZE) {
 retrylookup2:
		pg = uvm_pagelookup(uobj, curoff);

		/* found?  busy it and be happy */
		if (pg) {
			if (pg->flags & PG_BUSY) {
				pg->flags |= PG_WANTED;
				UVM_UNLOCK_AND_WAIT(pg, &uobj->vmobjlock, 0,
				    "getpg2", 0);
				mutex_enter(&uobj->vmobjlock);
				goto retrylookup2;
			} else {
				pg->flags |= PG_BUSY;
			}

		/* not found?  make a new page */
		} else {
			pg = rumpvm_makepage(uobj, curoff);
		}
		ap->a_m[i] = pg;
	}

	/*
	 * We have done all the clerical work and have all pages busied.
	 * Release the vm object for other consumers.
	 */
	mutex_exit(&uobj->vmobjlock);

	/*
	 * Now, we have all the pages here & busy.  Transfer the range
	 * starting from the missing offset and transfer into the
	 * page buffers.
	 */
	GOP_SIZE(vp, vp->v_size, &diskeof, 0);

	/* align to boundaries */
	endoff = trunc_page(ap->a_offset) + (count << PAGE_SHIFT);
	endoff = MIN(endoff, ((vp->v_writesize+bsize-1) & ~(bsize-1)));
	curoff = ap->a_offset & ~(MAX(bsize,PAGE_SIZE)-1);
	remain = endoff - curoff;
	if (diskeof > curoff)
		remain = MIN(remain, diskeof - curoff);

	DPRINTF(("a_offset: %llx, startoff: 0x%llx, endoff 0x%llx\n",
	    (unsigned long long)ap->a_offset, (unsigned long long)curoff,
	    (unsigned long long)endoff));

	/* read everything into a buffer */
	bufsize = round_page(remain);
	tmpbuf = kmem_zalloc(bufsize, KM_SLEEP);
	for (bufoff = 0; remain; remain -= xfersize, bufoff += xfersize) {
		struct buf *bp;
		struct vnode *devvp;
		daddr_t lbn, bn;
		int run;

		lbn = (curoff + bufoff) >> bshift;
		/* XXX: assume eof */
		error = VOP_BMAP(vp, lbn, &devvp, &bn, &run);
		if (error)
			panic("%s: VOP_BMAP & lazy bum: %d", __func__, error);

		DPRINTF(("lbn %d (off %d) -> bn %d run %d\n", (int)lbn,
		    (int)(curoff+bufoff), (int)bn, run));
		xfersize = MIN(((lbn+1+run) << bshift) - (curoff+bufoff),
		    remain);

		/* hole? */
		if (bn == -1) {
			memset(tmpbuf + bufoff, 0, xfersize);
			continue;
		}

		bp = getiobuf(vp, true);

		bp->b_data = tmpbuf + bufoff;
		bp->b_bcount = xfersize;
		bp->b_blkno = bn;
		bp->b_lblkno = 0;
		bp->b_flags = B_READ;
		bp->b_cflags = BC_BUSY;

		if (async) {
			bp->b_flags |= B_ASYNC;
			bp->b_iodone = uvm_aio_biodone;
		}

		VOP_STRATEGY(devvp, bp);
		if (bp->b_error)
			panic("%s: VOP_STRATEGY, lazy bum", __func__);

		if (!async)
			putiobuf(bp);
	}

	/* skip to beginning of pages we're interested in */
	bufoff = 0;
	while (round_page(curoff + bufoff) < trunc_page(ap->a_offset))
		bufoff += PAGE_SIZE;

	DPRINTF(("first page offset 0x%x\n", (int)(curoff + bufoff)));

	for (i = 0; i < count; i++, bufoff += PAGE_SIZE) {
		/* past our prime? */
		if (curoff + bufoff >= endoff)
			break;

		pg = uvm_pagelookup(&vp->v_uobj, curoff + bufoff);
		KASSERT(pg);
		DPRINTF(("got page %p (off 0x%x)\n", pg,
		    (int)(curoff+bufoff)));
		if (pg->flags & PG_FAKE) {
			memcpy((void *)pg->uanon, tmpbuf+bufoff, PAGE_SIZE);
			pg->flags &= ~PG_FAKE;
			pg->flags |= PG_CLEAN;
		}
		ap->a_m[i] = pg;
	}
	*ap->a_count = i;

	kmem_free(tmpbuf, bufsize);
	return 0;
}
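The sizing of each transfer in the loop above comes straight from VOP_BMAP: the mapping is valid for run extra contiguous blocks past lbn, so one I/O may cover up to ((lbn+1+run) << bshift) - (curoff+bufoff) bytes, capped by what remains of the request, and bn == -1 marks a hole to be zero-filled. The standalone program below exercises just that arithmetic against a made-up block map; bmap(), BSHIFT, and the hole at lbn 2 are inventions for the demonstration.

#include <stdio.h>
#include <stdint.h>

#define BSHIFT	12		/* hypothetical 4k file system blocks */
#define MIN(a, b) ((a) < (b) ? (a) : (b))

/*
 * toy bmap: logical block -> (device block, run of extra contiguous
 * blocks), with lbn 2 being a hole
 */
static int64_t
bmap(int64_t lbn, int *run)
{

	if (lbn == 2) {
		*run = 0;
		return -1;	/* hole */
	}
	*run = 1;		/* one extra contiguous block */
	return 100 + lbn;
}

int
main(void)
{
	uint64_t curoff = 0, bufoff, remain = 4 << BSHIFT, xfersize;

	for (bufoff = 0; remain; remain -= xfersize, bufoff += xfersize) {
		int64_t lbn = (curoff + bufoff) >> BSHIFT;
		int run;
		int64_t bn = bmap(lbn, &run);

		/*
		 * take as much as the contiguous run allows, capped by
		 * what is left of the request
		 */
		xfersize = MIN((((uint64_t)(lbn + 1 + run) << BSHIFT)
		    - (curoff + bufoff)), remain);

		printf("off %llu: lbn %lld -> bn %lld run %d xfer %llu%s\n",
		    (unsigned long long)(curoff + bufoff), (long long)lbn,
		    (long long)bn, run, (unsigned long long)xfersize,
		    bn == -1 ? " (hole, zero-fill)" : "");
	}
	return 0;
}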
vaddr_t
uvm_km_alloc1(struct vm_map *map, vsize_t size, vsize_t align,
    boolean_t zeroit)
{
	vaddr_t kva, loopva;
	voff_t offset;
	struct vm_page *pg;
	UVMHIST_FUNC("uvm_km_alloc1"); UVMHIST_CALLED(maphist);

	UVMHIST_LOG(maphist, "(map=%p, size=0x%lx)", map, size, 0, 0);
	KASSERT(vm_map_pmap(map) == pmap_kernel());

	size = round_page(size);
	kva = vm_map_min(map);		/* hint */

	/*
	 * allocate some virtual space
	 */
	if (__predict_false(uvm_map(map, &kva, size, uvm.kernel_object,
	    UVM_UNKNOWN_OFFSET, align,
	    UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_NONE,
	    UVM_ADV_RANDOM, 0)) != 0)) {
		UVMHIST_LOG(maphist, "<- done (no VM)", 0, 0, 0, 0);
		return (0);
	}

	/*
	 * recover object offset from virtual address
	 */
	offset = kva - vm_map_min(kernel_map);
	UVMHIST_LOG(maphist, "  kva=0x%lx, offset=0x%lx", kva, offset, 0, 0);

	/*
	 * now allocate the memory.  we must be careful about released pages.
	 */
	loopva = kva;
	while (size) {
		simple_lock(&uvm.kernel_object->vmobjlock);
		pg = uvm_pagelookup(uvm.kernel_object, offset);

		/*
		 * if we found a page in an unallocated region, it must be
		 * released
		 */
		if (pg) {
			if ((pg->pg_flags & PG_RELEASED) == 0)
				panic("uvm_km_alloc1: non-released page");
			atomic_setbits_int(&pg->pg_flags, PG_WANTED);
			UVM_UNLOCK_AND_WAIT(pg,
			    &uvm.kernel_object->vmobjlock, FALSE,
			    "km_alloc", 0);
			continue;		/* retry */
		}

		/* allocate ram */
		pg = uvm_pagealloc(uvm.kernel_object, offset, NULL, 0);
		if (pg) {
			atomic_clearbits_int(&pg->pg_flags, PG_BUSY);
			UVM_PAGE_OWN(pg, NULL);
		}
		simple_unlock(&uvm.kernel_object->vmobjlock);

		if (__predict_false(pg == NULL)) {
			if (curproc == uvm.pagedaemon_proc) {
				/*
				 * It is unfeasible for the page daemon to
				 * sleep for memory, so free what we have
				 * allocated and fail.
				 */
				uvm_unmap(map, kva, loopva - kva);
				return (0);
			} else {
				uvm_wait("km_alloc1w"); /* wait for memory */
				continue;
			}
		}

		/*
		 * map it in; note we're never called with an intrsafe
		 * object, so we always use regular old pmap_enter().
		 */
		pmap_enter(map->pmap, loopva, VM_PAGE_TO_PHYS(pg),
		    UVM_PROT_ALL, PMAP_WIRED | VM_PROT_READ | VM_PROT_WRITE);

		loopva += PAGE_SIZE;
		offset += PAGE_SIZE;
		size -= PAGE_SIZE;
	}
	pmap_update(map->pmap);

	/*
	 * zero on request (note that "size" is now zero due to the above loop
	 * so we need to subtract kva from loopva to reconstruct the size).
	 */
	if (zeroit)
		memset((caddr_t)kva, 0, loopva - kva);

	UVMHIST_LOG(maphist, "<- done (kva=0x%lx)", kva, 0, 0, 0);
	return (kva);
}
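uvm_km_alloc1 splits allocation into two phases: reserve virtual space with uvm_map, then commit physical pages one at a time with uvm_pagealloc and pmap_enter, sleeping in uvm_wait when RAM is short (unless the caller is the pagedaemon, which must fail instead). As a loose userland analogue of that reserve-then-commit split, the sketch below reserves address space with a PROT_NONE mmap and commits it page by page with mprotect; kernel details like the pagedaemon check and released-page handling have no counterpart here.

#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int
main(void)
{
	size_t size = 4 * 65536;
	long pagesz = sysconf(_SC_PAGESIZE);
	size_t off;

	/*
	 * step 1: reserve virtual space only, like uvm_map() on
	 * uvm.kernel_object -- no usable backing yet
	 */
	char *kva = mmap(NULL, size, PROT_NONE,
	    MAP_PRIVATE | MAP_ANON, -1, 0);
	if (kva == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	/*
	 * step 2: commit it page by page, like the uvm_pagealloc() +
	 * pmap_enter() loop above
	 */
	for (off = 0; off < size; off += (size_t)pagesz) {
		if (mprotect(kva + off, (size_t)pagesz,
		    PROT_READ | PROT_WRITE) == -1) {
			perror("mprotect");
			return 1;
		}
	}

	memset(kva, 0, size);	/* the zeroit case */
	printf("reserved and committed %zu bytes at %p\n", size,
	    (void *)kva);
	munmap(kva, size);
	return 0;
}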