vaddr_t
uvm_km_valloc_align(struct vm_map *map, vsize_t size, vsize_t align)
{
	vaddr_t kva;
	UVMHIST_FUNC("uvm_km_valloc"); UVMHIST_CALLED(maphist);

	UVMHIST_LOG(maphist, "(map=%p, size=0x%lx)", map, size, 0,0);
	KASSERT(vm_map_pmap(map) == pmap_kernel());

	size = round_page(size);
	kva = vm_map_min(map);		/* hint */

	/*
	 * allocate some virtual space. will be demand filled by kernel_object.
	 */
	if (__predict_false(uvm_map(map, &kva, size, uvm.kernel_object,
	    UVM_UNKNOWN_OFFSET, align,
	    UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_NONE,
	    UVM_ADV_RANDOM, 0)) != 0)) {
		UVMHIST_LOG(maphist, "<- done (no VM)", 0,0,0,0);
		return(0);
	}

	UVMHIST_LOG(maphist, "<- done (kva=0x%lx)", kva,0,0,0);
	return(kva);
}
void
amap_wipeout(struct vm_amap *amap)
{
	int lcv, slot;
	struct vm_anon *anon;
	UVMHIST_FUNC("amap_wipeout"); UVMHIST_CALLED(maphist);
	UVMHIST_LOG(maphist,"(amap=0x%x)", amap, 0,0,0);

	KASSERT(amap->am_ref == 0);

	if (__predict_false((amap->am_flags & AMAP_SWAPOFF) != 0)) {
		/*
		 * amap_swap_off will call us again.
		 */
		amap_unlock(amap);
		return;
	}
	amap_list_remove(amap);
	amap_unlock(amap);

	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
		int refs;

		slot = amap->am_slots[lcv];
		anon = amap->am_anon[slot];

		if (anon == NULL || anon->an_ref == 0)
			panic("amap_wipeout: corrupt amap");

		mutex_enter(&anon->an_lock);
		UVMHIST_LOG(maphist," processing anon 0x%x, ref=%d", anon,
		    anon->an_ref, 0, 0);
		refs = --anon->an_ref;
		mutex_exit(&anon->an_lock);
		if (refs == 0) {
			/*
			 * we had the last reference to a vm_anon. free it.
			 */
			uvm_anfree(anon);
		}

		if (curlwp->l_cpu->ci_schedstate.spc_flags & SPCF_SHOULDYIELD)
			preempt();
	}

	/*
	 * now we free the map
	 */

	amap->am_nused = 0;
	amap_free(amap);	/* will unlock and free amap */
	UVMHIST_LOG(maphist,"<- done!", 0,0,0,0);
}
int
ext2fs_gop_alloc(struct vnode *vp, off_t off, off_t len, int flags,
    kauth_cred_t cred)
{
	struct inode *ip = VTOI(vp);
	struct m_ext2fs *fs = ip->i_e2fs;
	int error, delta, bshift, bsize;
	UVMHIST_FUNC("ext2fs_gop_alloc"); UVMHIST_CALLED(ubchist);

	bshift = fs->e2fs_bshift;
	bsize = 1 << bshift;

	delta = off & (bsize - 1);
	off -= delta;
	len += delta;

	while (len > 0) {
		bsize = min(bsize, len);
		UVMHIST_LOG(ubchist, "off 0x%x len 0x%x bsize 0x%x",
		    off, len, bsize, 0);

		error = ext2fs_balloc(ip, ext2_lblkno(fs, off), bsize, cred,
		    NULL, flags);
		if (error) {
			UVMHIST_LOG(ubchist, "error %d", error, 0,0,0);
			return error;
		}

		/*
		 * increase file size now, ext2fs_balloc() requires that
		 * EOF be up-to-date before each call.
		 */

		if (ext2fs_size(ip) < off + bsize) {
			UVMHIST_LOG(ubchist, "old 0x%lx%8lx new 0x%lx%8lx",
			    /* Note that arguments are always cast to u_long. */
			    ext2fs_size(ip) >> 32,
			    ext2fs_size(ip) & 0xffffffff,
			    (off + bsize) >> 32,
			    (off + bsize) & 0xffffffff);
			error = ext2fs_setsize(ip, off + bsize);
			if (error) {
				UVMHIST_LOG(ubchist, "error %d", error, 0,0,0);
				return error;
			}
		}

		off += bsize;
		len -= bsize;
	}
	return 0;
}
/*
 * uvm_loanpage: loan out pages to kernel (->K)
 *
 * => pages should be object-owned and the object should be locked.
 * => in the case of error, the object might be unlocked and relocked.
 * => caller should busy the pages beforehand.
 * => pages will be unbusied.
 * => fail with EBUSY if we meet a wired page.
 */
static int
uvm_loanpage(struct vm_page **pgpp, int npages)
{
	int i;
	int error = 0;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	for (i = 0; i < npages; i++) {
		struct vm_page *pg = pgpp[i];

		KASSERT(pg->uobject != NULL);
		KASSERT(pg->uobject == pgpp[0]->uobject);
		KASSERT(!(pg->flags & (PG_RELEASED|PG_PAGEOUT)));
		KASSERT(mutex_owned(&pg->uobject->vmobjlock));
		KASSERT(pg->flags & PG_BUSY);

		mutex_enter(&uvm_pageqlock);
		if (pg->wire_count > 0) {
			mutex_exit(&uvm_pageqlock);
			UVMHIST_LOG(loanhist, "wired %p", pg,0,0,0);
			error = EBUSY;
			break;
		}
		if (pg->loan_count == 0) {
			pmap_page_protect(pg, VM_PROT_READ);
		}
		pg->loan_count++;
		uvm_pageactivate(pg);
		mutex_exit(&uvm_pageqlock);
	}

	uvm_page_unbusy(pgpp, npages);

	if (error) {
		/*
		 * backout what we've done
		 */
		kmutex_t *slock = &pgpp[0]->uobject->vmobjlock;

		mutex_exit(slock);
		uvm_unloan(pgpp, i, UVM_LOAN_TOPAGE);
		mutex_enter(slock);
	}

	UVMHIST_LOG(loanhist, "done %d", error,0,0,0);
	return error;
}
/*
 * uvm_km_pgremove: remove pages from a kernel uvm_object.
 *
 * => when you unmap a part of anonymous kernel memory you want to toss
 *    the pages right away. (this gets called from uvm_unmap_...).
 */
void
uvm_km_pgremove(struct uvm_object *uobj, vaddr_t start, vaddr_t end)
{
	struct vm_page *pp;
	voff_t curoff;
	UVMHIST_FUNC("uvm_km_pgremove"); UVMHIST_CALLED(maphist);

	KASSERT(uobj->pgops == &aobj_pager);

	for (curoff = start ; curoff < end ; curoff += PAGE_SIZE) {
		pp = uvm_pagelookup(uobj, curoff);
		if (pp == NULL)
			continue;

		UVMHIST_LOG(maphist," page %p, busy=%ld", pp,
		    pp->pg_flags & PG_BUSY, 0, 0);

		if (pp->pg_flags & PG_BUSY) {
			/* owner must check for this when done */
			atomic_setbits_int(&pp->pg_flags, PG_RELEASED);
		} else {
			/* free the swap slot... */
			uao_dropswap(uobj, curoff >> PAGE_SHIFT);

			/*
			 * ...and free the page; note it may be on the
			 * active or inactive queues.
			 */
			uvm_lock_pageq();
			uvm_pagefree(pp);
			uvm_unlock_pageq();
		}
	}
}
vaddr_t
uvm_km_valloc_prefer_wait(struct vm_map *map, vsize_t size, voff_t prefer)
{
	vaddr_t kva;
	UVMHIST_FUNC("uvm_km_valloc_prefer_wait"); UVMHIST_CALLED(maphist);

	UVMHIST_LOG(maphist, "(map=%p, size=0x%lx)", map, size, 0,0);
	KASSERT(vm_map_pmap(map) == pmap_kernel());

	size = round_page(size);
	if (size > vm_map_max(map) - vm_map_min(map))
		return(0);

	while (1) {
		kva = vm_map_min(map);		/* hint */

		/*
		 * allocate some virtual space. will be demand filled
		 * by kernel_object.
		 */
		if (__predict_true(uvm_map(map, &kva, size, uvm.kernel_object,
		    prefer, 0, UVM_MAPFLAG(UVM_PROT_ALL,
		    UVM_PROT_ALL, UVM_INH_NONE, UVM_ADV_RANDOM, 0)) == 0)) {
			UVMHIST_LOG(maphist,"<- done (kva=0x%lx)", kva,0,0,0);
			return(kva);
		}

		/*
		 * failed. sleep for a while (on map)
		 */
		UVMHIST_LOG(maphist,"<<<sleeping>>>",0,0,0,0);
		tsleep((caddr_t)map, PVM, "vallocwait", 0);
	}
	/*NOTREACHED*/
}
/*
 * Allocate len bytes at offset off.
 */
int
ufs_gop_alloc(struct vnode *vp, off_t off, off_t len, int flags,
    kauth_cred_t cred)
{
	struct inode *ip = VTOI(vp);
	int error, delta, bshift, bsize;
	UVMHIST_FUNC("ufs_gop_alloc"); UVMHIST_CALLED(ubchist);

	error = 0;
	bshift = vp->v_mount->mnt_fs_bshift;
	bsize = 1 << bshift;

	delta = off & (bsize - 1);
	off -= delta;
	len += delta;

	while (len > 0) {
		bsize = MIN(bsize, len);

		error = UFS_BALLOC(vp, off, bsize, cred, flags, NULL);
		if (error) {
			goto out;
		}

		/*
		 * increase file size now, UFS_BALLOC() requires that
		 * EOF be up-to-date before each call.
		 */

		if (ip->i_size < off + bsize) {
			UVMHIST_LOG(ubchist, "vp %p old 0x%x new 0x%x",
			    vp, ip->i_size, off + bsize, 0);
			ip->i_size = off + bsize;
			DIP_ASSIGN(ip, size, ip->i_size);
		}

		off += bsize;
		len -= bsize;
	}

out:
	UFS_WAPBL_UPDATE(vp, NULL, NULL, 0);
	return error;
}
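The delta arithmetic above (also used by ext2fs_gop_alloc and the balloc_range routines later in this section) rounds the start of the request down to a filesystem block boundary and widens the length so the end of the request is unchanged. A standalone sketch of the same arithmetic, using made-up values and not taken from the kernel sources:

#include <stdio.h>

int
main(void)
{
	long long off = 5000, len = 300;	/* hypothetical request */
	int bshift = 12;			/* assume 4 KB fs blocks */
	int bsize = 1 << bshift;
	int delta = off & (bsize - 1);		/* offset within its block */

	off -= delta;	/* round the start down to the block boundary */
	len += delta;	/* ...and widen the length so the end stays put */
	printf("aligned: off=%lld len=%lld\n", off, len);
	return 0;
}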
struct vm_amap *
amap_alloc(vaddr_t sz, vaddr_t padsz, int waitf)
{
	struct vm_amap *amap;
	int slots, padslots;
	UVMHIST_FUNC("amap_alloc"); UVMHIST_CALLED(maphist);

	AMAP_B2SLOT(slots, sz);
	AMAP_B2SLOT(padslots, padsz);

	amap = amap_alloc1(slots, padslots, waitf);
	if (amap) {
		memset(amap->am_anon, 0,
		    amap->am_maxslot * sizeof(struct vm_anon *));
		amap_list_insert(amap);
	}

	UVMHIST_LOG(maphist,"<- done, amap = 0x%x, sz=%d", amap, sz, 0, 0);
	return(amap);
}
/*
 * amap_free: free an amap
 *
 * => the amap must be unlocked
 * => the amap should have a zero reference count and be empty
 */
void
amap_free(struct vm_amap *amap)
{
	int slots;

	UVMHIST_FUNC("amap_free"); UVMHIST_CALLED(maphist);

	KASSERT(amap->am_ref == 0 && amap->am_nused == 0);
	KASSERT((amap->am_flags & AMAP_SWAPOFF) == 0);
	KASSERT(!mutex_owned(&amap->am_l));
	slots = amap->am_maxslot;
	kmem_free(amap->am_slots, slots * sizeof(*amap->am_slots));
	kmem_free(amap->am_bckptr, slots * sizeof(*amap->am_bckptr));
	kmem_free(amap->am_anon, slots * sizeof(*amap->am_anon));
#ifdef UVM_AMAP_PPREF
	if (amap->am_ppref && amap->am_ppref != PPREF_NONE)
		kmem_free(amap->am_ppref, slots * sizeof(*amap->am_ppref));
#endif
	mutex_destroy(&amap->am_l);
	pool_cache_put(&uvm_amap_cache, amap);
	UVMHIST_LOG(maphist,"<- done, freed amap = 0x%x", amap, 0, 0, 0);
}
/* * amap_extend: extend the size of an amap (if needed) * * => called from uvm_map when we want to extend an amap to cover * a new mapping (rather than allocate a new one) * => amap should be unlocked (we will lock it) * => to safely extend an amap it should have a reference count of * one (thus it can't be shared) */ int amap_extend(struct vm_map_entry *entry, vsize_t addsize, int flags) { struct vm_amap *amap = entry->aref.ar_amap; int slotoff = entry->aref.ar_pageoff; int slotmapped, slotadd, slotneed, slotadded, slotalloc; int slotadj, slotspace; int oldnslots; #ifdef UVM_AMAP_PPREF int *newppref, *oldppref; #endif int i, *newsl, *newbck, *oldsl, *oldbck; struct vm_anon **newover, **oldover; const km_flag_t kmflags = (flags & AMAP_EXTEND_NOWAIT) ? KM_NOSLEEP : KM_SLEEP; UVMHIST_FUNC("amap_extend"); UVMHIST_CALLED(maphist); UVMHIST_LOG(maphist, " (entry=0x%x, addsize=0x%x, flags=0x%x)", entry, addsize, flags, 0); /* * first, determine how many slots we need in the amap. don't * forget that ar_pageoff could be non-zero: this means that * there are some unused slots before us in the amap. */ amap_lock(amap); KASSERT(amap_refs(amap) == 1); /* amap can't be shared */ AMAP_B2SLOT(slotmapped, entry->end - entry->start); /* slots mapped */ AMAP_B2SLOT(slotadd, addsize); /* slots to add */ if (flags & AMAP_EXTEND_FORWARDS) { slotneed = slotoff + slotmapped + slotadd; slotadj = 0; slotspace = 0; } else { slotneed = slotadd + slotmapped; slotadj = slotadd - slotoff; slotspace = amap->am_maxslot - slotmapped; } /* * case 1: we already have enough slots in the map and thus * only need to bump the reference counts on the slots we are * adding. */ if (flags & AMAP_EXTEND_FORWARDS) { if (amap->am_nslot >= slotneed) { #ifdef UVM_AMAP_PPREF if (amap->am_ppref && amap->am_ppref != PPREF_NONE) { amap_pp_adjref(amap, slotoff + slotmapped, slotadd, 1); } #endif amap_unlock(amap); UVMHIST_LOG(maphist, "<- done (case 1f), amap = 0x%x, sltneed=%d", amap, slotneed, 0, 0); return 0; } } else { if (slotadj <= 0) { slotoff -= slotadd; entry->aref.ar_pageoff = slotoff; #ifdef UVM_AMAP_PPREF if (amap->am_ppref && amap->am_ppref != PPREF_NONE) { amap_pp_adjref(amap, slotoff, slotadd, 1); } #endif amap_unlock(amap); UVMHIST_LOG(maphist, "<- done (case 1b), amap = 0x%x, sltneed=%d", amap, slotneed, 0, 0); return 0; } } /* * case 2: we pre-allocated slots for use and we just need to * bump nslot up to take account for these slots. */ if (amap->am_maxslot >= slotneed) { if (flags & AMAP_EXTEND_FORWARDS) { #ifdef UVM_AMAP_PPREF if (amap->am_ppref && amap->am_ppref != PPREF_NONE) { if ((slotoff + slotmapped) < amap->am_nslot) amap_pp_adjref(amap, slotoff + slotmapped, (amap->am_nslot - (slotoff + slotmapped)), 1); pp_setreflen(amap->am_ppref, amap->am_nslot, 1, slotneed - amap->am_nslot); } #endif amap->am_nslot = slotneed; amap_unlock(amap); /* * no need to zero am_anon since that was done at * alloc time and we never shrink an allocation. */ UVMHIST_LOG(maphist,"<- done (case 2f), amap = 0x%x, " "slotneed=%d", amap, slotneed, 0, 0); return 0; } else { #ifdef UVM_AMAP_PPREF if (amap->am_ppref && amap->am_ppref != PPREF_NONE) { /* * Slide up the ref counts on the pages that * are actually in use. */ memmove(amap->am_ppref + slotspace, amap->am_ppref + slotoff, slotmapped * sizeof(int)); /* * Mark the (adjusted) gap at the front as * referenced/not referenced. 
*/ pp_setreflen(amap->am_ppref, 0, 0, slotspace - slotadd); pp_setreflen(amap->am_ppref, slotspace - slotadd, 1, slotadd); } #endif /* * Slide the anon pointers up and clear out * the space we just made. */ memmove(amap->am_anon + slotspace, amap->am_anon + slotoff, slotmapped * sizeof(struct vm_anon*)); memset(amap->am_anon + slotoff, 0, (slotspace - slotoff) * sizeof(struct vm_anon *)); /* * Slide the backpointers up, but don't bother * wiping out the old slots. */ memmove(amap->am_bckptr + slotspace, amap->am_bckptr + slotoff, slotmapped * sizeof(int)); /* * Adjust all the useful active slot numbers. */ for (i = 0; i < amap->am_nused; i++) amap->am_slots[i] += (slotspace - slotoff); /* * We just filled all the empty space in the * front of the amap by activating a few new * slots. */ amap->am_nslot = amap->am_maxslot; entry->aref.ar_pageoff = slotspace - slotadd; amap_unlock(amap); UVMHIST_LOG(maphist,"<- done (case 2b), amap = 0x%x, " "slotneed=%d", amap, slotneed, 0, 0); return 0; } } /* * case 3: we need to malloc a new amap and copy all the amap * data over from old amap to the new one. * * note that the use of a kernel realloc() probably would not * help here, since we wish to abort cleanly if one of the * three (or four) mallocs fails. */ amap_unlock(amap); /* unlock in case we sleep in malloc */ if (slotneed >= UVM_AMAP_LARGE) { return E2BIG; } slotalloc = amap_roundup_slots(slotneed); #ifdef UVM_AMAP_PPREF newppref = NULL; if (amap->am_ppref && amap->am_ppref != PPREF_NONE) newppref = kmem_alloc(slotalloc * sizeof(*newppref), kmflags); #endif newsl = kmem_alloc(slotalloc * sizeof(*newsl), kmflags); newbck = kmem_alloc(slotalloc * sizeof(*newbck), kmflags); newover = kmem_alloc(slotalloc * sizeof(*newover), kmflags); if (newsl == NULL || newbck == NULL || newover == NULL) { #ifdef UVM_AMAP_PPREF if (newppref != NULL) { kmem_free(newppref, slotalloc * sizeof(*newppref)); } #endif if (newsl != NULL) { kmem_free(newsl, slotalloc * sizeof(*newsl)); } if (newbck != NULL) { kmem_free(newbck, slotalloc * sizeof(*newbck)); } if (newover != NULL) { kmem_free(newover, slotalloc * sizeof(*newover)); } return ENOMEM; } amap_lock(amap); KASSERT(amap->am_maxslot < slotneed); /* * now copy everything over to new malloc'd areas... 
*/ slotadded = slotalloc - amap->am_nslot; if (!(flags & AMAP_EXTEND_FORWARDS)) slotspace = slotalloc - slotmapped; /* do am_slots */ oldsl = amap->am_slots; if (flags & AMAP_EXTEND_FORWARDS) memcpy(newsl, oldsl, sizeof(int) * amap->am_nused); else for (i = 0; i < amap->am_nused; i++) newsl[i] = oldsl[i] + slotspace - slotoff; amap->am_slots = newsl; /* do am_anon */ oldover = amap->am_anon; if (flags & AMAP_EXTEND_FORWARDS) { memcpy(newover, oldover, sizeof(struct vm_anon *) * amap->am_nslot); memset(newover + amap->am_nslot, 0, sizeof(struct vm_anon *) * slotadded); } else { memcpy(newover + slotspace, oldover + slotoff, sizeof(struct vm_anon *) * slotmapped); memset(newover, 0, sizeof(struct vm_anon *) * slotspace); } amap->am_anon = newover; /* do am_bckptr */ oldbck = amap->am_bckptr; if (flags & AMAP_EXTEND_FORWARDS) memcpy(newbck, oldbck, sizeof(int) * amap->am_nslot); else memcpy(newbck + slotspace, oldbck + slotoff, sizeof(int) * slotmapped); amap->am_bckptr = newbck; #ifdef UVM_AMAP_PPREF /* do ppref */ oldppref = amap->am_ppref; if (newppref) { if (flags & AMAP_EXTEND_FORWARDS) { memcpy(newppref, oldppref, sizeof(int) * amap->am_nslot); memset(newppref + amap->am_nslot, 0, sizeof(int) * slotadded); } else { memcpy(newppref + slotspace, oldppref + slotoff, sizeof(int) * slotmapped); } amap->am_ppref = newppref; if ((flags & AMAP_EXTEND_FORWARDS) && (slotoff + slotmapped) < amap->am_nslot) amap_pp_adjref(amap, slotoff + slotmapped, (amap->am_nslot - (slotoff + slotmapped)), 1); if (flags & AMAP_EXTEND_FORWARDS) pp_setreflen(newppref, amap->am_nslot, 1, slotneed - amap->am_nslot); else { pp_setreflen(newppref, 0, 0, slotalloc - slotneed); pp_setreflen(newppref, slotalloc - slotneed, 1, slotneed - slotmapped); } } else { if (amap->am_ppref) amap->am_ppref = PPREF_NONE; } #endif /* update master values */ if (flags & AMAP_EXTEND_FORWARDS) amap->am_nslot = slotneed; else { entry->aref.ar_pageoff = slotspace - slotadd; amap->am_nslot = slotalloc; } oldnslots = amap->am_maxslot; amap->am_maxslot = slotalloc; amap_unlock(amap); kmem_free(oldsl, oldnslots * sizeof(*oldsl)); kmem_free(oldbck, oldnslots * sizeof(*oldbck)); kmem_free(oldover, oldnslots * sizeof(*oldover)); #ifdef UVM_AMAP_PPREF if (oldppref && oldppref != PPREF_NONE) kmem_free(oldppref, oldnslots * sizeof(*oldppref)); #endif UVMHIST_LOG(maphist,"<- done (case 3), amap = 0x%x, slotneed=%d", amap, slotneed, 0, 0); return 0; }
int uvm_loan(struct vm_map *map, vaddr_t start, vsize_t len, void *v, int flags) { struct uvm_faultinfo ufi; void **result, **output; int rv, error; UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist); /* * ensure that one and only one of the flags is set */ KASSERT(((flags & UVM_LOAN_TOANON) == 0) ^ ((flags & UVM_LOAN_TOPAGE) == 0)); KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); /* * "output" is a pointer to the current place to put the loaned page. */ result = v; output = &result[0]; /* start at the beginning ... */ /* * while we've got pages to do */ while (len > 0) { /* * fill in params for a call to uvmfault_lookup */ ufi.orig_map = map; ufi.orig_rvaddr = start; ufi.orig_size = len; /* * do the lookup, the only time this will fail is if we hit on * an unmapped region (an error) */ if (!uvmfault_lookup(&ufi, false)) { error = ENOENT; goto fail; } /* * map now locked. now do the loanout... */ rv = uvm_loanentry(&ufi, &output, flags); if (rv < 0) { /* all unlocked due to error */ error = EINVAL; goto fail; } /* * done! the map is unlocked. advance, if possible. * * XXXCDC: could be recoded to hold the map lock with * smarter code (but it only happens on map entry * boundaries, so it isn't that bad). */ if (rv) { rv <<= PAGE_SHIFT; len -= rv; start += rv; } } UVMHIST_LOG(loanhist, "success", 0,0,0,0); return 0; fail: /* * failed to complete loans. drop any loans and return failure code. * map is already unlocked. */ if (output - result) { if (flags & UVM_LOAN_TOANON) { uvm_unloananon((struct vm_anon **)result, output - result); } else { uvm_unloanpage((struct vm_page **)result, output - result); } } UVMHIST_LOG(loanhist, "error %d", error,0,0,0); return (error); }
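As a usage note, the following is a hedged sketch of how a caller might drive uvm_loan() and uvm_unloan() in the UVM_LOAN_TOPAGE case; the helper name, sizing, and error handling are illustrative assumptions, not code taken from the sources above:

/*
 * Illustrative only: loan the pages backing [start, start + len) out of
 * "map" to the kernel, then drop the loan again.  Assumes len is a
 * multiple of PAGE_SIZE.
 */
static int
example_loan_to_kernel(struct vm_map *map, vaddr_t start, vsize_t len)
{
	int npages = len >> PAGE_SHIFT;
	struct vm_page **pgs;
	int error;

	pgs = kmem_alloc(npages * sizeof(*pgs), KM_SLEEP);
	error = uvm_loan(map, start, len, pgs, UVM_LOAN_TOPAGE);
	if (error == 0) {
		/* ... use the loaned (read-only) pages ... */
		uvm_unloan(pgs, npages, UVM_LOAN_TOPAGE);
	}
	kmem_free(pgs, npages * sizeof(*pgs));
	return error;
}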
/*
 * void prefetch_abort_handler(trapframe_t *tf)
 *
 * Abort handler called when instruction execution occurs at
 * a non-existent or restricted (access permissions) memory page.
 * If the address is invalid and we were in SVC mode then panic as
 * the kernel should never prefetch abort.
 * If the address is invalid and the page is mapped then the user process
 * does not have read permission so send it a signal.
 * Otherwise fault the page in and try again.
 */
void
prefetch_abort_handler(trapframe_t *tf)
{
	struct lwp *l;
	struct pcb *pcb __diagused;
	struct vm_map *map;
	vaddr_t fault_pc, va;
	ksiginfo_t ksi;
	int error, user;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);

	/* Update vmmeter statistics */
	curcpu()->ci_data.cpu_ntrap++;

	l = curlwp;
	pcb = lwp_getpcb(l);

	if ((user = TRAP_USERMODE(tf)) != 0)
		LWP_CACHE_CREDS(l, l->l_proc);

	/*
	 * Enable IRQ's (disabled by the abort) This always comes
	 * from user mode so we know interrupts were not disabled.
	 * But we check anyway.
	 */
	KASSERT(!TRAP_USERMODE(tf) || (tf->tf_spsr & IF32_bits) == 0);
	if (__predict_true((tf->tf_spsr & I32_bit) != IF32_bits))
		restore_interrupts(tf->tf_spsr & IF32_bits);

	/* See if the CPU state needs to be fixed up */
	switch (prefetch_abort_fixup(tf)) {
	case ABORT_FIXUP_RETURN:
		KASSERT(!TRAP_USERMODE(tf) || (tf->tf_spsr & IF32_bits) == 0);
		return;
	case ABORT_FIXUP_FAILED:
		/* Deliver a SIGILL to the process */
		KSI_INIT_TRAP(&ksi);
		ksi.ksi_signo = SIGILL;
		ksi.ksi_code = ILL_ILLOPC;
		ksi.ksi_addr = (uint32_t *)(intptr_t) tf->tf_pc;
		lwp_settrapframe(l, tf);
		goto do_trapsignal;
	default:
		break;
	}

	/* Prefetch aborts cannot happen in kernel mode */
	if (__predict_false(!user))
		dab_fatal(tf, 0, tf->tf_pc, NULL, NULL);

	/* Get fault address */
	fault_pc = tf->tf_pc;
	lwp_settrapframe(l, tf);
	UVMHIST_LOG(maphist, " (pc=0x%x, l=0x%x, tf=0x%x)", fault_pc, l, tf, 0);

	/* Ok validate the address, can only execute in USER space */
	if (__predict_false(fault_pc >= VM_MAXUSER_ADDRESS ||
	    (fault_pc < VM_MIN_ADDRESS && vector_page == ARM_VECTORS_LOW))) {
		KSI_INIT_TRAP(&ksi);
		ksi.ksi_signo = SIGSEGV;
		ksi.ksi_code = SEGV_ACCERR;
		ksi.ksi_addr = (uint32_t *)(intptr_t) fault_pc;
		ksi.ksi_trap = fault_pc;
		goto do_trapsignal;
	}

	map = &l->l_proc->p_vmspace->vm_map;
	va = trunc_page(fault_pc);

	/*
	 * See if the pmap can handle this fault on its own...
	 */
#ifdef DEBUG
	last_fault_code = -1;
#endif
	if (pmap_fault_fixup(map->pmap, va, VM_PROT_READ|VM_PROT_EXECUTE, 1)) {
		UVMHIST_LOG (maphist, " <- emulated", 0, 0, 0, 0);
		goto out;
	}

#ifdef DIAGNOSTIC
	if (__predict_false(curcpu()->ci_intr_depth > 0)) {
		printf("\nNon-emulated prefetch abort with intr_depth > 0\n");
		dab_fatal(tf, 0, tf->tf_pc, NULL, NULL);
	}
#endif

	KASSERT(pcb->pcb_onfault == NULL);
	error = uvm_fault(map, va, VM_PROT_READ|VM_PROT_EXECUTE);

	if (__predict_true(error == 0)) {
		UVMHIST_LOG (maphist, " <- uvm", 0, 0, 0, 0);
		goto out;
	}

	KSI_INIT_TRAP(&ksi);

	UVMHIST_LOG (maphist, " <- fatal (%d)", error, 0, 0, 0);

	if (error == ENOMEM) {
		printf("UVM: pid %d (%s), uid %d killed: "
		    "out of swap\n", l->l_proc->p_pid, l->l_proc->p_comm,
		    l->l_cred ? kauth_cred_geteuid(l->l_cred) : -1);
		ksi.ksi_signo = SIGKILL;
	} else
		ksi.ksi_signo = SIGSEGV;

	ksi.ksi_code = SEGV_MAPERR;
	ksi.ksi_addr = (uint32_t *)(intptr_t) fault_pc;
	ksi.ksi_trap = fault_pc;

do_trapsignal:
	call_trapsignal(l, tf, &ksi);

out:
	KASSERT(!TRAP_USERMODE(tf) || (tf->tf_spsr & IF32_bits) == 0);
	userret(l);
}
void data_abort_handler(trapframe_t *tf) { struct vm_map *map; struct lwp * const l = curlwp; struct cpu_info * const ci = curcpu(); u_int far, fsr; vm_prot_t ftype; void *onfault; vaddr_t va; int error; ksiginfo_t ksi; UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist); /* Grab FAR/FSR before enabling interrupts */ far = cpu_faultaddress(); fsr = cpu_faultstatus(); /* Update vmmeter statistics */ ci->ci_data.cpu_ntrap++; /* Re-enable interrupts if they were enabled previously */ KASSERT(!TRAP_USERMODE(tf) || (tf->tf_spsr & IF32_bits) == 0); if (__predict_true((tf->tf_spsr & IF32_bits) != IF32_bits)) restore_interrupts(tf->tf_spsr & IF32_bits); /* Get the current lwp structure */ UVMHIST_LOG(maphist, " (l=%#x, far=%#x, fsr=%#x", l, far, fsr, 0); UVMHIST_LOG(maphist, " tf=%#x, pc=%#x)", tf, tf->tf_pc, 0, 0); /* Data abort came from user mode? */ bool user = (TRAP_USERMODE(tf) != 0); if (user) LWP_CACHE_CREDS(l, l->l_proc); /* Grab the current pcb */ struct pcb * const pcb = lwp_getpcb(l); curcpu()->ci_abt_evs[fsr & FAULT_TYPE_MASK].ev_count++; /* Invoke the appropriate handler, if necessary */ if (__predict_false(data_aborts[fsr & FAULT_TYPE_MASK].func != NULL)) { #ifdef DIAGNOSTIC printf("%s: data_aborts fsr=0x%x far=0x%x\n", __func__, fsr, far); #endif if ((data_aborts[fsr & FAULT_TYPE_MASK].func)(tf, fsr, far, l, &ksi)) goto do_trapsignal; goto out; } /* * At this point, we're dealing with one of the following data aborts: * * FAULT_TRANS_S - Translation -- Section * FAULT_TRANS_P - Translation -- Page * FAULT_DOMAIN_S - Domain -- Section * FAULT_DOMAIN_P - Domain -- Page * FAULT_PERM_S - Permission -- Section * FAULT_PERM_P - Permission -- Page * * These are the main virtual memory-related faults signalled by * the MMU. */ /* fusubailout is used by [fs]uswintr to avoid page faulting */ if (__predict_false(pcb->pcb_onfault == fusubailout)) { tf->tf_r0 = EFAULT; tf->tf_pc = (intptr_t) pcb->pcb_onfault; return; } if (user) { lwp_settrapframe(l, tf); } /* * Make sure the Program Counter is sane. We could fall foul of * someone executing Thumb code, in which case the PC might not * be word-aligned. This would cause a kernel alignment fault * further down if we have to decode the current instruction. */ #ifdef THUMB_CODE /* * XXX: It would be nice to be able to support Thumb in the kernel * at some point. */ if (__predict_false(!user && (tf->tf_pc & 3) != 0)) { printf("\n%s: Misaligned Kernel-mode Program Counter\n", __func__); dab_fatal(tf, fsr, far, l, NULL); } #else if (__predict_false((tf->tf_pc & 3) != 0)) { if (user) { /* * Give the user an illegal instruction signal. */ /* Deliver a SIGILL to the process */ KSI_INIT_TRAP(&ksi); ksi.ksi_signo = SIGILL; ksi.ksi_code = ILL_ILLOPC; ksi.ksi_addr = (uint32_t *)(intptr_t) far; ksi.ksi_trap = fsr; goto do_trapsignal; } /* * The kernel never executes Thumb code. */ printf("\n%s: Misaligned Kernel-mode Program Counter\n", __func__); dab_fatal(tf, fsr, far, l, NULL); } #endif /* See if the CPU state needs to be fixed up */ switch (data_abort_fixup(tf, fsr, far, l)) { case ABORT_FIXUP_RETURN: return; case ABORT_FIXUP_FAILED: /* Deliver a SIGILL to the process */ KSI_INIT_TRAP(&ksi); ksi.ksi_signo = SIGILL; ksi.ksi_code = ILL_ILLOPC; ksi.ksi_addr = (uint32_t *)(intptr_t) far; ksi.ksi_trap = fsr; goto do_trapsignal; default: break; } va = trunc_page((vaddr_t)far); /* * It is only a kernel address space fault iff: * 1. user == 0 and * 2. pcb_onfault not set or * 3. pcb_onfault set and not LDRT/LDRBT/STRT/STRBT instruction. 
	 */
	if (!user &&
	    (va >= VM_MIN_KERNEL_ADDRESS ||
	    (va < VM_MIN_ADDRESS && vector_page == ARM_VECTORS_LOW)) &&
	    __predict_true((pcb->pcb_onfault == NULL ||
	    (read_insn(tf->tf_pc, false) & 0x05200000) != 0x04200000))) {
		map = kernel_map;

		/* Was the fault due to the FPE/IPKDB ? */
		if (__predict_false((tf->tf_spsr & PSR_MODE)==PSR_UND32_MODE)) {
			KSI_INIT_TRAP(&ksi);
			ksi.ksi_signo = SIGSEGV;
			ksi.ksi_code = SEGV_ACCERR;
			ksi.ksi_addr = (uint32_t *)(intptr_t) far;
			ksi.ksi_trap = fsr;

			/*
			 * Force exit via userret()
			 * This is necessary as the FPE is an extension to
			 * userland that actually runs in a privileged mode
			 * but uses USR mode permissions for its accesses.
			 */
			user = true;
			goto do_trapsignal;
		}
	} else {
		map = &l->l_proc->p_vmspace->vm_map;
	}

	/*
	 * We need to know whether the page should be mapped as R or R/W.
	 * Before ARMv6, the MMU did not give us the info as to whether the
	 * fault was caused by a read or a write.
	 *
	 * However, we know that a permission fault can only be the result of
	 * a write to a read-only location, so we can deal with those quickly.
	 *
	 * Otherwise we need to disassemble the instruction responsible to
	 * determine if it was a write.
	 */
	if (CPU_IS_ARMV6_P() || CPU_IS_ARMV7_P()) {
		ftype = (fsr & FAULT_WRITE) ? VM_PROT_WRITE : VM_PROT_READ;
	} else if (IS_PERMISSION_FAULT(fsr)) {
		ftype = VM_PROT_WRITE;
	} else {
#ifdef THUMB_CODE
		/* Fast track the ARM case. */
		if (__predict_false(tf->tf_spsr & PSR_T_bit)) {
			u_int insn = read_thumb_insn(tf->tf_pc, user);
			u_int insn_f8 = insn & 0xf800;
			u_int insn_fe = insn & 0xfe00;

			if (insn_f8 == 0x6000 ||	/* STR(1) */
			    insn_f8 == 0x7000 ||	/* STRB(1) */
			    insn_f8 == 0x8000 ||	/* STRH(1) */
			    insn_f8 == 0x9000 ||	/* STR(3) */
			    insn_f8 == 0xc000 ||	/* STM */
			    insn_fe == 0x5000 ||	/* STR(2) */
			    insn_fe == 0x5200 ||	/* STRH(2) */
			    insn_fe == 0x5400)		/* STRB(2) */
				ftype = VM_PROT_WRITE;
			else
				ftype = VM_PROT_READ;
		}
		else
#endif
		{
			u_int insn = read_insn(tf->tf_pc, user);

			if (((insn & 0x0c100000) == 0x04000000) || /* STR[B] */
			    ((insn & 0x0e1000b0) == 0x000000b0) || /* STR[HD]*/
			    ((insn & 0x0a100000) == 0x08000000) || /* STM/CDT*/
			    ((insn & 0x0f9000f0) == 0x01800090))  /* STREX[BDH] */
				ftype = VM_PROT_WRITE;
			else if ((insn & 0x0fb00ff0) == 0x01000090)/* SWP */
				ftype = VM_PROT_READ | VM_PROT_WRITE;
			else
				ftype = VM_PROT_READ;
		}
	}

	/*
	 * See if the fault is as a result of ref/mod emulation,
	 * or domain mismatch.
	 */
#ifdef DEBUG
	last_fault_code = fsr;
#endif
	if (pmap_fault_fixup(map->pmap, va, ftype, user)) {
		UVMHIST_LOG(maphist, " <- ref/mod emul", 0, 0, 0, 0);
		goto out;
	}

	if (__predict_false(curcpu()->ci_intr_depth > 0)) {
		if (pcb->pcb_onfault) {
			tf->tf_r0 = EINVAL;
			tf->tf_pc = (register_t)(intptr_t) pcb->pcb_onfault;
			return;
		}
		printf("\nNon-emulated page fault with intr_depth > 0\n");
		dab_fatal(tf, fsr, far, l, NULL);
	}

	onfault = pcb->pcb_onfault;
	pcb->pcb_onfault = NULL;
	error = uvm_fault(map, va, ftype);
	pcb->pcb_onfault = onfault;

	if (__predict_true(error == 0)) {
		if (user)
			uvm_grow(l->l_proc, va); /* Record any stack growth */
		else
			ucas_ras_check(tf);
		UVMHIST_LOG(maphist, " <- uvm", 0, 0, 0, 0);
		goto out;
	}

	if (user == 0) {
		if (pcb->pcb_onfault) {
			tf->tf_r0 = error;
			tf->tf_pc = (register_t)(intptr_t) pcb->pcb_onfault;
			return;
		}

		printf("\nuvm_fault(%p, %lx, %x) -> %x\n", map, va, ftype,
		    error);
		dab_fatal(tf, fsr, far, l, NULL);
	}

	KSI_INIT_TRAP(&ksi);

	if (error == ENOMEM) {
		printf("UVM: pid %d (%s), uid %d killed: "
		    "out of swap\n", l->l_proc->p_pid, l->l_proc->p_comm,
		    l->l_cred ?
		    kauth_cred_geteuid(l->l_cred) : -1);
		ksi.ksi_signo = SIGKILL;
	} else
		ksi.ksi_signo = SIGSEGV;

	ksi.ksi_code = (error == EACCES) ? SEGV_ACCERR : SEGV_MAPERR;
	ksi.ksi_addr = (uint32_t *)(intptr_t) far;
	ksi.ksi_trap = fsr;
	UVMHIST_LOG(maphist, " <- error (%d)", error, 0, 0, 0);

do_trapsignal:
	call_trapsignal(l, tf, &ksi);
out:
	/* If returning to user mode, make sure to invoke userret() */
	if (user)
		userret(l);
}
vaddr_t
uvm_km_alloc1(struct vm_map *map, vsize_t size, vsize_t align,
    boolean_t zeroit)
{
	vaddr_t kva, loopva;
	voff_t offset;
	struct vm_page *pg;
	UVMHIST_FUNC("uvm_km_alloc1"); UVMHIST_CALLED(maphist);

	UVMHIST_LOG(maphist,"(map=%p, size=0x%lx)", map, size,0,0);
	KASSERT(vm_map_pmap(map) == pmap_kernel());

	size = round_page(size);
	kva = vm_map_min(map);		/* hint */

	/*
	 * allocate some virtual space
	 */
	if (__predict_false(uvm_map(map, &kva, size, uvm.kernel_object,
	    UVM_UNKNOWN_OFFSET, align,
	    UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_NONE,
	    UVM_ADV_RANDOM, 0)) != 0)) {
		UVMHIST_LOG(maphist,"<- done (no VM)",0,0,0,0);
		return(0);
	}

	/*
	 * recover object offset from virtual address
	 */
	offset = kva - vm_map_min(kernel_map);
	UVMHIST_LOG(maphist," kva=0x%lx, offset=0x%lx", kva, offset,0,0);

	/*
	 * now allocate the memory. we must be careful about released pages.
	 */
	loopva = kva;
	while (size) {
		simple_lock(&uvm.kernel_object->vmobjlock);
		pg = uvm_pagelookup(uvm.kernel_object, offset);

		/*
		 * if we found a page in an unallocated region, it must be
		 * released
		 */
		if (pg) {
			if ((pg->pg_flags & PG_RELEASED) == 0)
				panic("uvm_km_alloc1: non-released page");
			atomic_setbits_int(&pg->pg_flags, PG_WANTED);
			UVM_UNLOCK_AND_WAIT(pg, &uvm.kernel_object->vmobjlock,
			    FALSE, "km_alloc", 0);
			continue;	/* retry */
		}

		/* allocate ram */
		pg = uvm_pagealloc(uvm.kernel_object, offset, NULL, 0);
		if (pg) {
			atomic_clearbits_int(&pg->pg_flags, PG_BUSY);
			UVM_PAGE_OWN(pg, NULL);
		}
		simple_unlock(&uvm.kernel_object->vmobjlock);
		if (__predict_false(pg == NULL)) {
			if (curproc == uvm.pagedaemon_proc) {
				/*
				 * It is unfeasible for the page daemon to
				 * sleep for memory, so free what we have
				 * allocated and fail.
				 */
				uvm_unmap(map, kva, loopva - kva);
				return (0);
			} else {
				uvm_wait("km_alloc1w"); /* wait for memory */
				continue;
			}
		}

		/*
		 * map it in; note we're never called with an intrsafe
		 * object, so we always use regular old pmap_enter().
		 */
		pmap_enter(map->pmap, loopva, VM_PAGE_TO_PHYS(pg),
		    UVM_PROT_ALL, PMAP_WIRED | VM_PROT_READ | VM_PROT_WRITE);

		loopva += PAGE_SIZE;
		offset += PAGE_SIZE;
		size -= PAGE_SIZE;
	}
	pmap_update(map->pmap);

	/*
	 * zero on request (note that "size" is now zero due to the above loop
	 * so we need to subtract kva from loopva to reconstruct the size).
	 */
	if (zeroit)
		memset((caddr_t)kva, 0, loopva - kva);

	UVMHIST_LOG(maphist,"<- done (kva=0x%lx)", kva,0,0,0);
	return(kva);
}
vaddr_t
uvm_km_kmemalloc(struct vm_map *map, struct uvm_object *obj, vsize_t size,
    int flags)
{
	vaddr_t kva, loopva;
	voff_t offset;
	struct vm_page *pg;
	int mapflags;
	UVMHIST_FUNC("uvm_km_kmemalloc"); UVMHIST_CALLED(maphist);

	UVMHIST_LOG(maphist," (map=%p, obj=%p, size=0x%lx, flags=%d)",
	    map, obj, size, flags);
	KASSERT(vm_map_pmap(map) == pmap_kernel());

	/*
	 * we cannot yet make pmap_enter() not sleep
	 * and thus demand that we are called with NOWAIT in that case
	 */
	KASSERT(!((flags & UVM_KMF_NOWAIT) && obj));

	/*
	 * setup for call
	 */
	mapflags = flags & UVM_KMF_NOWAIT ? UVM_FLAG_NOWAIT : 0;
	mapflags |= flags & UVM_KMF_TRYLOCK;

	size = round_page(size);
	kva = vm_map_min(map);	/* hint */

	/*
	 * allocate some virtual space
	 */
	if (__predict_false(uvm_map(map, &kva, size, obj, UVM_UNKNOWN_OFFSET,
	    0, UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW, UVM_INH_NONE,
	    UVM_ADV_RANDOM, mapflags)) != 0)) {
		UVMHIST_LOG(maphist, "<- done (no VM)",0,0,0,0);
		return(0);
	}

	/*
	 * if all we wanted was VA, return now
	 */
	if (flags & UVM_KMF_VALLOC) {
		UVMHIST_LOG(maphist,"<- done valloc (kva=0x%lx)", kva,0,0,0);
		return(kva);
	}

	/*
	 * recover object offset from virtual address
	 */
	if (obj != NULL)
		offset = kva - vm_map_min(kernel_map);
	else
		offset = 0;

	UVMHIST_LOG(maphist, " kva=0x%lx, offset=0x%lx", kva, offset,0,0);

	/*
	 * now allocate and map in the memory... note that we are the only ones
	 * who should ever get a handle on this area of VM.
	 */
	loopva = kva;
	while (loopva != kva + size) {
		pg = uvm_pagealloc(obj, offset, NULL, 0);
		if (pg) {
			atomic_clearbits_int(&pg->pg_flags, PG_BUSY);
			UVM_PAGE_OWN(pg, NULL);
		}

		if (__predict_false(pg == NULL)) {
			if ((flags & UVM_KMF_NOWAIT) ||
			    ((flags & UVM_KMF_CANFAIL) &&
			    uvmexp.swpgonly == uvmexp.swpages)) {
				/* free everything! */
				uvm_unmap(map, kva, kva + size);
				return (0);
			} else {
				uvm_wait("km_getwait2"); /* sleep here */
				continue;
			}
		}

		/*
		 * map it in: note that we call pmap_enter with the map and
		 * object unlocked in case we are kmem_map.
		 *
		 * pager mappings that must not sleep here will incidentally
		 * be installed using pmap_kenter_pa() and thus not sleep!
		 */
		if (obj == NULL) {
			pmap_kenter_pa(loopva, VM_PAGE_TO_PHYS(pg),
			    UVM_PROT_RW);
		} else {
			pmap_enter(map->pmap, loopva, VM_PAGE_TO_PHYS(pg),
			    UVM_PROT_RW,
			    PMAP_WIRED | VM_PROT_READ | VM_PROT_WRITE);
		}
		loopva += PAGE_SIZE;
		offset += PAGE_SIZE;
	}
	pmap_update(pmap_kernel());

	UVMHIST_LOG(maphist,"<- done (kva=0x%lx)", kva,0,0,0);
	return(kva);
}
void amap_copy(struct vm_map *map, struct vm_map_entry *entry, int flags, vaddr_t startva, vaddr_t endva) { struct vm_amap *amap, *srcamap; int slots, lcv; vaddr_t chunksize; const int waitf = (flags & AMAP_COPY_NOWAIT) ? UVM_FLAG_NOWAIT : 0; const bool canchunk = (flags & AMAP_COPY_NOCHUNK) == 0; UVMHIST_FUNC("amap_copy"); UVMHIST_CALLED(maphist); UVMHIST_LOG(maphist, " (map=%p, entry=%p, flags=%d)", map, entry, flags, 0); KASSERT(map != kernel_map); /* we use nointr pool */ /* * is there a map to copy? if not, create one from scratch. */ if (entry->aref.ar_amap == NULL) { /* * check to see if we have a large amap that we can * chunk. we align startva/endva to chunk-sized * boundaries and then clip to them. */ if (canchunk && atop(entry->end - entry->start) >= UVM_AMAP_LARGE) { /* convert slots to bytes */ chunksize = UVM_AMAP_CHUNK << PAGE_SHIFT; startva = (startva / chunksize) * chunksize; endva = roundup(endva, chunksize); UVMHIST_LOG(maphist, " chunk amap ==> clip 0x%x->0x%x" "to 0x%x->0x%x", entry->start, entry->end, startva, endva); UVM_MAP_CLIP_START(map, entry, startva, NULL); /* watch out for endva wrap-around! */ if (endva >= startva) UVM_MAP_CLIP_END(map, entry, endva, NULL); } if ((flags & AMAP_COPY_NOMERGE) == 0 && uvm_mapent_trymerge(map, entry, UVM_MERGE_COPYING)) { return; } UVMHIST_LOG(maphist, "<- done [creating new amap 0x%x->0x%x]", entry->start, entry->end, 0, 0); entry->aref.ar_pageoff = 0; entry->aref.ar_amap = amap_alloc(entry->end - entry->start, 0, waitf); if (entry->aref.ar_amap != NULL) entry->etype &= ~UVM_ET_NEEDSCOPY; return; } /* * first check and see if we are the only map entry * referencing the amap we currently have. if so, then we can * just take it over rather than copying it. note that we are * reading am_ref with the amap unlocked... the value can only * be one if we have the only reference to the amap (via our * locked map). if we are greater than one we fall through to * the next case (where we double check the value). */ if (entry->aref.ar_amap->am_ref == 1) { entry->etype &= ~UVM_ET_NEEDSCOPY; UVMHIST_LOG(maphist, "<- done [ref cnt = 1, took it over]", 0, 0, 0, 0); return; } /* * looks like we need to copy the map. */ UVMHIST_LOG(maphist," amap=%p, ref=%d, must copy it", entry->aref.ar_amap, entry->aref.ar_amap->am_ref, 0, 0); AMAP_B2SLOT(slots, entry->end - entry->start); amap = amap_alloc1(slots, 0, waitf); if (amap == NULL) { UVMHIST_LOG(maphist, " amap_alloc1 failed", 0,0,0,0); return; } srcamap = entry->aref.ar_amap; amap_lock(srcamap); /* * need to double check reference count now that we've got the * src amap locked down. the reference count could have * changed while we were in malloc. if the reference count * dropped down to one we take over the old map rather than * copying the amap. */ if (srcamap->am_ref == 1) { /* take it over? */ entry->etype &= ~UVM_ET_NEEDSCOPY; amap->am_ref--; /* drop final reference to map */ amap_free(amap); /* dispose of new (unused) amap */ amap_unlock(srcamap); return; } /* * we must copy it now. 
	 */
	UVMHIST_LOG(maphist, " copying amap now",0, 0, 0, 0);
	for (lcv = 0 ; lcv < slots; lcv++) {
		amap->am_anon[lcv] =
		    srcamap->am_anon[entry->aref.ar_pageoff + lcv];
		if (amap->am_anon[lcv] == NULL)
			continue;
		mutex_enter(&amap->am_anon[lcv]->an_lock);
		amap->am_anon[lcv]->an_ref++;
		mutex_exit(&amap->am_anon[lcv]->an_lock);
		amap->am_bckptr[lcv] = amap->am_nused;
		amap->am_slots[amap->am_nused] = lcv;
		amap->am_nused++;
	}
	memset(&amap->am_anon[lcv], 0,
	    (amap->am_maxslot - lcv) * sizeof(struct vm_anon *));

	/*
	 * drop our reference to the old amap (srcamap) and unlock.
	 * we know that the reference count on srcamap is greater than
	 * one (we checked above), so there is no way we could drop
	 * the count to zero. [and no need to worry about freeing it]
	 */
	srcamap->am_ref--;
	if (srcamap->am_ref == 1 && (srcamap->am_flags & AMAP_SHARED) != 0)
		srcamap->am_flags &= ~AMAP_SHARED;	/* clear shared flag */
#ifdef UVM_AMAP_PPREF
	if (srcamap->am_ppref && srcamap->am_ppref != PPREF_NONE) {
		amap_pp_adjref(srcamap, entry->aref.ar_pageoff,
		    (entry->end - entry->start) >> PAGE_SHIFT, -1);
	}
static inline int uvm_loanentry(struct uvm_faultinfo *ufi, void ***output, int flags) { vaddr_t curaddr = ufi->orig_rvaddr; vsize_t togo = ufi->size; struct vm_aref *aref = &ufi->entry->aref; struct uvm_object *uobj = ufi->entry->object.uvm_obj; struct vm_anon *anon; int rv, result = 0; UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist); /* * lock us the rest of the way down (we unlock before return) */ if (aref->ar_amap) amap_lock(aref->ar_amap); /* * loop until done */ while (togo) { /* * find the page we want. check the anon layer first. */ if (aref->ar_amap) { anon = amap_lookup(aref, curaddr - ufi->entry->start); } else { anon = NULL; } /* locked: map, amap, uobj */ if (anon) { rv = uvm_loananon(ufi, output, flags, anon); } else if (uobj) { rv = uvm_loanuobj(ufi, output, flags, curaddr); } else if (UVM_ET_ISCOPYONWRITE(ufi->entry)) { rv = uvm_loanzero(ufi, output, flags); } else { uvmfault_unlockall(ufi, aref->ar_amap, uobj, NULL); rv = -1; } /* locked: if (rv > 0) => map, amap, uobj [o.w. unlocked] */ KASSERT(rv > 0 || aref->ar_amap == NULL || !mutex_owned(&aref->ar_amap->am_l)); KASSERT(rv > 0 || uobj == NULL || !mutex_owned(&uobj->vmobjlock)); /* total failure */ if (rv < 0) { UVMHIST_LOG(loanhist, "failure %d", rv, 0,0,0); return (-1); } /* relock failed, need to do another lookup */ if (rv == 0) { UVMHIST_LOG(loanhist, "relock failure %d", result ,0,0,0); return (result); } /* * got it... advance to next page */ result++; togo -= PAGE_SIZE; curaddr += PAGE_SIZE; } /* * unlock what we locked, unlock the maps and return */ if (aref->ar_amap) amap_unlock(aref->ar_amap); uvmfault_unlockmaps(ufi, false); UVMHIST_LOG(loanhist, "done %d", result, 0,0,0); return (result); }
/* * Balloc defines the structure of file system storage * by allocating the physical blocks on a device given * the inode and the logical block number in a file. */ int ext2fs_balloc(struct inode *ip, daddr_t bn, int size, kauth_cred_t cred, struct buf **bpp, int flags) { struct m_ext2fs *fs; daddr_t nb; struct buf *bp, *nbp; struct vnode *vp = ITOV(ip); struct indir indirs[EXT2FS_NIADDR + 2]; daddr_t newb, lbn, pref; int32_t *bap; /* XXX ondisk32 */ int num, i, error; u_int deallocated; daddr_t *blkp, *allocblk, allociblk[EXT2FS_NIADDR + 1]; int32_t *allocib; /* XXX ondisk32 */ int unwindidx = -1; UVMHIST_FUNC("ext2fs_balloc"); UVMHIST_CALLED(ubchist); UVMHIST_LOG(ubchist, "bn 0x%x", bn,0,0,0); if (bpp != NULL) { *bpp = NULL; } if (bn < 0) return (EFBIG); fs = ip->i_e2fs; lbn = bn; /* * The first EXT2FS_NDADDR blocks are direct blocks */ if (bn < EXT2FS_NDADDR) { /* XXX ondisk32 */ nb = fs2h32(ip->i_e2fs_blocks[bn]); if (nb != 0) { /* * the block is already allocated, just read it. */ if (bpp != NULL) { error = bread(vp, bn, fs->e2fs_bsize, NOCRED, B_MODIFY, &bp); if (error) { return (error); } *bpp = bp; } return (0); } /* * allocate a new direct block. */ error = ext2fs_alloc(ip, bn, ext2fs_blkpref(ip, bn, bn, &ip->i_e2fs_blocks[0]), cred, &newb); if (error) return (error); ip->i_e2fs_last_lblk = lbn; ip->i_e2fs_last_blk = newb; /* XXX ondisk32 */ ip->i_e2fs_blocks[bn] = h2fs32((int32_t)newb); ip->i_flag |= IN_CHANGE | IN_UPDATE; if (bpp != NULL) { bp = getblk(vp, bn, fs->e2fs_bsize, 0, 0); bp->b_blkno = EXT2_FSBTODB(fs, newb); if (flags & B_CLRBUF) clrbuf(bp); *bpp = bp; } return (0); } /* * Determine the number of levels of indirection. */ pref = 0; if ((error = ufs_getlbns(vp, bn, indirs, &num)) != 0) return(error); #ifdef DIAGNOSTIC if (num < 1) panic ("ext2fs_balloc: ufs_getlbns returned indirect block\n"); #endif /* * Fetch the first indirect block allocating if necessary. */ --num; /* XXX ondisk32 */ nb = fs2h32(ip->i_e2fs_blocks[EXT2FS_NDADDR + indirs[0].in_off]); allocib = NULL; allocblk = allociblk; if (nb == 0) { pref = ext2fs_blkpref(ip, lbn, 0, (int32_t *)0); error = ext2fs_alloc(ip, lbn, pref, cred, &newb); if (error) return (error); nb = newb; *allocblk++ = nb; ip->i_e2fs_last_blk = newb; bp = getblk(vp, indirs[1].in_lbn, fs->e2fs_bsize, 0, 0); bp->b_blkno = EXT2_FSBTODB(fs, newb); clrbuf(bp); /* * Write synchronously so that indirect blocks * never point at garbage. */ if ((error = bwrite(bp)) != 0) goto fail; unwindidx = 0; allocib = &ip->i_e2fs_blocks[EXT2FS_NDADDR + indirs[0].in_off]; /* XXX ondisk32 */ *allocib = h2fs32((int32_t)newb); ip->i_flag |= IN_CHANGE | IN_UPDATE; } /* * Fetch through the indirect blocks, allocating as necessary. */ for (i = 1;;) { error = bread(vp, indirs[i].in_lbn, (int)fs->e2fs_bsize, NOCRED, 0, &bp); if (error) { goto fail; } bap = (int32_t *)bp->b_data; /* XXX ondisk32 */ nb = fs2h32(bap[indirs[i].in_off]); if (i == num) break; i++; if (nb != 0) { brelse(bp, 0); continue; } pref = ext2fs_blkpref(ip, lbn, 0, (int32_t *)0); error = ext2fs_alloc(ip, lbn, pref, cred, &newb); if (error) { brelse(bp, 0); goto fail; } nb = newb; *allocblk++ = nb; ip->i_e2fs_last_blk = newb; nbp = getblk(vp, indirs[i].in_lbn, fs->e2fs_bsize, 0, 0); nbp->b_blkno = EXT2_FSBTODB(fs, nb); clrbuf(nbp); /* * Write synchronously so that indirect blocks * never point at garbage. 
*/ if ((error = bwrite(nbp)) != 0) { brelse(bp, 0); goto fail; } if (unwindidx < 0) unwindidx = i - 1; /* XXX ondisk32 */ bap[indirs[i - 1].in_off] = h2fs32((int32_t)nb); /* * If required, write synchronously, otherwise use * delayed write. */ if (flags & B_SYNC) { bwrite(bp); } else { bdwrite(bp); } } /* * Get the data block, allocating if necessary. */ if (nb == 0) { pref = ext2fs_blkpref(ip, lbn, indirs[num].in_off, &bap[0]); error = ext2fs_alloc(ip, lbn, pref, cred, &newb); if (error) { brelse(bp, 0); goto fail; } nb = newb; *allocblk++ = nb; ip->i_e2fs_last_lblk = lbn; ip->i_e2fs_last_blk = newb; /* XXX ondisk32 */ bap[indirs[num].in_off] = h2fs32((int32_t)nb); /* * If required, write synchronously, otherwise use * delayed write. */ if (flags & B_SYNC) { bwrite(bp); } else { bdwrite(bp); } if (bpp != NULL) { nbp = getblk(vp, lbn, fs->e2fs_bsize, 0, 0); nbp->b_blkno = EXT2_FSBTODB(fs, nb); if (flags & B_CLRBUF) clrbuf(nbp); *bpp = nbp; } return (0); } brelse(bp, 0); if (bpp != NULL) { if (flags & B_CLRBUF) { error = bread(vp, lbn, (int)fs->e2fs_bsize, NOCRED, B_MODIFY, &nbp); if (error) { goto fail; } } else { nbp = getblk(vp, lbn, fs->e2fs_bsize, 0, 0); nbp->b_blkno = EXT2_FSBTODB(fs, nb); } *bpp = nbp; } return (0); fail: /* * If we have failed part way through block allocation, we * have to deallocate any indirect blocks that we have allocated. */ for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) { ext2fs_blkfree(ip, *blkp); deallocated += fs->e2fs_bsize; } if (unwindidx >= 0) { if (unwindidx == 0) { *allocib = 0; } else { int r; r = bread(vp, indirs[unwindidx].in_lbn, (int)fs->e2fs_bsize, NOCRED, B_MODIFY, &bp); if (r) { panic("Could not unwind indirect block, error %d", r); } else { bap = (int32_t *)bp->b_data; /* XXX ondisk32 */ bap[indirs[unwindidx].in_off] = 0; if (flags & B_SYNC) bwrite(bp); else bdwrite(bp); } } for (i = unwindidx + 1; i <= num; i++) { bp = getblk(vp, indirs[i].in_lbn, (int)fs->e2fs_bsize, 0, 0); brelse(bp, BC_INVAL); } } if (deallocated) { ext2fs_setnblock(ip, ext2fs_nblock(ip) - btodb(deallocated)); ip->i_e2fs_flags |= IN_CHANGE | IN_UPDATE; } return error; }
int uvm_loananon(struct uvm_faultinfo *ufi, void ***output, int flags, struct vm_anon *anon) { struct vm_page *pg; int error; UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist); /* * if we are loaning to "another" anon then it is easy, we just * bump the reference count on the current anon and return a * pointer to it (it becomes copy-on-write shared). */ if (flags & UVM_LOAN_TOANON) { mutex_enter(&anon->an_lock); pg = anon->an_page; if (pg && (pg->pqflags & PQ_ANON) != 0 && anon->an_ref == 1) { if (pg->wire_count > 0) { UVMHIST_LOG(loanhist, "->A wired %p", pg,0,0,0); uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, ufi->entry->object.uvm_obj, anon); return (-1); } pmap_page_protect(pg, VM_PROT_READ); } anon->an_ref++; **output = anon; (*output)++; mutex_exit(&anon->an_lock); UVMHIST_LOG(loanhist, "->A done", 0,0,0,0); return (1); } /* * we are loaning to a kernel-page. we need to get the page * resident so we can wire it. uvmfault_anonget will handle * this for us. */ mutex_enter(&anon->an_lock); error = uvmfault_anonget(ufi, ufi->entry->aref.ar_amap, anon); /* * if we were unable to get the anon, then uvmfault_anonget has * unlocked everything and returned an error code. */ if (error) { UVMHIST_LOG(loanhist, "error %d", error,0,0,0); /* need to refault (i.e. refresh our lookup) ? */ if (error == ERESTART) { return (0); } /* "try again"? sleep a bit and retry ... */ if (error == EAGAIN) { tsleep(&lbolt, PVM, "loanagain", 0); return (0); } /* otherwise flag it as an error */ return (-1); } /* * we have the page and its owner locked: do the loan now. */ pg = anon->an_page; mutex_enter(&uvm_pageqlock); if (pg->wire_count > 0) { mutex_exit(&uvm_pageqlock); UVMHIST_LOG(loanhist, "->K wired %p", pg,0,0,0); KASSERT(pg->uobject == NULL); uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, NULL, anon); return (-1); } if (pg->loan_count == 0) { pmap_page_protect(pg, VM_PROT_READ); } pg->loan_count++; uvm_pageactivate(pg); mutex_exit(&uvm_pageqlock); **output = pg; (*output)++; /* unlock anon and return success */ if (pg->uobject) mutex_exit(&pg->uobject->vmobjlock); mutex_exit(&anon->an_lock); UVMHIST_LOG(loanhist, "->K done", 0,0,0,0); return (1); }
int ufs_balloc_range(struct vnode *vp, off_t off, off_t len, kauth_cred_t cred, int flags) { off_t neweof; /* file size after the operation */ off_t neweob; /* offset next to the last block after the operation */ off_t pagestart; /* starting offset of range covered by pgs */ off_t eob; /* offset next to allocated blocks */ struct uvm_object *uobj; int i, delta, error, npages; int bshift = vp->v_mount->mnt_fs_bshift; int bsize = 1 << bshift; int ppb = MAX(bsize >> PAGE_SHIFT, 1); struct vm_page **pgs; size_t pgssize; UVMHIST_FUNC("ufs_balloc_range"); UVMHIST_CALLED(ubchist); UVMHIST_LOG(ubchist, "vp %p off 0x%x len 0x%x u_size 0x%x", vp, off, len, vp->v_size); neweof = MAX(vp->v_size, off + len); GOP_SIZE(vp, neweof, &neweob, 0); error = 0; uobj = &vp->v_uobj; /* * read or create pages covering the range of the allocation and * keep them locked until the new block is allocated, so there * will be no window where the old contents of the new block are * visible to racing threads. */ pagestart = trunc_page(off) & ~(bsize - 1); npages = MIN(ppb, (round_page(neweob) - pagestart) >> PAGE_SHIFT); pgssize = npages * sizeof(struct vm_page *); pgs = kmem_zalloc(pgssize, KM_SLEEP); mutex_enter(&uobj->vmobjlock); error = VOP_GETPAGES(vp, pagestart, pgs, &npages, 0, VM_PROT_WRITE, 0, PGO_SYNCIO|PGO_PASTEOF|PGO_NOBLOCKALLOC|PGO_NOTIMESTAMP); if (error) { goto out; } mutex_enter(&uobj->vmobjlock); mutex_enter(&uvm_pageqlock); for (i = 0; i < npages; i++) { UVMHIST_LOG(ubchist, "got pgs[%d] %p", i, pgs[i],0,0); KASSERT((pgs[i]->flags & PG_RELEASED) == 0); pgs[i]->flags &= ~PG_CLEAN; uvm_pageactivate(pgs[i]); } mutex_exit(&uvm_pageqlock); mutex_exit(&uobj->vmobjlock); /* * adjust off to be block-aligned. */ delta = off & (bsize - 1); off -= delta; len += delta; /* * now allocate the range. */ genfs_node_wrlock(vp); error = GOP_ALLOC(vp, off, len, flags, cred); genfs_node_unlock(vp); /* * clear PG_RDONLY on any pages we are holding * (since they now have backing store) and unbusy them. */ GOP_SIZE(vp, off + len, &eob, 0); mutex_enter(&uobj->vmobjlock); for (i = 0; i < npages; i++) { if (error) { pgs[i]->flags |= PG_RELEASED; } else if (off <= pagestart + (i << PAGE_SHIFT) && pagestart + ((i + 1) << PAGE_SHIFT) <= eob) { pgs[i]->flags &= ~PG_RDONLY; } } if (error) { mutex_enter(&uvm_pageqlock); uvm_page_unbusy(pgs, npages); mutex_exit(&uvm_pageqlock); } else { uvm_page_unbusy(pgs, npages); } mutex_exit(&uobj->vmobjlock); out: kmem_free(pgs, pgssize); return error; }
static int uvm_loanuobj(struct uvm_faultinfo *ufi, void ***output, int flags, vaddr_t va) { struct vm_amap *amap = ufi->entry->aref.ar_amap; struct uvm_object *uobj = ufi->entry->object.uvm_obj; struct vm_page *pg; struct vm_anon *anon; int error, npages; bool locked; UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist); /* * first we must make sure the page is resident. * * XXXCDC: duplicate code with uvm_fault(). */ mutex_enter(&uobj->vmobjlock); if (uobj->pgops->pgo_get) { /* try locked pgo_get */ npages = 1; pg = NULL; error = (*uobj->pgops->pgo_get)(uobj, va - ufi->entry->start + ufi->entry->offset, &pg, &npages, 0, VM_PROT_READ, MADV_NORMAL, PGO_LOCKED); } else { error = EIO; /* must have pgo_get op */ } /* * check the result of the locked pgo_get. if there is a problem, * then we fail the loan. */ if (error && error != EBUSY) { uvmfault_unlockall(ufi, amap, uobj, NULL); return (-1); } /* * if we need to unlock for I/O, do so now. */ if (error == EBUSY) { uvmfault_unlockall(ufi, amap, NULL, NULL); /* locked: uobj */ npages = 1; error = (*uobj->pgops->pgo_get)(uobj, va - ufi->entry->start + ufi->entry->offset, &pg, &npages, 0, VM_PROT_READ, MADV_NORMAL, PGO_SYNCIO); /* locked: <nothing> */ if (error) { if (error == EAGAIN) { tsleep(&lbolt, PVM, "fltagain2", 0); return (0); } return (-1); } /* * pgo_get was a success. attempt to relock everything. */ locked = uvmfault_relock(ufi); if (locked && amap) amap_lock(amap); uobj = pg->uobject; mutex_enter(&uobj->vmobjlock); /* * verify that the page has not be released and re-verify * that amap slot is still free. if there is a problem we * drop our lock (thus force a lookup refresh/retry). */ if ((pg->flags & PG_RELEASED) != 0 || (locked && amap && amap_lookup(&ufi->entry->aref, ufi->orig_rvaddr - ufi->entry->start))) { if (locked) uvmfault_unlockall(ufi, amap, NULL, NULL); locked = false; } /* * didn't get the lock? release the page and retry. */ if (locked == false) { if (pg->flags & PG_WANTED) { wakeup(pg); } if (pg->flags & PG_RELEASED) { mutex_enter(&uvm_pageqlock); uvm_pagefree(pg); mutex_exit(&uvm_pageqlock); mutex_exit(&uobj->vmobjlock); return (0); } mutex_enter(&uvm_pageqlock); uvm_pageactivate(pg); mutex_exit(&uvm_pageqlock); pg->flags &= ~(PG_BUSY|PG_WANTED); UVM_PAGE_OWN(pg, NULL); mutex_exit(&uobj->vmobjlock); return (0); } } KASSERT(uobj == pg->uobject); /* * at this point we have the page we want ("pg") marked PG_BUSY for us * and we have all data structures locked. do the loanout. page can * not be PG_RELEASED (we caught this above). */ if ((flags & UVM_LOAN_TOANON) == 0) { if (uvm_loanpage(&pg, 1)) { uvmfault_unlockall(ufi, amap, uobj, NULL); return (-1); } mutex_exit(&uobj->vmobjlock); **output = pg; (*output)++; return (1); } /* * must be a loan to an anon. check to see if there is already * an anon associated with this page. if so, then just return * a reference to this object. the page should already be * mapped read-only because it is already on loan. 
*/ if (pg->uanon) { anon = pg->uanon; mutex_enter(&anon->an_lock); anon->an_ref++; mutex_exit(&anon->an_lock); if (pg->flags & PG_WANTED) { wakeup(pg); } pg->flags &= ~(PG_WANTED|PG_BUSY); UVM_PAGE_OWN(pg, NULL); mutex_exit(&uobj->vmobjlock); **output = anon; (*output)++; return (1); } /* * need to allocate a new anon */ anon = uvm_analloc(); if (anon == NULL) { goto fail; } anon->an_page = pg; pg->uanon = anon; mutex_enter(&uvm_pageqlock); if (pg->wire_count > 0) { mutex_exit(&uvm_pageqlock); UVMHIST_LOG(loanhist, "wired %p", pg,0,0,0); pg->uanon = NULL; anon->an_page = NULL; anon->an_ref--; mutex_exit(&anon->an_lock); uvm_anfree(anon); goto fail; } if (pg->loan_count == 0) { pmap_page_protect(pg, VM_PROT_READ); } pg->loan_count++; uvm_pageactivate(pg); mutex_exit(&uvm_pageqlock); if (pg->flags & PG_WANTED) { wakeup(pg); } pg->flags &= ~(PG_WANTED|PG_BUSY); UVM_PAGE_OWN(pg, NULL); mutex_exit(&uobj->vmobjlock); mutex_exit(&anon->an_lock); **output = anon; (*output)++; return (1); fail: UVMHIST_LOG(loanhist, "fail", 0,0,0,0); /* * unlock everything and bail out. */ if (pg->flags & PG_WANTED) { wakeup(pg); } pg->flags &= ~(PG_WANTED|PG_BUSY); UVM_PAGE_OWN(pg, NULL); uvmfault_unlockall(ufi, amap, uobj, NULL); return (-1); }
int ulfs_balloc_range(struct vnode *vp, off_t off, off_t len, kauth_cred_t cred, int flags) { off_t neweof; /* file size after the operation */ off_t neweob; /* offset next to the last block after the operation */ off_t pagestart; /* starting offset of range covered by pgs */ off_t eob; /* offset next to allocated blocks */ struct uvm_object *uobj; int i, delta, error, npages; int bshift = vp->v_mount->mnt_fs_bshift; int bsize = 1 << bshift; int ppb = MAX(bsize >> PAGE_SHIFT, 1); struct vm_page **pgs; size_t pgssize; UVMHIST_FUNC("ulfs_balloc_range"); UVMHIST_CALLED(ubchist); UVMHIST_LOG(ubchist, "vp %p off 0x%x len 0x%x u_size 0x%x", vp, off, len, vp->v_size); neweof = MAX(vp->v_size, off + len); GOP_SIZE(vp, neweof, &neweob, 0); error = 0; uobj = &vp->v_uobj; /* * read or create pages covering the range of the allocation and * keep them locked until the new block is allocated, so there * will be no window where the old contents of the new block are * visible to racing threads. */ pagestart = trunc_page(off) & ~(bsize - 1); npages = MIN(ppb, (round_page(neweob) - pagestart) >> PAGE_SHIFT); pgssize = npages * sizeof(struct vm_page *); pgs = kmem_zalloc(pgssize, KM_SLEEP); /* * adjust off to be block-aligned. */ delta = off & (bsize - 1); off -= delta; len += delta; genfs_node_wrlock(vp); mutex_enter(uobj->vmobjlock); error = VOP_GETPAGES(vp, pagestart, pgs, &npages, 0, VM_PROT_WRITE, 0, PGO_SYNCIO | PGO_PASTEOF | PGO_NOBLOCKALLOC | PGO_NOTIMESTAMP | PGO_GLOCKHELD); if (error) { goto out; } /* * now allocate the range. */ error = GOP_ALLOC(vp, off, len, flags, cred); genfs_node_unlock(vp); /* * if the allocation succeeded, clear PG_CLEAN on all the pages * and clear PG_RDONLY on any pages that are now fully backed * by disk blocks. if the allocation failed, we do not invalidate * the pages since they might have already existed and been dirty, * in which case we need to keep them around. if we created the pages, * they will be clean and read-only, and leaving such pages * in the cache won't cause any problems. */ GOP_SIZE(vp, off + len, &eob, 0); mutex_enter(uobj->vmobjlock); mutex_enter(&uvm_pageqlock); for (i = 0; i < npages; i++) { KASSERT((pgs[i]->flags & PG_RELEASED) == 0); if (!error) { if (off <= pagestart + (i << PAGE_SHIFT) && pagestart + ((i + 1) << PAGE_SHIFT) <= eob) { pgs[i]->flags &= ~PG_RDONLY; } pgs[i]->flags &= ~PG_CLEAN; } uvm_pageactivate(pgs[i]); } mutex_exit(&uvm_pageqlock); uvm_page_unbusy(pgs, npages); mutex_exit(uobj->vmobjlock); out: kmem_free(pgs, pgssize); return error; }