/*
 * Allocate pages to back the virtual address range [addr, addr + size).
 * If addr is NULL, allocate the virtual address space as well.
 */
void *
segkmem_xalloc(vmem_t *vmp, void *inaddr, size_t size, int vmflag, uint_t attr,
    page_t *(*page_create_func)(void *, size_t, int, void *), void *pcarg)
{
	page_t *ppl;
	caddr_t addr = inaddr;
	pgcnt_t npages = btopr(size);
	int allocflag;

	if (inaddr == NULL && (addr = vmem_alloc(vmp, size, vmflag)) == NULL)
		return (NULL);

	ASSERT(((uintptr_t)addr & PAGEOFFSET) == 0);

	if (page_resv(npages, vmflag & VM_KMFLAGS) == 0) {
		if (inaddr == NULL)
			vmem_free(vmp, addr, size);
		return (NULL);
	}

	ppl = page_create_func(addr, size, vmflag, pcarg);
	if (ppl == NULL) {
		if (inaddr == NULL)
			vmem_free(vmp, addr, size);
		page_unresv(npages);
		return (NULL);
	}

	/*
	 * Under certain conditions, we need to let the HAT layer know
	 * that it cannot safely allocate memory.  Allocations from
	 * the hat_memload vmem arena always need this, to prevent
	 * infinite recursion.
	 *
	 * In addition, the x86 hat cannot safely do memory
	 * allocations while in vmem_populate(), because there
	 * is no simple bound on its usage.
	 */
	if (vmflag & VM_MEMLOAD)
		allocflag = HAT_NO_KALLOC;
#if defined(__x86)
	else if (vmem_is_populator())
		allocflag = HAT_NO_KALLOC;
#endif
	else
		allocflag = 0;

	while (ppl != NULL) {
		page_t *pp = ppl;
		page_sub(&ppl, pp);
		ASSERT(page_iolock_assert(pp));
		ASSERT(PAGE_EXCL(pp));
		page_io_unlock(pp);
		hat_memload(kas.a_hat, (caddr_t)(uintptr_t)pp->p_offset, pp,
		    (PROT_ALL & ~PROT_USER) | HAT_NOSYNC | attr,
		    HAT_LOAD_LOCK | allocflag);
		pp->p_lckcnt = 1;
#if defined(__x86)
		page_downgrade(pp);
#else
		if (vmflag & SEGKMEM_SHARELOCKED)
			page_downgrade(pp);
		else
			page_unlock(pp);
#endif
	}

	return (addr);
}
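
/*
 * Illustrative sketch only, not part of the original source: one way a
 * wrapper could drive segkmem_xalloc(), passing a NULL inaddr so that
 * the virtual address range is carved out of the arena as well.  The
 * name example_segkmem_alloc and the page_create callback argument are
 * hypothetical; the real segkmem wrappers may differ.
 */
#ifdef SEGKMEM_XALLOC_EXAMPLE
static void *
example_segkmem_alloc(vmem_t *vmp, size_t size, int vmflag,
    page_t *(*create_func)(void *, size_t, int, void *), void *arg)
{
	/* NULL inaddr: segkmem_xalloc() also allocates the VA range */
	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0, create_func, arg));
}
#endif	/* SEGKMEM_XALLOC_EXAMPLE */

/*
 * Process the list of pages from a completed write.  Depending on the
 * flags, each page is re-dirtied (B_ERROR), destroyed (B_INVAL), freed
 * if it is unmapped, unreferenced and unmodified (B_FREE), or simply
 * unlocked; per-CPU vm paging statistics are updated at the end.
 */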
void
pvn_write_done(page_t *plist, int flags)
{
	int dfree = 0;
	int pgrec = 0;
	int pgout = 0;
	int pgpgout = 0;
	int anonpgout = 0;
	int anonfree = 0;
	int fspgout = 0;
	int fsfree = 0;
	int execpgout = 0;
	int execfree = 0;
	page_t *pp;
	struct cpu *cpup;
	struct vnode *vp = NULL;	/* for probe */
	uint_t ppattr;
	kmutex_t *vphm = NULL;

	ASSERT((flags & B_READ) == 0);

	/*
	 * If we are about to start paging anyway, start freeing pages.
	 */
	if (write_free && freemem < lotsfree + pages_before_pager &&
	    (flags & B_ERROR) == 0) {
		flags |= B_FREE;
	}

	/*
	 * Handle each page involved in the i/o operation.
	 */
	while (plist != NULL) {
		pp = plist;
		ASSERT(PAGE_LOCKED(pp) && page_iolock_assert(pp));
		page_sub(&plist, pp);

		/* Kernel probe support */
		if (vp == NULL)
			vp = pp->p_vnode;

		if (((flags & B_ERROR) == 0) && IS_VMODSORT(vp)) {
			/*
			 * Move page to the top of the v_page list.
			 * Skip pages modified during IO.
			 */
			vphm = page_vnode_mutex(vp);
			mutex_enter(vphm);
			if ((pp->p_vpnext != pp) && !hat_ismod(pp)) {
				page_vpsub(&vp->v_pages, pp);
				page_vpadd(&vp->v_pages, pp);
			}
			mutex_exit(vphm);
		}

		if (flags & B_ERROR) {
			/*
			 * Write operation failed.  We don't want
			 * to destroy (or free) the page unless B_FORCE
			 * is set.  We set the mod bit again and release
			 * all locks on the page so that it will get written
			 * back again later when things are hopefully
			 * better again.
			 * If B_INVAL and B_FORCE is set we really have
			 * to destroy the page.
			 */
			if ((flags & (B_INVAL|B_FORCE)) == (B_INVAL|B_FORCE)) {
				page_io_unlock(pp);
				/*LINTED: constant in conditional context*/
				VN_DISPOSE(pp, B_INVAL, 0, kcred);
			} else {
				hat_setmod_only(pp);
				page_io_unlock(pp);
				page_unlock(pp);
			}
		} else if (flags & B_INVAL) {
			/*
			 * XXX - Failed writes with B_INVAL set are
			 * not handled appropriately.
			 */
			page_io_unlock(pp);
			/*LINTED: constant in conditional context*/
			VN_DISPOSE(pp, B_INVAL, 0, kcred);
		} else if (flags & B_FREE || !hat_page_is_mapped(pp)) {
			/*
			 * Update statistics for pages being paged out
			 */
			if (pp->p_vnode) {
				if (IS_SWAPFSVP(pp->p_vnode)) {
					anonpgout++;
				} else {
					if (pp->p_vnode->v_flag & VVMEXEC) {
						execpgout++;
					} else {
						fspgout++;
					}
				}
			}
			page_io_unlock(pp);
			pgout = 1;
			pgpgout++;
			TRACE_1(TR_FAC_VM, TR_PAGE_WS_OUT,
			    "page_ws_out:pp %p", pp);

			/*
			 * The page_struct_lock need not be acquired to
			 * examine "p_lckcnt" and "p_cowcnt" since we'll
			 * have an "exclusive" lock if the upgrade succeeds.
			 */
			if (page_tryupgrade(pp) && pp->p_lckcnt == 0 &&
			    pp->p_cowcnt == 0) {
				/*
				 * Check if someone has reclaimed the
				 * page.  If ref and mod are not set, no
				 * one is using it so we can free it.
				 * The rest of the system is careful
				 * to use the NOSYNC flag to unload
				 * translations set up for i/o w/o
				 * affecting ref and mod bits.
				 *
				 * Obtain a copy of the real hardware
				 * mod bit using hat_pagesync(pp, HAT_DONTZERO)
				 * to avoid having to flush the cache.
				 */
				ppattr = hat_pagesync(pp, HAT_SYNC_DONTZERO |
				    HAT_SYNC_STOPON_MOD);
			ck_refmod:
				if (!(ppattr & (P_REF | P_MOD))) {
					if (hat_page_is_mapped(pp)) {
						/*
						 * Doesn't look like the page
						 * was modified so now we
						 * really have to unload the
						 * translations.  Meanwhile
						 * another CPU could've
						 * modified it so we have to
						 * check again.  We don't loop
						 * forever here because now
						 * the translations are gone
						 * and no one can get a new one
						 * since we have the "exclusive"
						 * lock on the page.
						 */
						(void) hat_pageunload(pp,
						    HAT_FORCE_PGUNLOAD);
						ppattr = hat_page_getattr(pp,
						    P_REF | P_MOD);
						goto ck_refmod;
					}
					/*
					 * Update statistics for pages being
					 * freed
					 */
					if (pp->p_vnode) {
						if (IS_SWAPFSVP(pp->p_vnode)) {
							anonfree++;
						} else {
							if (pp->p_vnode->v_flag
							    & VVMEXEC) {
								execfree++;
							} else {
								fsfree++;
							}
						}
					}
					/*LINTED: constant in conditional ctx*/
					VN_DISPOSE(pp, B_FREE,
					    (flags & B_DONTNEED), kcred);
					dfree++;
				} else {
					page_unlock(pp);
					pgrec++;
					TRACE_1(TR_FAC_VM, TR_PAGE_WS_FREE,
					    "page_ws_free:pp %p", pp);
				}
			} else {
				/*
				 * Page is either `locked' in memory
				 * or was reclaimed and now has a
				 * "shared" lock, so release it.
				 */
				page_unlock(pp);
			}
		} else {
			/*
			 * Neither B_FREE nor B_INVAL nor B_ERROR.
			 * Just release locks.
			 */
			page_io_unlock(pp);
			page_unlock(pp);
		}
	}

	CPU_STATS_ENTER_K();
	cpup = CPU;		/* get cpup now that CPU cannot change */
	CPU_STATS_ADDQ(cpup, vm, dfree, dfree);
	CPU_STATS_ADDQ(cpup, vm, pgrec, pgrec);
	CPU_STATS_ADDQ(cpup, vm, pgout, pgout);
	CPU_STATS_ADDQ(cpup, vm, pgpgout, pgpgout);
	CPU_STATS_ADDQ(cpup, vm, anonpgout, anonpgout);
	CPU_STATS_ADDQ(cpup, vm, anonfree, anonfree);
	CPU_STATS_ADDQ(cpup, vm, fspgout, fspgout);
	CPU_STATS_ADDQ(cpup, vm, fsfree, fsfree);
	CPU_STATS_ADDQ(cpup, vm, execpgout, execpgout);
	CPU_STATS_ADDQ(cpup, vm, execfree, execfree);
	CPU_STATS_EXIT_K();

	/* Kernel probe */
	TNF_PROBE_4(pageout, "vm pageio io", /* CSTYLED */,
	    tnf_opaque,	vnode,			vp,
	    tnf_ulong,	pages_pageout,		pgpgout,
	    tnf_ulong,	pages_freed,		dfree,
	    tnf_ulong,	pages_reclaimed,	pgrec);
}
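
/*
 * Illustrative sketch only, not part of the original source: a write
 * completion path typically hands pvn_write_done() the kept page list
 * together with the putpage flags, adding B_ERROR when the i/o failed
 * so that the pages are re-dirtied rather than freed.  The function
 * name example_write_iodone and its arguments are hypothetical.
 */
#ifdef PVN_WRITE_DONE_EXAMPLE
static void
example_write_iodone(page_t *plist, int err, int putpage_flags)
{
	int flags = B_WRITE | putpage_flags;

	if (err != 0)
		flags |= B_ERROR;

	/* pvn_write_done() asserts that B_READ is not set */
	pvn_write_done(plist, flags);
}
#endif	/* PVN_WRITE_DONE_EXAMPLE */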
/*
 * Allocate a large page to back the virtual address range
 * [addr, addr + size).  If addr is NULL, allocate the virtual address
 * space as well.
 */
static void *
segkmem_xalloc_lp(vmem_t *vmp, void *inaddr, size_t size, int vmflag,
    uint_t attr, page_t *(*page_create_func)(void *, size_t, int, void *),
    void *pcarg)
{
	caddr_t addr = inaddr, pa;
	size_t  lpsize = segkmem_lpsize;
	pgcnt_t npages = btopr(size);
	pgcnt_t nbpages = btop(lpsize);
	pgcnt_t nlpages = size >> segkmem_lpshift;
	size_t  ppasize = nbpages * sizeof (page_t *);
	page_t *pp, *rootpp, **ppa, *pplist = NULL;
	int i;

	vmflag |= VM_NOSLEEP;

	if (page_resv(npages, vmflag & VM_KMFLAGS) == 0) {
		return (NULL);
	}

	/*
	 * allocate an array we need for hat_memload_array.
	 * we use a separate arena to avoid recursion.
	 * we will not need this array when hat_memload_array learns pp++
	 */
	if ((ppa = vmem_alloc(segkmem_ppa_arena, ppasize, vmflag)) == NULL) {
		goto fail_array_alloc;
	}

	if (inaddr == NULL && (addr = vmem_alloc(vmp, size, vmflag)) == NULL)
		goto fail_vmem_alloc;

	ASSERT(((uintptr_t)addr & (lpsize - 1)) == 0);

	/* create all the pages */
	for (pa = addr, i = 0; i < nlpages; i++, pa += lpsize) {
		if ((pp = page_create_func(pa, lpsize, vmflag, pcarg)) == NULL)
			goto fail_page_create;
		page_list_concat(&pplist, &pp);
	}

	/* at this point we have all the resource to complete the request */
	while ((rootpp = pplist) != NULL) {
		for (i = 0; i < nbpages; i++) {
			ASSERT(pplist != NULL);
			pp = pplist;
			page_sub(&pplist, pp);
			ASSERT(page_iolock_assert(pp));
			page_io_unlock(pp);
			ppa[i] = pp;
		}
		/*
		 * Load the locked entry.  It's OK to preload the entry into the
		 * TSB since we now support large mappings in the kernel TSB.
		 */
		hat_memload_array(kas.a_hat,
		    (caddr_t)(uintptr_t)rootpp->p_offset, lpsize,
		    ppa, (PROT_ALL & ~PROT_USER) | HAT_NOSYNC | attr,
		    HAT_LOAD_LOCK);

		for (--i; i >= 0; --i) {
			ppa[i]->p_lckcnt = 1;
			page_unlock(ppa[i]);
		}
	}

	vmem_free(segkmem_ppa_arena, ppa, ppasize);
	return (addr);

fail_page_create:
	while ((rootpp = pplist) != NULL) {
		for (i = 0, pp = pplist; i < nbpages; i++, pp = pplist) {
			ASSERT(pp != NULL);
			page_sub(&pplist, pp);
			ASSERT(page_iolock_assert(pp));
			page_io_unlock(pp);
		}
		page_destroy_pages(rootpp);
	}

	if (inaddr == NULL)
		vmem_free(vmp, addr, size);

fail_vmem_alloc:
	vmem_free(segkmem_ppa_arena, ppa, ppasize);

fail_array_alloc:
	page_unresv(npages);

	return (NULL);
}
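
/*
 * Illustrative sketch only, not part of the original source: a caller
 * might use the large-page path only when the request is a multiple of
 * segkmem_lpsize and fall back to segkmem_xalloc() otherwise.  The name
 * example_segkmem_alloc_any and the callback argument are hypothetical.
 */
#ifdef SEGKMEM_XALLOC_LP_EXAMPLE
static void *
example_segkmem_alloc_any(vmem_t *vmp, size_t size, int vmflag,
    page_t *(*create_func)(void *, size_t, int, void *), void *arg)
{
	if (segkmem_lpsize > PAGESIZE && IS_P2ALIGNED(size, segkmem_lpsize))
		return (segkmem_xalloc_lp(vmp, NULL, size, vmflag, 0,
		    create_func, arg));

	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0, create_func, arg));
}
#endif	/* SEGKMEM_XALLOC_LP_EXAMPLE */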