static void less_pages(uint64_t base, uint64_t len) { uint64_t pa, end = base + len; extern int kcage_on; for (pa = base; pa < end; pa += PAGESIZE) { pfn_t pfnum; page_t *pp; pfnum = (pfn_t)(pa >> PAGESHIFT); if ((pp = page_numtopp_nolock(pfnum)) == NULL) cmn_err(CE_PANIC, "missing pfnum %lx", pfnum); /* * must break up any large pages that may have * constituent pages being utilized for * prom_alloc()'s. page_reclaim() can't handle * large pages. */ if (pp->p_szc != 0) page_boot_demote(pp); if (!PAGE_LOCKED(pp) && pp->p_lckcnt == 0) { /* * Ahhh yes, a prom page, * suck it off the freelist, * lock it, and hashin on prom_pages vp. */ if (page_trylock(pp, SE_EXCL) == 0) cmn_err(CE_PANIC, "prom page locked"); (void) page_reclaim(pp, NULL); /* * vnode offsets on the prom_ppages vnode * are page numbers (gack) for >32 bit * physical memory machines. */ (void) page_hashin(pp, &promvp, (offset_t)pfnum, NULL); if (kcage_on) { ASSERT(pp->p_szc == 0); if (PP_ISNORELOC(pp) == 0) { PP_SETNORELOC(pp); PLCNT_XFER_NORELOC(pp); } } (void) page_pp_lock(pp, 0, 1); } } }
/*
 * page_retire_pp() -- carry out the retirement of one failing page.
 *
 * The caller hands us a small (p_szc == 0) page that it holds SE_EXCL, with
 * no I/O lock state and at least one PR_REASONS bit set in p_toxic.  The
 * steps, in order:
 *
 *   1. For FMA/MCE requests without a UE, give up immediately if
 *      page_retire_limit() says so; the request bits are cleared and the
 *      page is dequeued.
 *   2. A free page is reclaimed from the free list.  If that fails (or the
 *      debug MTBF hook forces a failure) the retire fails.
 *   3. A non-UE page that has a vnode and is not a no-relocate kernel page
 *      is offered to page_relocate().  Whether or not that works, we keep
 *      going with `pp'.
 *   4. The retire fails if the page is modified, belongs to the kernel
 *      vnode, or has a non-zero p_lckcnt/p_cowcnt.  The modified check is
 *      repeated after the mappings are forcibly unloaded, because the mod
 *      bit may have been set in between.
 *   5. The page is hashed out of its vnode and then scrubbed (transient UE)
 *      or destroyed, depending on the reason bits.
 *
 * Every failure path in this function drops the page lock before returning
 * (page_reclaim() is noted below to unlock `pp' itself when it fails).
 * The success paths hand the page to page_retire_transient_ue() or
 * page_retire_destroy(), which presumably take care of the lock -- confirm
 * against those routines.
 *
 * We make no policy decisions and do not panic on errors here; the return
 * value is whatever page_retire_done() yields for the outcome code.
 */
static int
page_retire_pp(page_t *pp)
{
	int toxic;

	ASSERT(PAGE_EXCL(pp));
	ASSERT(pp->p_iolock_state == 0);
	ASSERT(pp->p_szc == 0);

	PR_DEBUG(prd_top);
	PR_TYPES(pp);

	toxic = pp->p_toxic;
	ASSERT(toxic & PR_REASONS);

	/* Step 1: non-UE FMA/MCE requests are subject to the retire limit. */
	if ((toxic & (PR_FMA | PR_MCE)) != 0 && (toxic & PR_UE) == 0 &&
	    page_retire_limit()) {
		page_clrtoxic(pp, PR_FMA | PR_MCE | PR_MSG | PR_BUSY);
		page_retire_dequeue(pp);
		page_unlock(pp);
		return (page_retire_done(pp, PRD_LIMIT));
	}

	/* Step 2: pull a free page off the free list. */
	if (PP_ISFREE(pp)) {
		int skip_reclaim = MTBF(recl_calls, recl_mtbf) == 0;
		int reclaimed;

		PR_DEBUG(prd_free);

		reclaimed = skip_reclaim ? 0 : page_reclaim(pp, NULL);
		if (!reclaimed) {
			PR_DEBUG(prd_noreclaim);
			PR_INCR_KSTAT(pr_failed);
			/*
			 * A failed page_reclaim() has already unlocked
			 * `pp'; only the injected failure still holds it.
			 */
			if (skip_reclaim)
				page_unlock(pp);
			return (page_retire_done(pp, PRD_FAILED));
		}
	}
	ASSERT(!PP_ISFREE(pp));

	/* Step 3: try to move the contents somewhere healthy. */
	if (!(toxic & PR_UE) && pp->p_vnode != NULL &&
	    !PP_ISNORELOCKERNEL(pp) && MTBF(reloc_calls, reloc_mtbf)) {
		page_t *replacement = NULL;
		spgcnt_t npgs;

		/*
		 * On success the replacement page carries on without us.
		 * Either way `pp' itself is still headed for retirement.
		 */
		if (page_relocate(&pp, &replacement, 0, 0, &npgs, NULL) != 0) {
			PR_DEBUG(prd_relocfail);
		} else {
			PR_DEBUG(prd_reloc);
			page_unlock(replacement);
			ASSERT(hat_page_getattr(pp, P_MOD) == 0);
		}
	}

	/* Step 4: conditions under which the page has to stay in service. */
	if (hat_ismod(pp)) {
		PR_DEBUG(prd_mod);
		PR_INCR_KSTAT(pr_failed);
		page_unlock(pp);
		return (page_retire_done(pp, PRD_FAILED));
	}

	if (PP_ISKVP(pp)) {
		PR_DEBUG(prd_kern);
		PR_INCR_KSTAT(pr_failed_kernel);
		page_unlock(pp);
		return (page_retire_done(pp, PRD_FAILED));
	}

	if (pp->p_lckcnt != 0 || pp->p_cowcnt != 0) {
		PR_DEBUG(prd_locked);
		PR_INCR_KSTAT(pr_failed);
		page_unlock(pp);
		return (page_retire_done(pp, PRD_FAILED));
	}

	(void) hat_pageunload(pp, HAT_FORCE_PGUNLOAD);
	ASSERT(!hat_page_is_mapped(pp));

	/*
	 * The mod bit may have been set between the first check and the
	 * unload.  Retiring a dirty page that was not relocated would throw
	 * data away, so look again.
	 */
	if (hat_ismod(pp)) {
		PR_DEBUG(prd_mod_late);
		PR_INCR_KSTAT(pr_failed);
		page_unlock(pp);
		return (page_retire_done(pp, PRD_FAILED));
	}

	if (pp->p_vnode != NULL) {
		PR_DEBUG(prd_hashout);
		page_hashout(pp, NULL);
	}
	ASSERT(!pp->p_vnode);

	/*
	 * Step 5: the page is now locked, small, unmapped, not free, has no
	 * identity and is neither COW nor mlocked.  Dispose of it according
	 * to the reason it was flagged; a UE takes precedence over FMA,
	 * which takes precedence over MCE.
	 */
	if (toxic & PR_UE) {
		if (page_retire_transient_ue(pp)) {
			PR_DEBUG(prd_uescrubbed);
			return (page_retire_done(pp, PRD_UE_SCRUBBED));
		}
		PR_DEBUG(prd_uenotscrubbed);
		page_retire_destroy(pp);
		return (page_retire_done(pp, PRD_SUCCESS));
	}

	if (toxic & PR_FMA) {
		PR_DEBUG(prd_fma);
		page_retire_destroy(pp);
		return (page_retire_done(pp, PRD_SUCCESS));
	}

	if (toxic & PR_MCE) {
		PR_DEBUG(prd_mce);
		page_retire_destroy(pp);
		return (page_retire_done(pp, PRD_SUCCESS));
	}

	panic("page_retire_pp: bad toxic flags %d", toxic);
	/*NOTREACHED*/
}
/*
 * page_lock_es() -- make one attempt to take a page's p_selock as a reader
 * (SE_SHARED) or a writer (SE_EXCL), optionally reclaiming the page from
 * the free list once it is locked.
 *
 * With the addition of reader-writer lock semantics to page_lock_es,
 * callers wanting an exclusive (writer) lock may avoid being starved by
 * shared-lock (reader) holders by setting SE_EXCL_WANTED in the es
 * parameter.  In this case, when an exclusive lock cannot be acquired,
 * p_selock's SE_EWANTED bit is set, and shared-lock (reader) requests are
 * denied while that bit is set.  Lock requests are also denied if the page
 * is retired (see [2]).
 *
 * The se and es parameters determine if the lock should be granted
 * based on the following decision table:
 *
 * Lock wanted   es flags         p_selock/SE_EWANTED  Action
 * -----------   --------------   -------------------  ---------
 * SE_EXCL       any [1][2]       unlocked/any         grant lock, clear
 *                                                     SE_EWANTED
 * SE_EXCL       SE_EXCL_WANTED   any lock/any         deny, set SE_EWANTED
 * SE_EXCL       none             any lock/any         deny
 * SE_SHARED     n/a [2]          shared/0             grant
 * SE_SHARED     n/a [2]          unlocked/0           grant
 * SE_SHARED     n/a              shared/1             deny
 * SE_SHARED     n/a              unlocked/1           deny
 * SE_SHARED     n/a              excl/any             deny
 *
 * Notes:
 * [1] When p_selock is unlocked, a caller that passed SE_EXCL_WANTED is
 *     granted the exclusive lock and SE_EWANTED is cleared, regardless of
 *     which thread set the bit.  This was deemed acceptable as we are not
 *     concerned about exclusive-lock starvation.  If this ever becomes an
 *     issue, a priority or fifo mechanism should also be implemented.
 *     Meantime, the thread that set SE_EWANTED should be prepared to catch
 *     this condition and reset it.  As the code below is written, an
 *     exclusive request that did NOT pass SE_EXCL_WANTED is denied whenever
 *     SE_EWANTED is set, even if the page is otherwise unlocked.
 *
 * [2] Retired pages may not be locked at any time, regardless of the
 *     disposition of se, unless the es parameter has SE_RETIRED flag set.
 *
 * Arguments:
 *   pp      - page to lock.
 *   se      - SE_SHARED or SE_EXCL.
 *   lock    - optional mutex.  If non-NULL the caller must hold it on entry
 *             (asserted).  It is dropped before we block and re-acquired
 *             afterwards, and it is also handed to page_reclaim().
 *   reclaim - P_RECLAIM asks for a free page to be reclaimed with
 *             page_reclaim() after the lock has been obtained.
 *   es      - flags, see below.
 *
 * Notes on values of "es":
 *
 *   es & 1: page_lookup_create will attempt page relocation
 *   es & SE_EXCL_WANTED: caller wants SE_EWANTED set (eg. delete
 *       memory thread); this prevents reader-starvation of waiting
 *       writer thread(s) by giving priority to writers over readers.
 *   es & SE_RETIRED: caller wants to lock pages even if they are
 *       retired.  Default is to deny the lock if the page is retired.
 *
 * Returns:
 *   1 if the requested lock is now held (and, when a reclaim was needed,
 *     the reclaim succeeded).
 *   0 otherwise.  This covers: a retired page without SE_RETIRED and a
 *     page whose p_selock is SE_DELETED (both return without blocking);
 *     a lock that could not be granted, in which case we block once on
 *     p_cv and return without retrying; and a failed page_reclaim().
 *
 * And yes, we know, the semantics of this function are too complicated.
 * It's on the list to be cleaned up.
 */
int
page_lock_es(page_t *pp, se_t se, kmutex_t *lock,
    reclaim_t reclaim, int es)
{
	int		retval;
	kmutex_t	*pse = PAGE_SE_MUTEX(pp);
	int		upgraded;
	int		reclaim_it;

	ASSERT(lock != NULL ? MUTEX_HELD(lock) : 1);

	VM_STAT_ADD(page_lock_count);

	upgraded = 0;
	reclaim_it = 0;

	mutex_enter(pse);

	/* SE_EXCL_WANTED only makes sense together with an SE_EXCL request. */
	ASSERT(((es & SE_EXCL_WANTED) == 0) ||
	    ((es & SE_EXCL_WANTED) && (se == SE_EXCL)));

	/* Retired pages are refused outright unless the caller opted in. */
	if (PP_RETIRED(pp) && !(es & SE_RETIRED)) {
		mutex_exit(pse);
		VM_STAT_ADD(page_lock_retired);
		return (0);
	}

	/*
	 * A shared request with es == 1 on a page whose p_selock is entirely
	 * clear is turned into an exclusive request.  `upgraded' is not set
	 * here, so the lock is not downgraded again before returning.
	 */
	if (se == SE_SHARED && es == 1 && pp->p_selock == 0) {
		se = SE_EXCL;
	}

	if ((reclaim == P_RECLAIM) && (PP_ISFREE(pp))) {

		reclaim_it = 1;
		if (se == SE_SHARED) {
			/*
			 * This is an interesting situation.
			 *
			 * Remember that p_free can only change if
			 * p_selock < 0.
			 * p_free does not depend on our holding `pse'.
			 * And, since we hold `pse', p_selock can not change.
			 * So, if p_free changes on us, the page is already
			 * exclusively held, and we would fail to get p_selock
			 * regardless.
			 *
			 * We want to avoid getting the share
			 * lock on a free page that needs to be reclaimed.
			 * It is possible that some other thread has the share
			 * lock and has left the free page on the cache list.
			 * pvn_vplist_dirty() does this for brief periods.
			 * If the se_share is currently SE_EXCL, we will fail
			 * to acquire p_selock anyway.  Blocking is the
			 * right thing to do.
			 * If we need to reclaim this page, we must get
			 * exclusive access to it, force the upgrade now.
			 * Again, we will fail to acquire p_selock if the
			 * page is not free and block.
			 */
			upgraded = 1;
			se = SE_EXCL;
			VM_STAT_ADD(page_lock_upgrade);
		}
	}

	if (se == SE_EXCL) {
		if (!(es & SE_EXCL_WANTED) && (pp->p_selock & SE_EWANTED)) {
			/*
			 * if the caller wants a writer lock (but did not
			 * specify exclusive access), and there is a pending
			 * writer that wants exclusive access, return failure
			 */
			retval = 0;
		} else if ((pp->p_selock & ~SE_EWANTED) == 0) {
			/* no reader/writer lock held */
			THREAD_KPRI_REQUEST();
			/* this clears our setting of the SE_EWANTED bit */
			pp->p_selock = SE_WRITER;
			retval = 1;
		} else {
			/* page is locked */
			if (es & SE_EXCL_WANTED) {
				/* set the SE_EWANTED bit */
				pp->p_selock |= SE_EWANTED;
			}
			retval = 0;
		}
	} else {
		/*
		 * Shared request: granted only when p_selock is non-negative
		 * and no writer has announced itself through SE_EWANTED.
		 */
		retval = 0;
		if (pp->p_selock >= 0) {
			if ((pp->p_selock & SE_EWANTED) == 0) {
				pp->p_selock += SE_READER;
				retval = 1;
			}
		}
	}

	if (retval == 0) {
		/* A deleted page will never be unlocked; do not wait for it. */
		if ((pp->p_selock & ~SE_EWANTED) == SE_DELETED) {
			VM_STAT_ADD(page_lock_deleted);
			mutex_exit(pse);
			return (retval);
		}

#ifdef VM_STATS
		VM_STAT_ADD(page_lock_miss);
		if (upgraded) {
			VM_STAT_ADD(page_lock_upgrade_failed);
		}
#endif
		/* Do not sleep while holding the caller's mutex. */
		if (lock) {
			VM_STAT_ADD(page_lock_miss_lock);
			mutex_exit(lock);
		}

		/*
		 * Now, wait for the page to be unlocked and
		 * release the lock protecting p_cv and p_selock.
		 */
		cv_wait(&pp->p_cv, pse);
		mutex_exit(pse);

		/*
		 * The page identity may have changed while we were
		 * blocked.  If we are willing to depend on "pp"
		 * still pointing to a valid page structure (i.e.,
		 * assuming page structures are not dynamically allocated
		 * or freed), we could try to lock the page if its
		 * identity hasn't changed.
		 *
		 * This needs to be measured, since we come back from
		 * cv_wait holding pse (the expensive part of this
		 * operation) we might as well try the cheap part.
		 * Though we would also have to confirm that dropping
		 * `lock' did not cause any grief to the callers.
		 */
		if (lock) {
			mutex_enter(lock);
		}
	} else {
		/*
		 * We have the page lock.
		 * If we needed to reclaim the page, and the page
		 * needed reclaiming (ie, it was free), then we
		 * have the page exclusively locked.  We may need
		 * to downgrade the page.
		 */
		ASSERT((upgraded) ?
		    ((PP_ISFREE(pp)) && PAGE_EXCL(pp)) : 1);
		mutex_exit(pse);

		/*
		 * We now hold this page's lock, either shared or
		 * exclusive.  This will prevent its identity from changing.
		 * The page, however, may or may not be free.  If the caller
		 * requested, and it is free, go reclaim it from the
		 * free list.  If the page can't be reclaimed, return failure
		 * so that the caller can start all over again.
		 *
		 * NOTE:page_reclaim() releases the page lock (p_selock)
		 *	if it can't be reclaimed.
		 */
		if (reclaim_it) {
			if (!page_reclaim(pp, lock)) {
				VM_STAT_ADD(page_lock_bad_reclaim);
				retval = 0;
			} else {
				VM_STAT_ADD(page_lock_reclaim);
				/*
				 * The caller asked for a shared lock; give
				 * back the exclusivity we took for the
				 * reclaim.
				 */
				if (upgraded) {
					page_downgrade(pp);
				}
			}
		}
	}
	return (retval);
}