/*
 * Acquire a page's "shared/exclusive" lock, but never block.
 * Returns 1 on success, 0 on failure.
 */
int
page_trylock(page_t *pp, se_t se)
{
	kmutex_t *pse = PAGE_SE_MUTEX(pp);

	mutex_enter(pse);
	if (pp->p_selock & SE_EWANTED || PP_RETIRED(pp) ||
	    (se == SE_SHARED && PP_PR_NOSHARE(pp))) {
		/*
		 * Fail if another thread wants exclusive access, if the
		 * page is retired, or if a shared lock is requested on a
		 * page that is slated for retirement.
		 */
		mutex_exit(pse);
		VM_STAT_ADD(page_trylock_failed);
		return (0);
	}

	if (se == SE_EXCL) {
		if (pp->p_selock == 0) {
			THREAD_KPRI_REQUEST();
			pp->p_selock = SE_WRITER;
			mutex_exit(pse);
			return (1);
		}
	} else {
		if (pp->p_selock >= 0) {
			pp->p_selock += SE_READER;
			mutex_exit(pse);
			return (1);
		}
	}

	mutex_exit(pse);
	return (0);
}
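/*
 * Illustrative sketch (not part of the original source): the non-blocking
 * pattern page_trylock() is built for.  A caller tries for the exclusive
 * lock and simply backs off if the page is busy, retired, or slated for
 * retirement.  The helper name try_exclusive_touch() is hypothetical; it
 * assumes the same page_t/se_t declarations visible to the functions here.
 */
static int
try_exclusive_touch(page_t *pp)
{
	if (!page_trylock(pp, SE_EXCL)) {
		/* Lock unavailable; caller must cope without blocking. */
		return (0);
	}

	/* ... operate on the exclusively locked page here ... */

	page_unlock(pp);
	return (1);
}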
/*
 * Test a page to see if it is retired. If errors is non-NULL, the toxic
 * bits of the page are returned. Returns 0 on success, error code on failure.
 */
int
page_retire_check_pp(page_t *pp, uint64_t *errors)
{
	int rc;

	if (PP_RETIRED(pp)) {
		PR_DEBUG(prd_checkhit);
		rc = 0;
	} else if (PP_PR_REQ(pp)) {
		PR_DEBUG(prd_checkmiss_pend);
		rc = EAGAIN;
	} else {
		PR_DEBUG(prd_checkmiss_noerr);
		rc = EIO;
	}

	/*
	 * We have magically arranged the bit values returned to fmd(1M)
	 * to line up with the FMA, MCE, and UE bits of the page_t.
	 */
	if (errors) {
		uint64_t toxic = (uint64_t)(pp->p_toxic & PR_ERRMASK);
		if (toxic & PR_UE_SCRUBBED) {
			toxic &= ~PR_UE_SCRUBBED;
			toxic |= PR_UE;
		}
		*errors = toxic;
	}

	return (rc);
}
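/*
 * Illustrative sketch (not part of the original source): interpreting
 * page_retire_check_pp()'s return value and toxic-bit output.  The helper
 * name report_page_state() is hypothetical, and cmn_err(9F) is used only
 * to show where a caller would report the result.
 */
static void
report_page_state(page_t *pp)
{
	uint64_t errors;
	int rc = page_retire_check_pp(pp, &errors);

	if (rc == 0) {
		cmn_err(CE_NOTE, "page retired, toxic bits 0x%llx%s",
		    (u_longlong_t)errors,
		    (errors & PR_UE) ? " (uncorrectable error)" : "");
	} else if (rc == EAGAIN) {
		cmn_err(CE_NOTE, "page retirement is pending");
	} else {
		/* EIO: no errors outstanding on this page */
		cmn_err(CE_NOTE, "page has no errors outstanding");
	}
}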
/*
 * Take a retired page off the retired-pages vnode and clear the toxic flags.
 * If "free" is nonzero, lock it and put it back on the freelist. If "free"
 * is zero, the caller already holds the SE_EXCL lock, so we simply unretire
 * the page and don't do anything else with it.
 *
 * Any unretire messages are printed from this routine.
 *
 * Returns 0 if page pp was unretired; else an error code.
 */
int
page_unretire_pp(page_t *pp, int free)
{
	/*
	 * To be retired, a page has to be hashed onto the retired_pages vnode
	 * and have PR_RETIRED set in p_toxic.
	 */
	if (free == 0 || page_try_reclaim_lock(pp, SE_EXCL, SE_RETIRED)) {
		ASSERT(PAGE_EXCL(pp));
		PR_DEBUG(prd_ulocked);
		if (!PP_RETIRED(pp)) {
			PR_DEBUG(prd_unotretired);
			page_unlock(pp);
			return (page_retire_done(pp, PRD_UNR_NOT));
		}

		PR_MESSAGE(CE_NOTE, 1, "unretiring retired"
		    " page 0x%08x.%08x", mmu_ptob((uint64_t)pp->p_pagenum));
		if (pp->p_toxic & PR_FMA) {
			PR_DECR_KSTAT(pr_fma);
		} else if (pp->p_toxic & PR_UE) {
			PR_DECR_KSTAT(pr_ue);
		} else {
			PR_DECR_KSTAT(pr_mce);
		}
		page_clrtoxic(pp, PR_ALLFLAGS);

		if (free) {
			PR_DEBUG(prd_udestroy);
			page_destroy(pp, 0);
		} else {
			PR_DEBUG(prd_uhashout);
			page_hashout(pp, NULL);
		}

		mutex_enter(&freemem_lock);
		availrmem++;
		mutex_exit(&freemem_lock);

		PR_DEBUG(prd_uunretired);
		PR_DECR_KSTAT(pr_retired);
		PR_INCR_KSTAT(pr_unretired);
		return (page_retire_done(pp, PRD_UNR_SUCCESS));
	}
	PR_DEBUG(prd_unotlocked);
	return (page_retire_done(pp, PRD_UNR_CANTLOCK));
}
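/*
 * Illustrative sketch (not part of the original source): unretiring a page
 * by physical address.  The PA-to-page_t lookup mirrors the one used by
 * page_retire() below; the helper name, the EINVAL return, and the choice
 * of free == 1 (lock the page and return it to the freelist) are
 * illustrative assumptions only.
 */
static int
example_unretire_pa(uint64_t pa)
{
	page_t *pp = page_numtopp_nolock(mmu_btop(pa));

	if (pp == NULL) {
		/* Not a valid, known physical address. */
		return (EINVAL);
	}

	/* free == 1: take SE_EXCL via page_try_reclaim_lock() and free it. */
	return (page_unretire_pp(pp, 1));
}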
/*
 * Release the page's "shared/exclusive" lock and wake up anyone
 * who might be waiting for it.
 */
void
page_unlock(page_t *pp)
{
	kmutex_t *pse = PAGE_SE_MUTEX(pp);
	selock_t old;

	mutex_enter(pse);

	old = pp->p_selock;
	if ((old & ~SE_EWANTED) == SE_READER) {
		pp->p_selock = old & ~SE_READER;
		if (CV_HAS_WAITERS(&pp->p_cv))
			cv_broadcast(&pp->p_cv);
	} else if ((old & ~SE_EWANTED) == SE_DELETED) {
		panic("page_unlock: page %p is deleted", pp);
	} else if (old < 0) {
		THREAD_KPRI_RELEASE();
		pp->p_selock &= SE_EWANTED;
		if (CV_HAS_WAITERS(&pp->p_cv))
			cv_broadcast(&pp->p_cv);
	} else if ((old & ~SE_EWANTED) > SE_READER) {
		pp->p_selock = old - SE_READER;
	} else {
		panic("page_unlock: page %p is not locked", pp);
	}

	if (pp->p_selock == 0) {
		/*
		 * If the T_CAPTURING bit is set, that means that we should
		 * not try and capture the page again as we could recurse
		 * which could lead to a stack overflow panic or spending a
		 * relatively long time in the kernel making no progress.
		 */
		if ((pp->p_toxic & PR_CAPTURE) &&
		    !(curthread->t_flag & T_CAPTURING) &&
		    !PP_RETIRED(pp)) {
			THREAD_KPRI_REQUEST();
			pp->p_selock = SE_WRITER;
			mutex_exit(pse);
			page_unlock_capture(pp);
		} else {
			mutex_exit(pse);
		}
	} else {
		mutex_exit(pse);
	}
}
/*
 * On a reboot, our friend mdboot() wants to clear up any PP_PR_REQ() pages
 * that we were not able to retire. On large machines, walking the complete
 * page_t array and looking at every page_t takes too long. So, as a page is
 * marked toxic, we track it using a list that can be processed at reboot
 * time. page_retire_enqueue() will do its best to try to avoid duplicate
 * entries, but if we get too many errors at once the queue can overflow,
 * in which case we will end up walking every page_t as a last resort.
 * The background thread also makes use of this queue to find which pages
 * are pending retirement.
 */
static void
page_retire_enqueue(page_t *pp)
{
	int	nslot = -1;
	int	i;

	mutex_enter(&pr_q_mutex);

	/*
	 * Check to make sure retire hasn't already dequeued it.
	 * In the meantime if the page was cleaned up, no need
	 * to enqueue it.
	 */
	if (PP_RETIRED(pp) || pp->p_toxic == 0) {
		mutex_exit(&pr_q_mutex);
		PR_DEBUG(prd_noaction);
		return;
	}

	for (i = 0; i < PR_PENDING_QMAX; i++) {
		if (pr_pending_q[i] == pp) {
			mutex_exit(&pr_q_mutex);
			PR_DEBUG(prd_qdup);
			return;
		} else if (nslot == -1 && pr_pending_q[i] == NULL) {
			nslot = i;
		}
	}

	PR_INCR_KSTAT(pr_pending);

	if (nslot != -1) {
		pr_pending_q[nslot] = pp;
		PR_DEBUG(prd_queued);
	} else {
		PR_INCR_KSTAT(pr_enqueue_fail);
		PR_DEBUG(prd_notqueued);
	}

	mutex_exit(&pr_q_mutex);
}
/*
 * page_retire() - the front door in to retire a page.
 *
 * Ideally, page_retire() would instantly retire the requested page.
 * Unfortunately, some pages are locked or otherwise tied up and cannot be
 * retired right away. To deal with that, bits are set in p_toxic of the
 * page_t. An attempt is made to lock the page; if the attempt is successful,
 * we instantly unlock the page, counting on page_unlock() to notice p_toxic
 * is nonzero and to call back into page_retire_pp(). Success is determined
 * by looking to see whether the page has been retired once it has been
 * unlocked.
 *
 * Returns:
 *
 *   - 0 on success,
 *   - EINVAL when the PA is invalid,
 *   - EIO if the page is already retired or already pending retirement, or
 *   - EAGAIN if the page could not be _immediately_ retired but is pending.
 */
int
page_retire(uint64_t pa, uchar_t reason)
{
	page_t	*pp;

	ASSERT(reason & PR_REASONS);		/* there must be a reason */
	ASSERT(!(reason & ~PR_REASONS));	/* but no other bits */

	pp = page_numtopp_nolock(mmu_btop(pa));
	if (pp == NULL) {
		PR_MESSAGE(CE_WARN, 1, "Cannot schedule clearing of error on"
		    " page 0x%08x.%08x; page is not relocatable memory", pa);
		return (page_retire_done(pp, PRD_INVALID_PA));
	}
	if (PP_RETIRED(pp)) {
		PR_DEBUG(prd_dup1);
		return (page_retire_done(pp, PRD_DUPLICATE));
	}

	if ((reason & PR_UE) && !PP_TOXIC(pp)) {
		PR_MESSAGE(CE_NOTE, 1, "Scheduling clearing of error on"
		    " page 0x%08x.%08x", pa);
	} else if (PP_PR_REQ(pp)) {
		PR_DEBUG(prd_dup2);
		return (page_retire_done(pp, PRD_DUPLICATE));
	} else {
		PR_MESSAGE(CE_NOTE, 1, "Scheduling removal of"
		    " page 0x%08x.%08x", pa);
	}
	page_settoxic(pp, reason);
	page_retire_enqueue(pp);

	/*
	 * And now for some magic.
	 *
	 * We marked this page toxic up above. All there is left to do is
	 * to try to lock the page and then unlock it. The page lock routines
	 * will intercept the page and retire it if they can. If the page
	 * cannot be locked, that's okay -- page_unlock() or the background
	 * thread will eventually get to it; until then, the lock routines
	 * will deny further locks on it.
	 */
	if (MTBF(pr_calls, pr_mtbf) && page_trylock(pp, SE_EXCL)) {
		PR_DEBUG(prd_prlocked);
		page_unlock(pp);
	} else {
		PR_DEBUG(prd_prnotlocked);
	}

	if (PP_RETIRED(pp)) {
		PR_DEBUG(prd_prretired);
		return (0);
	} else {
		cv_signal(&pr_cv);
		PR_INCR_KSTAT(pr_failed);

		if (pp->p_toxic & PR_MSG) {
			return (page_retire_done(pp, PRD_FAILED));
		} else {
			return (page_retire_done(pp, PRD_PENDING));
		}
	}
}
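/*
 * Illustrative sketch (not part of the original source): asking for a page
 * to be retired after an uncorrectable error and reacting to the documented
 * return values.  The helper name is hypothetical; PR_UE is one of the
 * PR_REASONS bits that the ASSERTs above require.
 */
static void
example_retire_ue(uint64_t pa)
{
	switch (page_retire(pa, PR_UE)) {
	case 0:
		/* The page was retired immediately. */
		break;
	case EAGAIN:
		/*
		 * Retirement is pending; page_unlock() or the background
		 * thread will complete it once the page can be locked.
		 */
		break;
	case EIO:
		/* The page is already retired or already pending. */
		break;
	case EINVAL:
		/* The physical address did not map to usable memory. */
		break;
	}
}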
/*
 * Read the comments inside of page_lock_es() carefully.
 *
 * SE_EXCL callers specifying es == SE_EXCL_WANTED will cause the
 * SE_EWANTED bit of p_selock to be set when the lock cannot be obtained.
 * This is used by threads subject to reader-starvation (e.g. memory delete).
 *
 * When a thread using SE_EXCL_WANTED does not obtain the SE_EXCL lock,
 * it is expected that it will retry at a later time. Threads that will
 * not retry the lock *must* call page_lock_clr_exclwanted to clear the
 * SE_EWANTED bit. (When a thread using SE_EXCL_WANTED obtains the lock,
 * the bit is cleared.)
 */
int
page_try_reclaim_lock(page_t *pp, se_t se, int es)
{
	kmutex_t *pse = PAGE_SE_MUTEX(pp);
	selock_t old;

	mutex_enter(pse);

	old = pp->p_selock;

	ASSERT(((es & SE_EXCL_WANTED) == 0) ||
	    ((es & SE_EXCL_WANTED) && (se == SE_EXCL)));

	if (PP_RETIRED(pp) && !(es & SE_RETIRED)) {
		mutex_exit(pse);
		VM_STAT_ADD(page_trylock_failed);
		return (0);
	}

	if (se == SE_SHARED && es == 1 && old == 0) {
		se = SE_EXCL;
	}

	if (se == SE_SHARED) {
		if (!PP_ISFREE(pp)) {
			if (old >= 0) {
				/*
				 * Readers are not allowed when excl wanted
				 */
				if ((old & SE_EWANTED) == 0) {
					pp->p_selock = old + SE_READER;
					mutex_exit(pse);
					return (1);
				}
			}
			mutex_exit(pse);
			return (0);
		}
		/*
		 * The page is free, so we really want SE_EXCL (below)
		 */
		VM_STAT_ADD(page_try_reclaim_upgrade);
	}

	/*
	 * The caller wants a writer lock. We try for it only if
	 * SE_EWANTED is not set, or if the caller specified
	 * SE_EXCL_WANTED.
	 */
	if (!(old & SE_EWANTED) || (es & SE_EXCL_WANTED)) {
		if ((old & ~SE_EWANTED) == 0) {
			/* no reader/writer lock held */
			THREAD_KPRI_REQUEST();
			/* this clears out our setting of the SE_EWANTED bit */
			pp->p_selock = SE_WRITER;
			mutex_exit(pse);
			return (1);
		}
	}
	if (es & SE_EXCL_WANTED) {
		/* page is locked, set the SE_EWANTED bit */
		pp->p_selock |= SE_EWANTED;
	}
	mutex_exit(pse);
	return (0);
}
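/*
 * Illustrative sketch (not part of the original source): the retry protocol
 * described above for SE_EXCL_WANTED callers.  A thread that fails to get
 * SE_EXCL and decides not to retry must clear SE_EWANTED with
 * page_lock_clr_exclwanted(), as the comment above requires.  The retry
 * count, the use of delay(9F) for backoff, and the assumed single-argument
 * signature of page_lock_clr_exclwanted() are illustrative assumptions.
 */
static int
example_excl_wanted_lock(page_t *pp)
{
	int	tries;

	for (tries = 0; tries < 10; tries++) {
		if (page_try_reclaim_lock(pp, SE_EXCL,
		    SE_EXCL_WANTED | SE_RETIRED)) {
			/* Got SE_EXCL; SE_EWANTED was cleared for us. */
			return (1);
		}
		delay(1);	/* back off for one clock tick */
	}

	/* Giving up: do not leave SE_EWANTED set behind. */
	page_lock_clr_exclwanted(pp);
	return (0);
}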
/*
 * With the addition of reader-writer lock semantics to page_lock_es,
 * callers wanting an exclusive (writer) lock may prevent shared-lock
 * (reader) starvation by setting the es parameter to SE_EXCL_WANTED.
 * In this case, when an exclusive lock cannot be acquired, p_selock's
 * SE_EWANTED bit is set. Shared-lock (reader) requests are also denied
 * if the page is slated for retirement.
 *
 * The se and es parameters determine if the lock should be granted
 * based on the following decision table:
 *
 * Lock wanted   es flags     p_selock/SE_EWANTED  Action
 * -----------   -----------  -------------------  ------
 * SE_EXCL       any [1][2]   unlocked/any         grant lock, clear SE_EWANTED
 * SE_EXCL       SE_EWANTED   any lock/any         deny, set SE_EWANTED
 * SE_EXCL       none         any lock/any         deny
 * SE_SHARED     n/a [2]      shared/0             grant
 * SE_SHARED     n/a [2]      unlocked/0           grant
 * SE_SHARED     n/a          shared/1             deny
 * SE_SHARED     n/a          unlocked/1           deny
 * SE_SHARED     n/a          excl/any             deny
 *
 * Notes:
 * [1] The code grants an exclusive lock to the caller and clears the bit
 *     SE_EWANTED whenever p_selock is unlocked, regardless of the SE_EWANTED
 *     bit's value. This was deemed acceptable as we are not concerned about
 *     exclusive-lock starvation. If this ever becomes an issue, a priority or
 *     FIFO mechanism should also be implemented. Meantime, the thread that
 *     set SE_EWANTED should be prepared to catch this condition and reset it.
 *
 * [2] Retired pages may not be locked at any time, regardless of the
 *     disposition of se, unless the es parameter has the SE_RETIRED flag set.
 *
 * Notes on values of "es":
 *
 *   es & 1: page_lookup_create will attempt page relocation
 *   es & SE_EXCL_WANTED: caller wants SE_EWANTED set (e.g. delete
 *       memory thread); this prevents reader-starvation of waiting
 *       writer thread(s) by giving priority to writers over readers.
 *   es & SE_RETIRED: caller wants to lock pages even if they are
 *       retired. Default is to deny the lock if the page is retired.
 *
 * And yes, we know, the semantics of this function are too complicated.
 * It's on the list to be cleaned up.
 */
int
page_lock_es(page_t *pp, se_t se, kmutex_t *lock, reclaim_t reclaim, int es)
{
	int		retval;
	kmutex_t	*pse = PAGE_SE_MUTEX(pp);
	int		upgraded;
	int		reclaim_it;

	ASSERT(lock != NULL ? MUTEX_HELD(lock) : 1);

	VM_STAT_ADD(page_lock_count);

	upgraded = 0;
	reclaim_it = 0;

	mutex_enter(pse);

	ASSERT(((es & SE_EXCL_WANTED) == 0) ||
	    ((es & SE_EXCL_WANTED) && (se == SE_EXCL)));

	if (PP_RETIRED(pp) && !(es & SE_RETIRED)) {
		mutex_exit(pse);
		VM_STAT_ADD(page_lock_retired);
		return (0);
	}

	if (se == SE_SHARED && es == 1 && pp->p_selock == 0) {
		se = SE_EXCL;
	}

	if ((reclaim == P_RECLAIM) && (PP_ISFREE(pp))) {
		reclaim_it = 1;
		if (se == SE_SHARED) {
			/*
			 * This is an interesting situation.
			 *
			 * Remember that p_free can only change if
			 * p_selock < 0. p_free does not depend on our holding
			 * `pse'. And, since we hold `pse', p_selock can not
			 * change. So, if p_free changes on us, the page is
			 * already exclusively held, and we would fail to get
			 * p_selock regardless.
			 *
			 * We want to avoid getting the share lock on a free
			 * page that needs to be reclaimed. It is possible
			 * that some other thread has the share lock and has
			 * left the free page on the cache list.
			 * pvn_vplist_dirty() does this for brief periods.
			 * If the page is currently locked SE_EXCL, we will
			 * fail to acquire p_selock anyway. Blocking is the
			 * right thing to do.
			 * If we need to reclaim this page, we must get
			 * exclusive access to it, so force the upgrade now.
			 * Again, we will fail to acquire p_selock if the
			 * page is not free, and block.
			 */
			upgraded = 1;
			se = SE_EXCL;
			VM_STAT_ADD(page_lock_upgrade);
		}
	}

	if (se == SE_EXCL) {
		if (!(es & SE_EXCL_WANTED) && (pp->p_selock & SE_EWANTED)) {
			/*
			 * if the caller wants a writer lock (but did not
			 * specify exclusive access), and there is a pending
			 * writer that wants exclusive access, return failure
			 */
			retval = 0;
		} else if ((pp->p_selock & ~SE_EWANTED) == 0) {
			/* no reader/writer lock held */
			THREAD_KPRI_REQUEST();
			/* this clears our setting of the SE_EWANTED bit */
			pp->p_selock = SE_WRITER;
			retval = 1;
		} else {
			/* page is locked */
			if (es & SE_EXCL_WANTED) {
				/* set the SE_EWANTED bit */
				pp->p_selock |= SE_EWANTED;
			}
			retval = 0;
		}
	} else {
		retval = 0;
		if (pp->p_selock >= 0) {
			if ((pp->p_selock & SE_EWANTED) == 0) {
				pp->p_selock += SE_READER;
				retval = 1;
			}
		}
	}

	if (retval == 0) {
		if ((pp->p_selock & ~SE_EWANTED) == SE_DELETED) {
			VM_STAT_ADD(page_lock_deleted);
			mutex_exit(pse);
			return (retval);
		}

#ifdef VM_STATS
		VM_STAT_ADD(page_lock_miss);
		if (upgraded) {
			VM_STAT_ADD(page_lock_upgrade_failed);
		}
#endif
		if (lock) {
			VM_STAT_ADD(page_lock_miss_lock);
			mutex_exit(lock);
		}

		/*
		 * Now, wait for the page to be unlocked and
		 * release the lock protecting p_cv and p_selock.
		 */
		cv_wait(&pp->p_cv, pse);
		mutex_exit(pse);

		/*
		 * The page identity may have changed while we were
		 * blocked. If we are willing to depend on "pp"
		 * still pointing to a valid page structure (i.e.,
		 * assuming page structures are not dynamically allocated
		 * or freed), we could try to lock the page if its
		 * identity hasn't changed.
		 *
		 * This needs to be measured, since we come back from
		 * cv_wait holding pse (the expensive part of this
		 * operation) we might as well try the cheap part.
		 * Though we would also have to confirm that dropping
		 * `lock' did not cause any grief to the callers.
		 */
		if (lock) {
			mutex_enter(lock);
		}
	} else {
		/*
		 * We have the page lock.
		 * If we needed to reclaim the page, and the page
		 * needed reclaiming (ie, it was free), then we
		 * have the page exclusively locked. We may need
		 * to downgrade the page.
		 */
		ASSERT((upgraded) ?
		    ((PP_ISFREE(pp)) && PAGE_EXCL(pp)) : 1);
		mutex_exit(pse);

		/*
		 * We now hold this page's lock, either shared or
		 * exclusive. This will prevent its identity from changing.
		 * The page, however, may or may not be free. If the caller
		 * requested, and it is free, go reclaim it from the
		 * free list. If the page can't be reclaimed, return failure
		 * so that the caller can start all over again.
		 *
		 * NOTE: page_reclaim() releases the page lock (p_selock)
		 * if it can't be reclaimed.
		 */
		if (reclaim_it) {
			if (!page_reclaim(pp, lock)) {
				VM_STAT_ADD(page_lock_bad_reclaim);
				retval = 0;
			} else {
				VM_STAT_ADD(page_lock_reclaim);
				if (upgraded) {
					page_downgrade(pp);
				}
			}
		}
	}

	return (retval);
}
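/*
 * Illustrative sketch (not part of the original source): two calls that
 * exercise rows of the decision table above.  P_NO_RECLAIM is assumed to be
 * the reclaim_t value meaning "do not reclaim a free page", and the helper
 * name is hypothetical; everything else is taken from the comments above.
 */
static void
example_lock_es_usage(page_t *pp)
{
	/*
	 * Shared (reader) request with no es flags.  On contention the
	 * routine waits once on p_cv and returns 0, so a caller that must
	 * have the lock would retry.
	 */
	if (page_lock_es(pp, SE_SHARED, NULL, P_NO_RECLAIM, 0)) {
		/* ... read from the page ... */
		page_unlock(pp);
	}

	/*
	 * Writer that refuses to be starved by readers: a failed attempt
	 * leaves SE_EWANTED set, turning new readers away until this thread
	 * retries or clears the bit, per the SE_EXCL_WANTED rules above.
	 */
	if (page_lock_es(pp, SE_EXCL, NULL, P_NO_RECLAIM, SE_EXCL_WANTED)) {
		/* ... modify the page ... */
		page_unlock(pp);
	} else {
		/* Not retrying here, so clear SE_EWANTED. */
		page_lock_clr_exclwanted(pp);
	}
}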