Example #1
File: balloon.c Project: bahamas10/openzfs
/*
 * The page represented by pp is being given back to the hypervisor.
 * Add the page_t structure to our spare list.
 */
static void
balloon_page_add(page_t *pp)
{
	/*
	 * We need to keep the page exclusively locked
	 * to prevent swrand from grabbing it.
	 */
	ASSERT(PAGE_EXCL(pp));
	ASSERT(MUTEX_HELD(&bln_mutex));

	pp->p_prev = NULL;
	if (bln_spare_list_front == NULL) {
		bln_spare_list_front = bln_spare_list_back = pp;
		pp->p_next = NULL;
	} else if (pp->p_pagenum >= mfn_count) {
		/*
	 * The pfn is invalid, so add it at the end of the list.  Since
	 * these adds should *only* be done by balloon_init_new_pages(),
	 * which performs its adds in order, the following ASSERT should
	 * never trigger.
		 */
		ASSERT(pp->p_pagenum > bln_spare_list_back->p_pagenum);
		bln_spare_list_back->p_next = pp;
		pp->p_next = NULL;
		bln_spare_list_back = pp;
	} else {
		/* Add at beginning of list */
		pp->p_next = bln_spare_list_front;
		bln_spare_list_front = pp;
	}
}
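A minimal caller sketch (hypothetical helper, not taken from balloon.c; it assumes the caller already holds the page SE_EXCL, matching the ASSERTs above):

/*
 * Hypothetical wrapper: take bln_mutex around the list manipulation,
 * as balloon_page_add() requires.
 */
static void
balloon_stash_page(page_t *pp)
{
	ASSERT(PAGE_EXCL(pp));	/* caller acquired the exclusive lock */
	mutex_enter(&bln_mutex);
	balloon_page_add(pp);
	mutex_exit(&bln_mutex);
}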
Example #2
/*
 * Try to retire a page when we stumble onto it in the page lock routines.
 */
void
page_tryretire(page_t *pp)
{
	ASSERT(PAGE_EXCL(pp));

	if (!pr_enable) {
		page_unlock(pp);
		return;
	}

	/*
	 * If the page is a big page, try to break it up.
	 *
	 * If there are other bad pages besides `pp', they will be
	 * recursively retired for us thanks to a bit of magic.
	 * If the page is a small page with errors, try to retire it.
	 */
	if (pp->p_szc > 0) {
		if (PP_ISFREE(pp) && !page_try_demote_free_pages(pp)) {
			page_unlock(pp);
			PR_DEBUG(prd_nofreedemote);
			return;
		} else if (!page_try_demote_pages(pp)) {
			page_unlock(pp);
			PR_DEBUG(prd_nodemote);
			return;
		}
		PR_DEBUG(prd_demoted);
		page_unlock(pp);
	} else {
		(void) page_retire_pp(pp);
	}
}
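A sketch of the hand-off this function expects (illustrative only: PP_PR_REQ() is the pending-retire predicate used elsewhere in these examples, but this is not the actual page_unlock() code):

/*
 * Hypothetical caller: pass ownership of the SE_EXCL lock to
 * page_tryretire(), which always drops it before returning.
 */
static void
page_release_or_retire(page_t *pp)
{
	ASSERT(PAGE_EXCL(pp));
	if (PP_PR_REQ(pp))
		page_tryretire(pp);	/* consumes the lock */
	else
		page_unlock(pp);
}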
Example #3
/*
 * Take a retired page off the retired-pages vnode and clear the toxic flags.
 * If "free" is nonzero, lock it and put it back on the freelist. If "free"
 * is zero, the caller already holds the SE_EXCL lock, so we simply unretire it
 * and don't do anything else with it.
 *
 * Any unretire messages are printed from this routine.
 *
 * Returns 0 if page pp was unretired; else an error code.
 */
int
page_unretire_pp(page_t *pp, int free)
{
	/*
	 * To be retired, a page has to be hashed onto the retired_pages vnode
	 * and have PR_RETIRED set in p_toxic.
	 */
	if (free == 0 || page_try_reclaim_lock(pp, SE_EXCL, SE_RETIRED)) {
		ASSERT(PAGE_EXCL(pp));
		PR_DEBUG(prd_ulocked);
		if (!PP_RETIRED(pp)) {
			PR_DEBUG(prd_unotretired);
			page_unlock(pp);
			return (page_retire_done(pp, PRD_UNR_NOT));
		}

		PR_MESSAGE(CE_NOTE, 1, "unretiring retired"
		    " page 0x%08x.%08x", mmu_ptob((uint64_t)pp->p_pagenum));
		if (pp->p_toxic & PR_FMA) {
			PR_DECR_KSTAT(pr_fma);
		} else if (pp->p_toxic & PR_UE) {
			PR_DECR_KSTAT(pr_ue);
		} else {
			PR_DECR_KSTAT(pr_mce);
		}
		page_clrtoxic(pp, PR_ALLFLAGS);

		if (free) {
			PR_DEBUG(prd_udestroy);
			page_destroy(pp, 0);
		} else {
			PR_DEBUG(prd_uhashout);
			page_hashout(pp, NULL);
		}

		mutex_enter(&freemem_lock);
		availrmem++;
		mutex_exit(&freemem_lock);

		PR_DEBUG(prd_uunretired);
		PR_DECR_KSTAT(pr_retired);
		PR_INCR_KSTAT(pr_unretired);
		return (page_retire_done(pp, PRD_UNR_SUCCESS));
	}
	PR_DEBUG(prd_unotlocked);
	return (page_retire_done(pp, PRD_UNR_CANTLOCK));
}
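A call-site sketch (hypothetical; the comment restates the contract above rather than quoting a real caller):

	/*
	 * Hypothetical call site: with free != 0, page_unretire_pp()
	 * takes the SE_EXCL lock itself and frees the page on success.
	 */
	if (page_unretire_pp(pp, 1) != 0) {
		/* the page stays retired; an error code was returned */
	}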
Example #4
void
page_lock_delete(page_t *pp)
{
	kmutex_t *pse = PAGE_SE_MUTEX(pp);

	ASSERT(PAGE_EXCL(pp));
	ASSERT(pp->p_vnode == NULL);
	ASSERT(pp->p_offset == (u_offset_t)-1);
	ASSERT(!PP_ISFREE(pp));

	mutex_enter(pse);
	THREAD_KPRI_RELEASE();
	pp->p_selock = SE_DELETED;
	if (CV_HAS_WAITERS(&pp->p_cv))
		cv_broadcast(&pp->p_cv);
	mutex_exit(pse);
}
Example #5
/*
 * Downgrade the "exclusive" lock on the page to a "shared" lock
 * while holding the mutex protecting this page's p_selock field.
 */
void
page_downgrade(page_t *pp)
{
	kmutex_t *pse = PAGE_SE_MUTEX(pp);
	int excl_waiting;

	ASSERT((pp->p_selock & ~SE_EWANTED) != SE_DELETED);
	ASSERT(PAGE_EXCL(pp));

	mutex_enter(pse);
	excl_waiting = pp->p_selock & SE_EWANTED;
	THREAD_KPRI_RELEASE();
	pp->p_selock = SE_READER | excl_waiting;
	if (CV_HAS_WAITERS(&pp->p_cv))
		cv_broadcast(&pp->p_cv);
	mutex_exit(pse);
}
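A usage sketch (hypothetical; it assumes page_lookup() returns the page exclusively locked when asked for SE_EXCL, as in the segkmem example below):

	/*
	 * Hypothetical caller: do exclusive-only setup, then downgrade
	 * so concurrent readers can proceed before the final unlock.
	 */
	pp = page_lookup(vp, off, SE_EXCL);
	if (pp != NULL) {
		/* ... initialization requiring PAGE_EXCL(pp) ... */
		page_downgrade(pp);	/* now holds SE_SHARED */
		/* ... work that tolerates other readers ... */
		page_unlock(pp);
	}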
Example #6
/*
 * Try to clear a page_t with a single UE. If the UE was transient, the
 * page is returned to service and we return 1. Otherwise we return 0,
 * meaning that further processing is required to retire the page.
 */
static int
page_retire_transient_ue(page_t *pp)
{
	ASSERT(PAGE_EXCL(pp));
	ASSERT(!hat_page_is_mapped(pp));

	/*
	 * If this page is a repeat offender, retire him under the
	 * "two strikes and you're out" rule. The caller is responsible
	 * for scrubbing the page to try to clear the error.
	 */
	if (pp->p_toxic & PR_UE_SCRUBBED) {
		PR_INCR_KSTAT(pr_ue_persistent);
		return (0);
	}

	if (page_clear_transient_ue(pp)) {
		/*
		 * We set the PR_UE_SCRUBBED bit; if we ever see this
		 * page again, we will retire it, no questions asked.
		 */
		page_settoxic(pp, PR_UE_SCRUBBED);

		if (page_retire_first_ue) {
			PR_INCR_KSTAT(pr_ue_cleared_retire);
			return (0);
		} else {
			PR_INCR_KSTAT(pr_ue_cleared_free);

			page_clrtoxic(pp, PR_UE | PR_MCE | PR_MSG | PR_BUSY);
			page_retire_dequeue(pp);

			/* LINTED: CONSTCOND */
			VN_DISPOSE(pp, B_FREE, 1, kcred);
			return (1);
		}
	}

	PR_INCR_KSTAT(pr_ue_persistent);
	return (0);
}
Example #7
/*
 * Act like page_destroy(), but instead of freeing the page, hash it onto
 * the retired_pages vnode, and mark it retired.
 *
 * For fun, we try to scrub the page until it's squeaky clean.
 * availrmem is adjusted here.
 */
static void
page_retire_destroy(page_t *pp)
{
	u_offset_t off = (u_offset_t)((uintptr_t)pp);

	ASSERT(PAGE_EXCL(pp));
	ASSERT(!PP_ISFREE(pp));
	ASSERT(pp->p_szc == 0);
	ASSERT(!hat_page_is_mapped(pp));
	ASSERT(!pp->p_vnode);

	page_clr_all_props(pp);
	pagescrub(pp, 0, MMU_PAGESIZE);

	pp->p_next = NULL;
	pp->p_prev = NULL;
	if (page_hashin(pp, retired_pages, off, NULL) == 0) {
		cmn_err(CE_PANIC, "retired page %p hashin failed", (void *)pp);
	}

	page_settoxic(pp, PR_RETIRED);
	page_clrtoxic(pp, PR_BUSY);
	page_retire_dequeue(pp);
	PR_INCR_KSTAT(pr_retired);

	if (pp->p_toxic & PR_FMA) {
		PR_INCR_KSTAT(pr_fma);
	} else if (pp->p_toxic & PR_UE) {
		PR_INCR_KSTAT(pr_ue);
	} else {
		PR_INCR_KSTAT(pr_mce);
	}

	mutex_enter(&freemem_lock);
	availrmem--;
	mutex_exit(&freemem_lock);

	page_unlock(pp);
}
Example #8
File: balloon.c Project: bahamas10/openzfs
/*
 * Return a page_t structure from our spare list, or NULL if none are available.
 */
static page_t *
balloon_page_sub(void)
{
	page_t *pp;

	ASSERT(MUTEX_HELD(&bln_mutex));
	if (bln_spare_list_front == NULL) {
		return (NULL);
	}

	pp = bln_spare_list_front;
	ASSERT(PAGE_EXCL(pp));
	ASSERT(pp->p_pagenum <= mfn_count);
	if (pp->p_pagenum == mfn_count) {
		return (NULL);
	}

	bln_spare_list_front = pp->p_next;
	if (bln_spare_list_front == NULL)
		bln_spare_list_back = NULL;
	pp->p_next = NULL;
	return (pp);
}
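A consumer sketch to pair with balloon_page_add() from Example #1 (hypothetical; what a real caller does with the page afterwards is an assumption):

	/*
	 * Hypothetical consumer: the page comes back still SE_EXCL
	 * locked, per the ASSERT in balloon_page_sub().
	 */
	mutex_enter(&bln_mutex);
	pp = balloon_page_sub();
	mutex_exit(&bln_mutex);
	if (pp != NULL) {
		ASSERT(PAGE_EXCL(pp));
		/* ... assign the page a new MFN, then unlock or free it ... */
	}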
Example #9
static void
segkmem_free_one_lp(caddr_t addr, size_t size)
{
	page_t		*pp, *rootpp = NULL;
	pgcnt_t 	pgs_left = btopr(size);

	ASSERT(size == segkmem_lpsize);

	hat_unload(kas.a_hat, addr, size, HAT_UNLOAD_UNLOCK);

	for (; pgs_left > 0; addr += PAGESIZE, pgs_left--) {
		pp = page_lookup(&kvp, (u_offset_t)(uintptr_t)addr, SE_EXCL);
		if (pp == NULL)
			panic("segkmem_free_one_lp: page not found");
		ASSERT(PAGE_EXCL(pp));
		pp->p_lckcnt = 0;
		if (rootpp == NULL)
			rootpp = pp;
	}
	ASSERT(rootpp != NULL);
	page_destroy_pages(rootpp);

	/* page_unresv() is done by the caller */
}
Example #10
/*
 * Attempt to clear a UE from a page.
 * Returns 1 if the error has been successfully cleared.
 */
static int
page_clear_transient_ue(page_t *pp)
{
	caddr_t		kaddr;
	uint8_t		rb, wb;
	uint64_t	pa;
	uint32_t	pa_hi, pa_lo;
	on_trap_data_t	otd;
	int		errors = 0;
	int		i;

	ASSERT(PAGE_EXCL(pp));
	ASSERT(PP_PR_REQ(pp));
	ASSERT(pp->p_szc == 0);
	ASSERT(!hat_page_is_mapped(pp));

	/*
	 * Clear the page and attempt to clear the UE.  If we trap
	 * on the next access to the page, we know the UE has recurred.
	 */
	pagescrub(pp, 0, PAGESIZE);

	/*
	 * Map the page and write a bunch of bit patterns to compare
	 * what we wrote with what we read back.  This isn't a perfect
	 * test but it should be good enough to catch most of the
	 * recurring UEs. If this fails to catch a recurrent UE, we'll
	 * retire the page the next time we see a UE on the page.
	 */
	kaddr = ppmapin(pp, PROT_READ|PROT_WRITE, (caddr_t)-1);

	pa = ptob((uint64_t)page_pptonum(pp));
	pa_hi = (uint32_t)(pa >> 32);
	pa_lo = (uint32_t)pa;

	/*
	 * Fill the page with each (0x00 - 0xFF] bit pattern, flushing
	 * the cache in between reading and writing.  We do this under
	 * on_trap() protection to avoid recursion.
	 */
	if (on_trap(&otd, OT_DATA_EC)) {
		PR_MESSAGE(CE_WARN, 1, MSG_UE, pa);
		errors = 1;
	} else {
		for (wb = 0xff; wb > 0; wb--) {
			for (i = 0; i < PAGESIZE; i++) {
				kaddr[i] = wb;
			}

			sync_data_memory(kaddr, PAGESIZE);

			for (i = 0; i < PAGESIZE; i++) {
				rb = kaddr[i];
				if (rb != wb) {
					/*
					 * We had a mismatch without a trap.
					 * Uh-oh. Something is really wrong
					 * with this system.
					 */
					if (page_retire_messages) {
						cmn_err(CE_WARN, MSG_DM,
						    pa_hi, pa_lo, rb, wb);
					}
					errors = 1;
					goto out;	/* double break */
				}
			}
		}
	}
out:
	no_trap();
	ppmapout(kaddr);

	return (errors ? 0 : 1);
}
Example #11
/*
 * Note that multiple bits may be cleared in a single clrtoxic operation.
 * Must be called with the page exclusively locked to prevent races which
 * may attempt to retire a page without any toxic bits set.
 */
void
page_clrtoxic(page_t *pp, uchar_t bits)
{
	ASSERT(PAGE_EXCL(pp));
	atomic_and_8(&pp->p_toxic, ~bits);
}
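A sketch of the set/clear pairing (hypothetical sequence; page_settoxic(), PR_UE, and PR_ALLFLAGS all appear elsewhere in these examples):

	/*
	 * Hypothetical sequence: toxic bits are only manipulated while
	 * the page is held SE_EXCL, so these atomic updates cannot race
	 * with a concurrent retire of the same page.
	 */
	ASSERT(PAGE_EXCL(pp));
	page_settoxic(pp, PR_UE);	/* record the error */
	/* ... attempt to scrub or retire the page ... */
	page_clrtoxic(pp, PR_ALLFLAGS);	/* return the page to service */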
Example #12
/*
 * page_retire_pp() decides what to do with a failing page.
 *
 * When we get a free page (e.g. the scrubber or in the free path) life is
 * nice because the page is clean and marked free -- those always retire
 * nicely. From there we go by order of difficulty. If the page has data,
 * we attempt to relocate its contents to a suitable replacement page. If
 * that does not succeed, we look to see if it is clean. If after all of
 * this we have a clean, unmapped page (which we usually do!), we retire it.
 * If the page is not clean, we still retire it when the error is a UE; for
 * CEs or FMA requests, we fail and leave the page in service. The page will
 * eventually be tried again later. We always return with the page unlocked
 * since we are called from page_unlock().
 *
 * We don't call panic or do anything fancy down in here. Our boss the DE
 * gets paid handsomely to do his job of figuring out what to do when errors
 * occur. We just do what he tells us to do.
 */
static int
page_retire_pp(page_t *pp)
{
	int		toxic;

	ASSERT(PAGE_EXCL(pp));
	ASSERT(pp->p_iolock_state == 0);
	ASSERT(pp->p_szc == 0);

	PR_DEBUG(prd_top);
	PR_TYPES(pp);

	toxic = pp->p_toxic;
	ASSERT(toxic & PR_REASONS);

	if ((toxic & (PR_FMA | PR_MCE)) && !(toxic & PR_UE) &&
	    page_retire_limit()) {
		page_clrtoxic(pp, PR_FMA | PR_MCE | PR_MSG | PR_BUSY);
		page_retire_dequeue(pp);
		page_unlock(pp);
		return (page_retire_done(pp, PRD_LIMIT));
	}

	if (PP_ISFREE(pp)) {
		int dbgnoreclaim = MTBF(recl_calls, recl_mtbf) == 0;

		PR_DEBUG(prd_free);

		if (dbgnoreclaim || !page_reclaim(pp, NULL)) {
			PR_DEBUG(prd_noreclaim);
			PR_INCR_KSTAT(pr_failed);
			/*
			 * page_reclaim() returns with `pp' unlocked when
			 * it fails.
			 */
			if (dbgnoreclaim)
				page_unlock(pp);
			return (page_retire_done(pp, PRD_FAILED));
		}
	}
	ASSERT(!PP_ISFREE(pp));

	if ((toxic & PR_UE) == 0 && pp->p_vnode && !PP_ISNORELOCKERNEL(pp) &&
	    MTBF(reloc_calls, reloc_mtbf)) {
		page_t *newpp;
		spgcnt_t count;

		/*
		 * If we can relocate the page, great! newpp will go
		 * on without us, and everything is fine.  Regardless
		 * of whether the relocation succeeds, we are still
		 * going to take `pp' around back and shoot it.
		 */
		newpp = NULL;
		if (page_relocate(&pp, &newpp, 0, 0, &count, NULL) == 0) {
			PR_DEBUG(prd_reloc);
			page_unlock(newpp);
			ASSERT(hat_page_getattr(pp, P_MOD) == 0);
		} else {
			PR_DEBUG(prd_relocfail);
		}
	}

	if (hat_ismod(pp)) {
		PR_DEBUG(prd_mod);
		PR_INCR_KSTAT(pr_failed);
		page_unlock(pp);
		return (page_retire_done(pp, PRD_FAILED));
	}

	if (PP_ISKVP(pp)) {
		PR_DEBUG(prd_kern);
		PR_INCR_KSTAT(pr_failed_kernel);
		page_unlock(pp);
		return (page_retire_done(pp, PRD_FAILED));
	}

	if (pp->p_lckcnt || pp->p_cowcnt) {
		PR_DEBUG(prd_locked);
		PR_INCR_KSTAT(pr_failed);
		page_unlock(pp);
		return (page_retire_done(pp, PRD_FAILED));
	}

	(void) hat_pageunload(pp, HAT_FORCE_PGUNLOAD);
	ASSERT(!hat_page_is_mapped(pp));

	/*
	 * If the page is modified, and was not relocated; we can't
	 * retire it without dropping data on the floor. We have to
	 * recheck after unloading since the dirty bit could have been
	 * set since we last checked.
	 */
	if (hat_ismod(pp)) {
		PR_DEBUG(prd_mod_late);
		PR_INCR_KSTAT(pr_failed);
		page_unlock(pp);
		return (page_retire_done(pp, PRD_FAILED));
	}

	if (pp->p_vnode) {
		PR_DEBUG(prd_hashout);
		page_hashout(pp, NULL);
	}
	ASSERT(!pp->p_vnode);

	/*
	 * The problem page is locked, demoted, unmapped, not free,
	 * hashed out, and not COW or mlocked (whew!).
	 *
	 * Now we select our ammunition, take it around back, and shoot it.
	 */
	if (toxic & PR_UE) {
		if (page_retire_transient_ue(pp)) {
			PR_DEBUG(prd_uescrubbed);
			return (page_retire_done(pp, PRD_UE_SCRUBBED));
		} else {
			PR_DEBUG(prd_uenotscrubbed);
			page_retire_destroy(pp);
			return (page_retire_done(pp, PRD_SUCCESS));
		}
	} else if (toxic & PR_FMA) {
		PR_DEBUG(prd_fma);
		page_retire_destroy(pp);
		return (page_retire_done(pp, PRD_SUCCESS));
	} else if (toxic & PR_MCE) {
		PR_DEBUG(prd_mce);
		page_retire_destroy(pp);
		return (page_retire_done(pp, PRD_SUCCESS));
	}
	panic("page_retire_pp: bad toxic flags %d", toxic);
	/*NOTREACHED*/
}
Example #13
File: balloon.c Project: bahamas10/openzfs
/*
 * This function is called when we want to decrease the memory reservation
 * of our domain.  Allocate the memory and make a hypervisor call to give
 * it back.
 */
static spgcnt_t
balloon_dec_reservation(ulong_t debit)
{
	int	i, locked;
	long	rv;
	ulong_t	request;
	page_t	*pp;

	bzero(mfn_frames, sizeof (mfn_frames));
	bzero(pfn_frames, sizeof (pfn_frames));

	if (debit > FRAME_ARRAY_SIZE) {
		debit = FRAME_ARRAY_SIZE;
	}
	request = debit;

	/*
	 * Don't bother if there isn't a safe amount of kmem left.
	 */
	if (kmem_avail() < balloon_minkmem) {
		kmem_reap();
		if (kmem_avail() < balloon_minkmem)
			return (0);
	}

	if (page_resv(request, KM_NOSLEEP) == 0) {
		return (0);
	}
	xen_block_migrate();
	for (i = 0; i < debit; i++) {
		pp = page_get_high_mfn(new_high_mfn);
		new_high_mfn = 0;
		if (pp == NULL) {
			/*
			 * Call kmem_reap(), then try once more,
			 * but only if there is a safe amount of
			 * kmem left.
			 */
			kmem_reap();
			if (kmem_avail() < balloon_minkmem ||
			    (pp = page_get_high_mfn(0)) == NULL) {
				debit = i;
				break;
			}
		}
		ASSERT(PAGE_EXCL(pp));
		ASSERT(!hat_page_is_mapped(pp));

		balloon_page_add(pp);
		pfn_frames[i] = pp->p_pagenum;
		mfn_frames[i] = pfn_to_mfn(pp->p_pagenum);
	}
	if (debit == 0) {
		xen_allow_migrate();
		page_unresv(request);
		return (0);
	}

	/*
	 * We zero all the pages before we start reassigning them in order to
	 * minimize the time spent holding the lock on the contig pfn list.
	 */
	if (balloon_zero_memory) {
		for (i = 0; i < debit; i++) {
			pfnzero(pfn_frames[i], 0, PAGESIZE);
		}
	}

	/*
	 * Remove all mappings for the pfns from the system
	 */
	locked = balloon_lock_contig_pfnlist(debit);
	for (i = 0; i < debit; i++) {
		reassign_pfn(pfn_frames[i], MFN_INVALID);
	}
	if (locked)
		unlock_contig_pfnlist();

	rv = balloon_free_pages(debit, mfn_frames, NULL, NULL);

	if (rv < 0) {
		cmn_err(CE_WARN, "Attempt to return pages to the hypervisor "
		    "failed - up to %lu pages lost (error = %ld)", debit, rv);
		rv = 0;
	} else if (rv != debit) {
		panic("Unexpected return value (%ld) from decrease reservation "
		    "hypervisor call", rv);
	}

	xen_allow_migrate();
	if (debit != request)
		page_unresv(request - debit);
	return (rv);
}
Example #14
/*
 * Flags are composed of {B_ASYNC, B_INVAL, B_FREE, B_DONTNEED, B_DELWRI,
 * B_TRUNC, B_FORCE}.  B_DELWRI indicates that this page is part of a kluster
 * operation and is only to be considered if it doesn't involve any
 * waiting here.  B_TRUNC indicates that the file is being truncated
 * and so no i/o needs to be done. B_FORCE indicates that the page
 * must be destroyed so don't try writing it out.
 *
 * The caller must ensure that the page is locked.  Returns 1 if
 * the page should be written back (the "iolock" is held in this
 * case), or 0 if the page has been dealt with or has been
 * unlocked.
 */
int
pvn_getdirty(page_t *pp, int flags)
{
    ASSERT((flags & (B_INVAL | B_FREE)) ?
           PAGE_EXCL(pp) : PAGE_SHARED(pp));
    ASSERT(PP_ISFREE(pp) == 0);

    /*
     * If trying to invalidate or free a logically `locked' page,
     * forget it.  Don't need page_struct_lock to check p_lckcnt and
     * p_cowcnt as the page is exclusively locked.
     */
    if ((flags & (B_INVAL | B_FREE)) && !(flags & (B_TRUNC|B_FORCE)) &&
            (pp->p_lckcnt != 0 || pp->p_cowcnt != 0)) {
        page_unlock(pp);
        return (0);
    }

    /*
     * Now acquire the i/o lock so we can add it to the dirty
     * list (if necessary).  We avoid blocking on the i/o lock
     * in the following cases:
     *
     *	If B_DELWRI is set, which implies that this request is
     *	due to a klustering operation.
     *
     *	If this is an async (B_ASYNC) operation and we are not doing
     *	invalidation (B_INVAL) [The current i/o or fsflush will ensure
     *	that the page is written out].
     */
    if ((flags & B_DELWRI) || ((flags & (B_INVAL | B_ASYNC)) == B_ASYNC)) {
        if (!page_io_trylock(pp)) {
            page_unlock(pp);
            return (0);
        }
    } else {
        page_io_lock(pp);
    }

    /*
     * If we want to free or invalidate the page then
     * we need to unload it so that anyone who wants
     * it will have to take a minor fault to get it.
     * Otherwise, we're just writing the page back so we
     * need to sync up the hardware and software mod bit to
     * detect any future modifications.  We clear the
     * software mod bit when we put the page on the dirty
     * list.
     */
    if (flags & (B_INVAL | B_FREE)) {
        (void) hat_pageunload(pp, HAT_FORCE_PGUNLOAD);
    } else {
        (void) hat_pagesync(pp, HAT_SYNC_ZERORM);
    }

    if (!hat_ismod(pp) || (flags & B_TRUNC)) {
        /*
         * Don't need to add it to the
         * list after all.
         */
        page_io_unlock(pp);
        if (flags & B_INVAL) {
            /*LINTED: constant in conditional context*/
            VN_DISPOSE(pp, B_INVAL, 0, kcred);
        } else if (flags & B_FREE) {
            /*LINTED: constant in conditional context*/
            VN_DISPOSE(pp, B_FREE, (flags & B_DONTNEED), kcred);
        } else {
            /*
             * This is the advisory path for the callers
             * of VOP_PUTPAGE() who prefer freeing the
             * page _only_ if no one else is accessing it.
             * E.g. segmap_release()
             *
             * The above hat_ismod() check is useless because:
             * (1) we may not be holding SE_EXCL lock;
             * (2) we've not unloaded _all_ translations
             *
             * Let page_release() do the heavy-lifting.
             */
            (void) page_release(pp, 1);
        }
        return (0);
    }

    /*
     * Page is dirty, get it ready for the write back
     * and add page to the dirty list.
     */
    hat_clrrefmod(pp);

    /*
     * If we're going to free the page when we're done
     * then we can let others try to use it starting now.
     * We'll detect the fact that they used it when the
     * i/o is done and avoid freeing the page.
     */
    if (flags & B_FREE)
        page_downgrade(pp);


    TRACE_1(TR_FAC_VM, TR_PVN_GETDIRTY, "pvn_getdirty:pp %p", pp);

    return (1);
}
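A sketch of the caller's side of the contract (hypothetical; putapage, io_off, io_len, and cr are assumed to be in scope, mirroring the common VOP_PUTPAGE pattern rather than quoting real pvn code):

	/*
	 * Hypothetical caller: a return of 1 means the page is dirty
	 * and the i/o lock is held, so hand it to the filesystem's
	 * putapage routine; a return of 0 means pvn_getdirty() already
	 * disposed of the page and its lock.
	 */
	if (pvn_getdirty(pp, flags)) {
		err = (*putapage)(vp, pp, &io_off, &io_len, flags, cr);
	}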
Example #15
/*
 * Allocate pages to back the virtual address range [addr, addr + size).
 * If addr is NULL, allocate the virtual address space as well.
 */
void *
segkmem_xalloc(vmem_t *vmp, void *inaddr, size_t size, int vmflag, uint_t attr,
	page_t *(*page_create_func)(void *, size_t, int, void *), void *pcarg)
{
	page_t *ppl;
	caddr_t addr = inaddr;
	pgcnt_t npages = btopr(size);
	int allocflag;

	if (inaddr == NULL && (addr = vmem_alloc(vmp, size, vmflag)) == NULL)
		return (NULL);

	ASSERT(((uintptr_t)addr & PAGEOFFSET) == 0);

	if (page_resv(npages, vmflag & VM_KMFLAGS) == 0) {
		if (inaddr == NULL)
			vmem_free(vmp, addr, size);
		return (NULL);
	}

	ppl = page_create_func(addr, size, vmflag, pcarg);
	if (ppl == NULL) {
		if (inaddr == NULL)
			vmem_free(vmp, addr, size);
		page_unresv(npages);
		return (NULL);
	}

	/*
	 * Under certain conditions, we need to let the HAT layer know
	 * that it cannot safely allocate memory.  Allocations from
	 * the hat_memload vmem arena always need this, to prevent
	 * infinite recursion.
	 *
	 * In addition, the x86 hat cannot safely do memory
	 * allocations while in vmem_populate(), because there
	 * is no simple bound on its usage.
	 */
	if (vmflag & VM_MEMLOAD)
		allocflag = HAT_NO_KALLOC;
#if defined(__x86)
	else if (vmem_is_populator())
		allocflag = HAT_NO_KALLOC;
#endif
	else
		allocflag = 0;

	while (ppl != NULL) {
		page_t *pp = ppl;
		page_sub(&ppl, pp);
		ASSERT(page_iolock_assert(pp));
		ASSERT(PAGE_EXCL(pp));
		page_io_unlock(pp);
		hat_memload(kas.a_hat, (caddr_t)(uintptr_t)pp->p_offset, pp,
		    (PROT_ALL & ~PROT_USER) | HAT_NOSYNC | attr,
		    HAT_LOAD_LOCK | allocflag);
		pp->p_lckcnt = 1;
#if defined(__x86)
		page_downgrade(pp);
#else
		if (vmflag & SEGKMEM_SHARELOCKED)
			page_downgrade(pp);
		else
			page_unlock(pp);
#endif
	}

	return (addr);
}
Example #16
/*
 * With the addition of reader-writer lock semantics to page_lock_es,
 * callers wanting an exclusive (writer) lock may prevent shared-lock
 * (reader) starvation by setting the es parameter to SE_EXCL_WANTED.
 * In this case, when an exclusive lock cannot be acquired, p_selock's
 * SE_EWANTED bit is set. Shared-lock (reader) requests are also denied
 * if the page is slated for retirement.
 *
 * The se and es parameters determine if the lock should be granted
 * based on the following decision table:
 *
 * Lock wanted   es flags     p_selock/SE_EWANTED  Action
 * ----------- -------------- -------------------  ---------
 * SE_EXCL        any [1][2]   unlocked/any        grant lock, clear SE_EWANTED
 * SE_EXCL        SE_EWANTED   any lock/any        deny, set SE_EWANTED
 * SE_EXCL        none         any lock/any        deny
 * SE_SHARED      n/a [2]        shared/0          grant
 * SE_SHARED      n/a [2]      unlocked/0          grant
 * SE_SHARED      n/a            shared/1          deny
 * SE_SHARED      n/a          unlocked/1          deny
 * SE_SHARED      n/a              excl/any        deny
 *
 * Notes:
 * [1] The code grants an exclusive lock to the caller and clears the bit
 *   SE_EWANTED whenever p_selock is unlocked, regardless of the SE_EWANTED
 *   bit's value.  This was deemed acceptable as we are not concerned about
 *   exclusive-lock starvation. If this ever becomes an issue, a priority or
 *   fifo mechanism should also be implemented. Meantime, the thread that
 *   set SE_EWANTED should be prepared to catch this condition and reset it.
 *
 * [2] Retired pages may not be locked at any time, regardless of the
 *   disposition of se, unless the es parameter has the SE_RETIRED flag set.
 *
 * Notes on values of "es":
 *
 *   es & 1: page_lookup_create will attempt page relocation
 *   es & SE_EXCL_WANTED: caller wants SE_EWANTED set (eg. delete
 *       memory thread); this prevents reader-starvation of waiting
 *       writer thread(s) by giving priority to writers over readers.
 *   es & SE_RETIRED: caller wants to lock pages even if they are
 *       retired.  Default is to deny the lock if the page is retired.
 *
 * And yes, we know, the semantics of this function are too complicated.
 * It's on the list to be cleaned up.
 */
int
page_lock_es(page_t *pp, se_t se, kmutex_t *lock, reclaim_t reclaim, int es)
{
	int		retval;
	kmutex_t	*pse = PAGE_SE_MUTEX(pp);
	int		upgraded;
	int		reclaim_it;

	ASSERT(lock != NULL ? MUTEX_HELD(lock) : 1);

	VM_STAT_ADD(page_lock_count);

	upgraded = 0;
	reclaim_it = 0;

	mutex_enter(pse);

	ASSERT(((es & SE_EXCL_WANTED) == 0) ||
	    ((es & SE_EXCL_WANTED) && (se == SE_EXCL)));

	if (PP_RETIRED(pp) && !(es & SE_RETIRED)) {
		mutex_exit(pse);
		VM_STAT_ADD(page_lock_retired);
		return (0);
	}

	if (se == SE_SHARED && es == 1 && pp->p_selock == 0) {
		se = SE_EXCL;
	}

	if ((reclaim == P_RECLAIM) && (PP_ISFREE(pp))) {

		reclaim_it = 1;
		if (se == SE_SHARED) {
			/*
			 * This is an interesting situation.
			 *
			 * Remember that p_free can only change if
			 * p_selock < 0.
			 * p_free does not depend on our holding `pse'.
			 * And, since we hold `pse', p_selock can not change.
			 * So, if p_free changes on us, the page is already
			 * exclusively held, and we would fail to get p_selock
			 * regardless.
			 *
			 * We want to avoid getting the share
			 * lock on a free page that needs to be reclaimed.
			 * It is possible that some other thread has the share
			 * lock and has left the free page on the cache list.
			 * pvn_vplist_dirty() does this for brief periods.
			 * If the se_share is currently SE_EXCL, we will fail
			 * to acquire p_selock anyway.  Blocking is the
			 * right thing to do.
			 * If we need to reclaim this page, we must get
			 * exclusive access to it, force the upgrade now.
			 * Again, we will fail to acquire p_selock if the
			 * page is not free and block.
			 */
			upgraded = 1;
			se = SE_EXCL;
			VM_STAT_ADD(page_lock_upgrade);
		}
	}

	if (se == SE_EXCL) {
		if (!(es & SE_EXCL_WANTED) && (pp->p_selock & SE_EWANTED)) {
			/*
			 * if the caller wants a writer lock (but did not
			 * specify exclusive access), and there is a pending
			 * writer that wants exclusive access, return failure
			 */
			retval = 0;
		} else if ((pp->p_selock & ~SE_EWANTED) == 0) {
			/* no reader/writer lock held */
			THREAD_KPRI_REQUEST();
			/* this clears our setting of the SE_EWANTED bit */
			pp->p_selock = SE_WRITER;
			retval = 1;
		} else {
			/* page is locked */
			if (es & SE_EXCL_WANTED) {
				/* set the SE_EWANTED bit */
				pp->p_selock |= SE_EWANTED;
			}
			retval = 0;
		}
	} else {
		retval = 0;
		if (pp->p_selock >= 0) {
			if ((pp->p_selock & SE_EWANTED) == 0) {
				pp->p_selock += SE_READER;
				retval = 1;
			}
		}
	}

	if (retval == 0) {
		if ((pp->p_selock & ~SE_EWANTED) == SE_DELETED) {
			VM_STAT_ADD(page_lock_deleted);
			mutex_exit(pse);
			return (retval);
		}

#ifdef VM_STATS
		VM_STAT_ADD(page_lock_miss);
		if (upgraded) {
			VM_STAT_ADD(page_lock_upgrade_failed);
		}
#endif
		if (lock) {
			VM_STAT_ADD(page_lock_miss_lock);
			mutex_exit(lock);
		}

		/*
		 * Now, wait for the page to be unlocked and
		 * release the lock protecting p_cv and p_selock.
		 */
		cv_wait(&pp->p_cv, pse);
		mutex_exit(pse);

		/*
		 * The page identity may have changed while we were
		 * blocked.  If we are willing to depend on "pp"
		 * still pointing to a valid page structure (i.e.,
		 * assuming page structures are not dynamically allocated
		 * or freed), we could try to lock the page if its
		 * identity hasn't changed.
		 *
		 * This needs to be measured, since we come back from
		 * cv_wait holding pse (the expensive part of this
		 * operation) we might as well try the cheap part.
		 * Though we would also have to confirm that dropping
		 * `lock' did not cause any grief to the callers.
		 */
		if (lock) {
			mutex_enter(lock);
		}
	} else {
		/*
		 * We have the page lock.
		 * If we needed to reclaim the page, and the page
		 * needed reclaiming (ie, it was free), then we
		 * have the page exclusively locked.  We may need
		 * to downgrade the page.
		 */
		ASSERT((upgraded) ?
		    ((PP_ISFREE(pp)) && PAGE_EXCL(pp)) : 1);
		mutex_exit(pse);

		/*
		 * We now hold this page's lock, either shared or
		 * exclusive.  This will prevent its identity from changing.
		 * The page, however, may or may not be free.  If the caller
		 * requested, and it is free, go reclaim it from the
		 * free list.  If the page can't be reclaimed, return failure
		 * so that the caller can start all over again.
		 *
		 * NOTE:page_reclaim() releases the page lock (p_selock)
		 *	if it can't be reclaimed.
		 */
		if (reclaim_it) {
			if (!page_reclaim(pp, lock)) {
				VM_STAT_ADD(page_lock_bad_reclaim);
				retval = 0;
			} else {
				VM_STAT_ADD(page_lock_reclaim);
				if (upgraded) {
					page_downgrade(pp);
				}
			}
		}
	}
	return (retval);
}
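A writer-priority sketch based on the decision table above (hypothetical retry loop; SE_EXCL_WANTED is documented above, and P_NO_RECLAIM is assumed to be the reclaim_t value meaning "don't reclaim"):

	/*
	 * Hypothetical caller: a failed attempt with SE_EXCL_WANTED
	 * leaves SE_EWANTED set, so new readers are denied while this
	 * thread blocks inside page_lock_es() and then retries.
	 */
	while (!page_lock_es(pp, SE_EXCL, NULL, P_NO_RECLAIM,
	    SE_EXCL_WANTED))
		continue;
	ASSERT(PAGE_EXCL(pp));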