Example #1
/*
 * page_retire_hunt() callback for the retire thread.
 */
static void
page_retire_thread_cb(page_t *pp)
{
	PR_DEBUG(prd_tctop);
	if (!PP_ISKVP(pp) && page_trylock(pp, SE_EXCL)) {
		PR_DEBUG(prd_tclocked);
		page_unlock(pp);
	}
}
int
plat_hold_page(pfn_t pfn, int lock, page_t **pp_ret)
{
	page_t *pp = page_numtopp_nolock(pfn);

	if (pp == NULL)
		return (PLAT_HOLD_FAIL);

#if !defined(__xpv)
	/*
	 * Pages are locked SE_SHARED because some hypervisors
	 * like xVM ESX reclaim Guest OS memory by locking
	 * it SE_EXCL so we want to leave these pages alone.
	 */
	if (lock == PLAT_HOLD_LOCK) {
		ASSERT(pp_ret != NULL);
		if (page_trylock(pp, SE_SHARED) == 0)
			return (PLAT_HOLD_FAIL);
	}
#else	/* __xpv */
	if (lock == PLAT_HOLD_LOCK) {
		ASSERT(pp_ret != NULL);
		if (page_trylock(pp, SE_EXCL) == 0)
			return (PLAT_HOLD_FAIL);
	}

	if (mfn_list[pfn] == MFN_INVALID) {
		/* We failed - release the lock if we grabbed it earlier */
		if (lock == PLAT_HOLD_LOCK) {
			page_unlock(pp);
		}
		return (PLAT_HOLD_FAIL);
	}
#endif	/* __xpv */

	if (lock == PLAT_HOLD_LOCK)
		*pp_ret = pp;

	return (PLAT_HOLD_OK);
}
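
A hypothetical caller sketch follows (inspect_held_pfn() is not part of the
original source): it pairs a successful PLAT_HOLD_LOCK hold with the matching
page_unlock(). Since plat_hold_page() takes the lock SE_SHARED (or SE_EXCL
under __xpv), page_unlock() is the correct release in either case.

/*
 * Hypothetical sketch: hold the page backing a pfn so it cannot be
 * reclaimed while it is examined, then drop the lock.
 */
static int
inspect_held_pfn(pfn_t pfn)
{
	page_t *pp;

	if (plat_hold_page(pfn, PLAT_HOLD_LOCK, &pp) != PLAT_HOLD_OK)
		return (-1);

	/* ... examine *pp while the lock keeps it from being reclaimed ... */

	page_unlock(pp);
	return (0);
}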
Example #3
static void
less_pages(uint64_t base, uint64_t len)
{
	uint64_t pa, end = base + len;
	extern int kcage_on;

	for (pa = base; pa < end; pa += PAGESIZE) {
		pfn_t pfnum;
		page_t *pp;

		pfnum = (pfn_t)(pa >> PAGESHIFT);
		if ((pp = page_numtopp_nolock(pfnum)) == NULL)
			cmn_err(CE_PANIC, "missing pfnum %lx", pfnum);

		/*
		 * must break up any large pages that may have
		 * constituent pages being utilized for
		 * prom_alloc()'s. page_reclaim() can't handle
		 * large pages.
		 */
		if (pp->p_szc != 0)
			page_boot_demote(pp);

		if (!PAGE_LOCKED(pp) && pp->p_lckcnt == 0) {
			/*
			 * Ahhh yes, a prom page,
			 * suck it off the freelist,
			 * lock it, and hashin on prom_pages vp.
			 */
			if (page_trylock(pp, SE_EXCL) == 0)
				cmn_err(CE_PANIC, "prom page locked");

			(void) page_reclaim(pp, NULL);
			/*
			 * vnode offsets on the prom_ppages vnode
			 * are page numbers (gack) for >32 bit
			 * physical memory machines.
			 */
			(void) page_hashin(pp, &promvp,
			    (offset_t)pfnum, NULL);

			if (kcage_on) {
				ASSERT(pp->p_szc == 0);
				if (PP_ISNORELOC(pp) == 0) {
					PP_SETNORELOC(pp);
					PLCNT_XFER_NORELOC(pp);
				}
			}
			(void) page_pp_lock(pp, 0, 1);
		}
	}
}
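
Distilled from less_pages() above, a hypothetical helper (claim_one_page() is
not part of the original source) showing just the trylock/reclaim/hashin/pp_lock
sequence; it assumes the page is already known to be small (p_szc == 0), free,
and unlocked, which is what the checks and the page_boot_demote() call above
establish.

/*
 * Hypothetical sketch: claim one free page and bind it to a vnode.
 */
static int
claim_one_page(page_t *pp, vnode_t *vp, u_offset_t off)
{
	if (page_trylock(pp, SE_EXCL) == 0)
		return (-1);			/* someone else owns it */

	(void) page_reclaim(pp, NULL);		/* pull it off the freelist */
	(void) page_hashin(pp, vp, off, NULL);	/* give it a vnode identity */
	(void) page_pp_lock(pp, 0, 1);		/* pin it in memory */

	return (0);
}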
Example #4
/*
 * Page retire self-test. For now, it always returns 0.
 */
int
page_retire_test(void)
{
	page_t *first, *pp, *cpp, *cpp2, *lpp;

	/*
	 * Tests the corner case where a large page can't be retired
	 * because one of the constituent pages is locked. We mark
	 * one page to be retired and try to retire it, and mark the
	 * other page to be retired but don't try to retire it, so
	 * that page_unlock() in the failure path will recurse and try
	 * to retire THAT page. This is the worst possible situation
	 * we can get ourselves into.
	 */
	memsegs_lock(0);
	pp = first = page_first();
	do {
		if (pp->p_szc && PP_PAGEROOT(pp) == pp) {
			cpp = pp + 1;
			lpp = PP_ISFREE(pp)? pp : pp + 2;
			cpp2 = pp + 3;
			if (!page_trylock(lpp, pp == lpp? SE_EXCL : SE_SHARED))
				continue;
			if (!page_trylock(cpp, SE_EXCL)) {
				page_unlock(lpp);
				continue;
			}
			page_settoxic(cpp, PR_FMA | PR_BUSY);
			page_settoxic(cpp2, PR_FMA);
			page_tryretire(cpp);	/* will fail */
			page_unlock(lpp);
			(void) page_retire(cpp->p_pagenum, PR_FMA);
			(void) page_retire(cpp2->p_pagenum, PR_FMA);
		}
	} while ((pp = page_next(pp)) != first);
	memsegs_unlock(0);

	return (0);
}
Example #5
/*
 * page_retire() - the front door in to retire a page.
 *
 * Ideally, page_retire() would instantly retire the requested page.
 * Unfortunately, some pages are locked or otherwise tied up and cannot be
 * retired right away. To deal with that, bits are set in p_toxic of the
 * page_t. An attempt is made to lock the page; if the attempt is successful,
 * we instantly unlock the page counting on page_unlock() to notice p_toxic
 * is nonzero and to call back into page_retire_pp(). Success is determined
 * by looking to see whether the page has been retired once it has been
 * unlocked.
 *
 * Returns:
 *
 *   - 0 on success,
 *   - EINVAL when the PA is whacko,
 *   - EIO if the page is already retired or already pending retirement, or
 *   - EAGAIN if the page could not be _immediately_ retired but is pending.
 */
int
page_retire(uint64_t pa, uchar_t reason)
{
	page_t	*pp;

	ASSERT(reason & PR_REASONS);		/* there must be a reason */
	ASSERT(!(reason & ~PR_REASONS));	/* but no other bits */

	pp = page_numtopp_nolock(mmu_btop(pa));
	if (pp == NULL) {
		PR_MESSAGE(CE_WARN, 1, "Cannot schedule clearing of error on"
		    " page 0x%08x.%08x; page is not relocatable memory", pa);
		return (page_retire_done(pp, PRD_INVALID_PA));
	}
	if (PP_RETIRED(pp)) {
		PR_DEBUG(prd_dup1);
		return (page_retire_done(pp, PRD_DUPLICATE));
	}

	if ((reason & PR_UE) && !PP_TOXIC(pp)) {
		PR_MESSAGE(CE_NOTE, 1, "Scheduling clearing of error on"
		    " page 0x%08x.%08x", pa);
	} else if (PP_PR_REQ(pp)) {
		PR_DEBUG(prd_dup2);
		return (page_retire_done(pp, PRD_DUPLICATE));
	} else {
		PR_MESSAGE(CE_NOTE, 1, "Scheduling removal of"
		    " page 0x%08x.%08x", pa);
	}
	page_settoxic(pp, reason);
	page_retire_enqueue(pp);

	/*
	 * And now for some magic.
	 *
	 * We marked this page toxic up above.  All there is left to do is
	 * to try to lock the page and then unlock it.  The page lock routines
	 * will intercept the page and retire it if they can.  If the page
	 * cannot be locked, that's okay -- page_unlock() or the background
	 * thread will eventually get to it; until then the lock routines
	 * will deny further locks on it.
	 */
	if (MTBF(pr_calls, pr_mtbf) && page_trylock(pp, SE_EXCL)) {
		PR_DEBUG(prd_prlocked);
		page_unlock(pp);
	} else {
		PR_DEBUG(prd_prnotlocked);
	}

	if (PP_RETIRED(pp)) {
		PR_DEBUG(prd_prretired);
		return (0);
	} else {
		cv_signal(&pr_cv);
		PR_INCR_KSTAT(pr_failed);

		if (pp->p_toxic & PR_MSG) {
			return (page_retire_done(pp, PRD_FAILED));
		} else {
			return (page_retire_done(pp, PRD_PENDING));
		}
	}
}
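
The return-code contract in the block comment above is easiest to read from
the caller's side. The sketch below is a hypothetical consumer
(handle_page_error() is not part of the original source); PR_UE is used only
as a representative reason bit.

/*
 * Hypothetical caller sketch for page_retire().
 */
static void
handle_page_error(uint64_t pa)
{
	switch (page_retire(pa, PR_UE)) {
	case 0:
		/* The page was retired immediately. */
		break;
	case EAGAIN:
		/*
		 * Retirement is pending; page_unlock() or the retire
		 * thread will finish the job later.
		 */
		break;
	case EIO:
		/* Already retired, or retirement already requested. */
		break;
	case EINVAL:
		/* pa did not map to relocatable memory. */
		break;
	}
}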
Example #6
void
plat_freelist_process(int mnode)
{
	page_t		*page, **freelist;
	page_t		*bdlist[STARFIRE_MAX_BOARDS];
	page_t		 **sortlist[STARFIRE_MAX_BOARDS];
	uint32_t	idx, idy, size, color, max_color, lbn;
	uint32_t	bd_flags, bd_cnt, result, bds;
	kmutex_t	*pcm;
	int 		mtype;

	/* for each page size */
	for (mtype = 0; mtype < MAX_MEM_TYPES; mtype++) {
		for (size = 0; size < mmu_page_sizes; size++) {

			/*
			 * Compute the maximum # of phys colors based on
			 * page size.
			 */
			max_color = page_get_pagecolors(size);

			/* for each color */
			for (color = 0; color < max_color; color++) {

				bd_cnt = 0;
				bd_flags = 0;
				for (idx = 0; idx < STARFIRE_MAX_BOARDS;
				    idx++) {
					bdlist[idx] = NULL;
					sortlist[idx] = NULL;
				}

				/* find freelist */
				freelist = &PAGE_FREELISTS(mnode, size,
				    color, mtype);

				if (*freelist == NULL)
					continue;

				/* acquire locks */
				pcm = PC_BIN_MUTEX(mnode, color, PG_FREE_LIST);
				mutex_enter(pcm);

				/*
				 * read freelist & sort pages by logical
				 * board number
				 */
				/* grab pages till last one. */
				while (*freelist) {
					page = *freelist;
					result = page_trylock(page, SE_EXCL);

					ASSERT(result);

					/* Delete from freelist */
					if (size != 0) {
						page_vpsub(freelist, page);
					} else {
						mach_page_sub(freelist, page);
					}

					/* detect the lbn */
					lbn = PFN_2_LBN(page->p_pagenum);

					/* add to bdlist[lbn] */
					if (size != 0) {
						page_vpadd(&bdlist[lbn], page);
					} else {
						mach_page_add(&bdlist[lbn],
						    page);
					}

					/* if lbn new */
					if ((bd_flags & (1 << lbn)) == 0) {
						bd_flags |= (1 << lbn);
						bd_cnt++;
					}
					page_unlock(page);
				}

				/*
				 * Make the sortlist so
				 * bd_cnt choices show up
				 */
				bds = 0;
				for (idx = 0; idx < STARFIRE_MAX_BOARDS;
				    idx++) {
					if (bdlist[idx])
						sortlist[bds++] = &bdlist[idx];
				}

				/*
				 * Set random start.
				 */
				(void) random_idx(-color);

				/*
				 * now rebuild the freelist by shuffling
				 * pages from bd lists
				 */
				while (bd_cnt) {

					/*
					 * get "random" index between 0 &
					 * bd_cnt
					 */

					ASSERT(bd_cnt &&
					    (bd_cnt < STARFIRE_MAX_BOARDS+1));

					idx = random_idx(bd_cnt);

					page = *sortlist[idx];
					result = page_trylock(page, SE_EXCL);

					ASSERT(result);

					/*
					 * Delete from sort_list and append
					 * to freelist.  Big pages use
					 * vp_add; 8k pages don't.
					 */
					if (size != 0) {
						page_vpsub(sortlist[idx], page);
						page_vpadd(freelist, page);
					} else {
						mach_page_sub(sortlist[idx],
						    page);
						mach_page_add(freelist, page);
					}

					/* needed for indexing tmp lists */
					lbn = PFN_2_LBN(page->p_pagenum);

					/*
					 * Was this the last page on this
					 * board's list?
					 */
					if (*sortlist[idx] == NULL) {

						/*
						 * This board's list is now
						 * empty.  Note that idx is
						 * not the lbn; sortlist and
						 * bdlist use different
						 * indexes.
						 */
						bd_flags &= ~(1 << lbn);
						--bd_cnt;

						/*
						 * redo the sortlist so only
						 * bd_cnt choices show up
						 */
						bds = 0;
						for (idy = 0;
						    idy < STARFIRE_MAX_BOARDS;
						    idy++) {
							if (bdlist[idy]) {
								sortlist[bds++]
								/* CSTYLED */
								= &bdlist[idy];
							}
						}
					}
					page_unlock(page);
				}
				mutex_exit(pcm);
			}
		}
	}
}
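
The rebuild loop above is, in essence, "bucket the freelist by board, then
rebuild it by drawing from a randomly chosen non-empty bucket." The standalone
program below illustrates that interleaving on plain integers; the names,
sizes, and rand()-based selection are assumptions for illustration and do not
reproduce random_idx() or the sortlist re-compaction (this sketch simply skips
empty buckets instead).

#include <stdio.h>
#include <stdlib.h>

#define	NBUCKETS	4
#define	NITEMS		16

int
main(void)
{
	int buckets[NBUCKETS][NITEMS], counts[NBUCKETS] = { 0 };
	int live = 0;		/* non-empty buckets, like bd_cnt */
	int i;

	/* "sort" items into buckets by key, like sorting pages by board */
	for (i = 0; i < NITEMS; i++) {
		int b = i % NBUCKETS;

		if (counts[b]++ == 0)
			live++;
		buckets[b][counts[b] - 1] = i;
	}

	/* rebuild the output order by drawing from random non-empty buckets */
	while (live > 0) {
		int b = rand() % NBUCKETS;

		if (counts[b] == 0)
			continue;
		printf("%d ", buckets[b][--counts[b]]);
		if (counts[b] == 0)
			live--;
	}
	printf("\n");
	return (0);
}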
Example #7
/*
 * Process a vnode's page list for all pages whose offset is >= off.
 * Pages are to either be free'd, invalidated, or written back to disk.
 *
 * An "exclusive" lock is acquired for each page if B_INVAL or B_FREE
 * is specified, otherwise they are "shared" locked.
 *
 * Flags are {B_ASYNC, B_INVAL, B_FREE, B_DONTNEED, B_TRUNC}
 *
 * Special marker page_t's are inserted in the list in order
 * to keep track of where we are in the list when locks are dropped.
 *
 * Note the list is circular and insertions can happen only at the
 * head and tail of the list. The algorithm ensures visiting all pages
 * on the list in the following way:
 *
 *    Drop two marker pages at the end of the list.
 *
 *    Move one marker page backwards towards the start of the list until
 *    it is at the list head, processing the pages passed along the way.
 *
 *    Due to race conditions when the vphm mutex is dropped, additional pages
 *    can be added to either end of the list, so we'll continue to move
 *    the marker and process pages until it is up against the end marker.
 *
 * There is one special exit condition. If we are processing a VMODSORT
 * vnode and only writing back modified pages, we can stop as soon as
 * we run into an unmodified page.  This makes fsync(3) operations fast.
 */
int
pvn_vplist_dirty(
    vnode_t		*vp,
    u_offset_t	off,
    int		(*putapage)(vnode_t *, page_t *, u_offset_t *,
                        size_t *, int, cred_t *),
    int		flags,
    cred_t		*cred)
{
    page_t		*pp;
    page_t		*mark;		/* marker page that moves toward head */
    page_t		*end;		/* marker page at end of list */
    int		err = 0;
    int		error;
    kmutex_t	*vphm;
    se_t		se;
    page_t		**where_to_move;

    ASSERT(vp->v_type != VCHR);

    if (vp->v_pages == NULL)
        return (0);


    /*
     * Serialize vplist_dirty operations on this vnode by setting VVMLOCK.
     *
     * Don't block on VVMLOCK if B_ASYNC is set. This prevents sync()
     * from getting blocked while flushing pages to a dead NFS server.
     */
    mutex_enter(&vp->v_lock);
    if ((vp->v_flag & VVMLOCK) && (flags & B_ASYNC)) {
        mutex_exit(&vp->v_lock);
        return (EAGAIN);
    }

    while (vp->v_flag & VVMLOCK)
        cv_wait(&vp->v_cv, &vp->v_lock);

    if (vp->v_pages == NULL) {
        mutex_exit(&vp->v_lock);
        return (0);
    }

    vp->v_flag |= VVMLOCK;
    mutex_exit(&vp->v_lock);


    /*
     * Set up the marker pages used to walk the list
     */
    end = kmem_cache_alloc(marker_cache, KM_SLEEP);
    end->p_vnode = vp;
    end->p_offset = (u_offset_t)-2;
    mark = kmem_cache_alloc(marker_cache, KM_SLEEP);
    mark->p_vnode = vp;
    mark->p_offset = (u_offset_t)-1;

    /*
     * Grab the lock protecting the vnode's page list
     * note that this lock is dropped at times in the loop.
     */
    vphm = page_vnode_mutex(vp);
    mutex_enter(vphm);
    if (vp->v_pages == NULL)
        goto leave;

    /*
     * insert the markers and loop through the list of pages
     */
    page_vpadd(&vp->v_pages->p_vpprev->p_vpnext, mark);
    page_vpadd(&mark->p_vpnext, end);
    for (;;) {

        /*
         * If only doing an async write back, then we can
         * stop as soon as we get to start of the list.
         */
        if (flags == B_ASYNC && vp->v_pages == mark)
            break;

        /*
         * otherwise stop when we've gone through all the pages
         */
        if (mark->p_vpprev == end)
            break;

        pp = mark->p_vpprev;
        if (vp->v_pages == pp)
            where_to_move = &vp->v_pages;
        else
            where_to_move = &pp->p_vpprev->p_vpnext;

        ASSERT(pp->p_vnode == vp);

        /*
         * If just flushing dirty pages to disk and this vnode
         * is using a sorted list of pages, we can stop processing
         * as soon as we find an unmodified page, since all the
         * modified pages are visited first.
         */
        if (IS_VMODSORT(vp) &&
                !(flags & (B_INVAL | B_FREE | B_TRUNC))) {
            if (!hat_ismod(pp) && !page_io_locked(pp)) {
#ifdef  DEBUG
                /*
                 * For debug kernels examine what should be
                 * all the remaining clean pages, asserting
                 * that they are not modified.
                 */
                page_t	*chk = pp;
                int	attr;

                page_vpsub(&vp->v_pages, mark);
                page_vpadd(where_to_move, mark);
                do {
                    chk = chk->p_vpprev;
                    ASSERT(chk != end);
                    if (chk == mark)
                        continue;
                    attr = hat_page_getattr(chk, P_MOD |
                                            P_REF);
                    if ((attr & P_MOD) == 0)
                        continue;
                    panic("v_pages list not all clean: "
                          "page_t*=%p vnode=%p off=%lx "
                          "attr=0x%x last clean page_t*=%p\n",
                          (void *)chk, (void *)chk->p_vnode,
                          (long)chk->p_offset, attr,
                          (void *)pp);
                } while (chk != vp->v_pages);
#endif
                break;
            } else if (!(flags & B_ASYNC) && !hat_ismod(pp)) {
                /*
                 * Couldn't get io lock, wait until IO is done.
                 * Block only for sync IO since we don't want
                 * to block async IO.
                 */
                mutex_exit(vphm);
                page_io_wait(pp);
                mutex_enter(vphm);
                continue;
            }
        }

        /*
         * Skip this page if the offset is out of the desired range.
         * Just move the marker and continue.
         */
        if (pp->p_offset < off) {
            page_vpsub(&vp->v_pages, mark);
            page_vpadd(where_to_move, mark);
            continue;
        }

        /*
         * If we are supposed to invalidate or free this
         * page, then we need an exclusive lock.
         */
        se = (flags & (B_INVAL | B_FREE)) ? SE_EXCL : SE_SHARED;

        /*
         * We must acquire the page lock for all synchronous
         * operations (invalidate, free and write).
         */
        if ((flags & B_INVAL) != 0 || (flags & B_ASYNC) == 0) {
            /*
             * If the page_lock() drops the mutex
             * we must retry the loop.
             */
            if (!page_lock(pp, se, vphm, P_NO_RECLAIM))
                continue;

            /*
             * It's ok to move the marker page now.
             */
            page_vpsub(&vp->v_pages, mark);
            page_vpadd(where_to_move, mark);
        } else {

            /*
             * update the marker page for all remaining cases
             */
            page_vpsub(&vp->v_pages, mark);
            page_vpadd(where_to_move, mark);

            /*
             * For write backs, if we can't lock the page, it's
             * invalid or in the process of being destroyed.  Skip
             * it, assuming someone else is writing it.
             */
            if (!page_trylock(pp, se))
                continue;
        }

        ASSERT(pp->p_vnode == vp);

        /*
         * Successfully locked the page, now figure out what to
         * do with it. Free pages are easily dealt with, invalidate
         * if desired or just go on to the next page.
         */
        if (PP_ISFREE(pp)) {
            if ((flags & B_INVAL) == 0) {
                page_unlock(pp);
                continue;
            }

            /*
             * Invalidate (destroy) the page.
             */
            mutex_exit(vphm);
            page_destroy_free(pp);
            mutex_enter(vphm);
            continue;
        }

        /*
         * pvn_getdirty() figures out what to do with a dirty page.
         * If the page is dirty, the putapage() routine will write it
         * and will kluster any other adjacent dirty pages it can.
         *
         * pvn_getdirty() and `(*putapage)' unlock the page.
         */
        mutex_exit(vphm);
        if (pvn_getdirty(pp, flags)) {
            error = (*putapage)(vp, pp, NULL, NULL, flags, cred);
            if (!err)
                err = error;
        }
        mutex_enter(vphm);
    }
    page_vpsub(&vp->v_pages, mark);
    page_vpsub(&vp->v_pages, end);

leave:
    /*
     * Release the v_pages mutex, clear VVMLOCK, and wake up blocked threads
     */
    mutex_exit(vphm);
    kmem_cache_free(marker_cache, mark);
    kmem_cache_free(marker_cache, end);
    mutex_enter(&vp->v_lock);
    vp->v_flag &= ~VVMLOCK;
    cv_broadcast(&vp->v_cv);
    mutex_exit(&vp->v_lock);
    return (err);
}
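
The two-marker walk described in the header comment of pvn_vplist_dirty() is
a general circular-list idiom: park one marker at the tail, then step the
other marker backwards toward the head, processing the node it passes on each
step, so the walk stays consistent even if nodes are added at either end while
the list lock is dropped. The standalone program below is a minimal sketch of
that idiom on a plain circular doubly-linked list (no page_t, no locking);
all names in it are illustrative.

#include <stdio.h>

typedef struct node {
	struct node *next, *prev;
	int val;			/* -1 tags the marker nodes */
} node_t;

static void
insert_before(node_t *pos, node_t *n)
{
	n->next = pos;
	n->prev = pos->prev;
	pos->prev->next = n;
	pos->prev = n;
}

static void
remove_node(node_t *n)
{
	n->prev->next = n->next;
	n->next->prev = n->prev;
}

int
main(void)
{
	node_t nodes[5], mark, end;
	node_t *head = &nodes[0];
	int i;

	/* build a circular list 0..4 */
	for (i = 0; i < 5; i++) {
		nodes[i].val = i;
		nodes[i].next = &nodes[(i + 1) % 5];
		nodes[i].prev = &nodes[(i + 4) % 5];
	}
	mark.val = end.val = -1;

	/* drop both markers at the tail: 0 1 2 3 4 mark end */
	insert_before(head, &mark);
	insert_before(head, &end);

	/* walk mark backwards, processing the node it passes on each step */
	while (mark.prev != &end) {
		node_t *n = mark.prev;

		if (n->val >= 0)
			printf("visit %d\n", n->val);

		remove_node(&mark);
		insert_before(n, &mark);	/* move mark backwards past n */
	}

	remove_node(&mark);
	remove_node(&end);
	return (0);
}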
/*
 * Scan page_t's and issue I/O's for modified pages.
 *
 * Also coalesces consecutive small sized free pages into the next larger
 * pagesize. This costs a tiny bit of time in fsflush, but will reduce time
 * spent scanning on later passes and for anybody allocating large pages.
 */
static void
fsflush_do_pages()
{
	vnode_t		*vp;
	ulong_t		pcount;
	hrtime_t	timer = gethrtime();
	ulong_t		releases = 0;
	ulong_t		nexamined = 0;
	ulong_t		nlocked = 0;
	ulong_t		nmodified = 0;
	ulong_t		ncoalesce = 0;
	ulong_t		cnt;
	int		mod;
	int		fspage = 1;
	u_offset_t	offset;
	uint_t		szc;

	page_t		*coal_page = NULL;  /* 1st page in group to coalesce */
	uint_t		coal_szc = 0;	    /* size code, coal_page->p_szc */
	uint_t		coal_cnt = 0;	    /* count of pages seen */

	static ulong_t	nscan = 0;
	static pgcnt_t	last_total_pages = 0;
	static page_t	*pp = NULL;

	/*
	 * Check to see if total_pages has changed.
	 */
	if (total_pages != last_total_pages) {
		last_total_pages = total_pages;
		nscan = (last_total_pages * (tune.t_fsflushr))/v.v_autoup;
	}

	if (pp == NULL)
		pp = memsegs->pages;

	pcount = 0;
	while (pcount < nscan) {

		/*
		 * move to the next page, skipping over large pages
		 * and issuing prefetches.
		 */
		if (pp->p_szc && fspage == 0) {
			pfn_t pfn;

			pfn  = page_pptonum(pp);
			cnt = page_get_pagecnt(pp->p_szc);
			cnt -= pfn & (cnt - 1);
		} else
			cnt = 1;

		pp = page_nextn(pp, cnt);
		prefetch_page_r((void *)pp);
		ASSERT(pp != NULL);
		pcount += cnt;

		/*
		 * Do a bunch of dirty tests (i.e., no locking) to determine
		 * if we can quickly skip this page. These tests are repeated
		 * after acquiring the page lock.
		 */
		++nexamined;
		if (PP_ISSWAP(pp)) {
			fspage = 0;
			coal_page = NULL;
			continue;
		}

		/*
		 * skip free pages too, but try coalescing them into larger
		 * pagesizes
		 */
		if (PP_ISFREE(pp)) {
			/*
			 * skip pages with a file system identity or that
			 * are already maximum size
			 */
			fspage = 0;
			szc = pp->p_szc;
			if (pp->p_vnode != NULL || szc == fsf_npgsz - 1) {
				coal_page = NULL;
				continue;
			}

			/*
			 * If not in a coalescing candidate page or the size
			 * codes are different, start a new candidate.
			 */
			if (coal_page == NULL || coal_szc != szc) {

				/*
				 * page must be properly aligned
				 */
				if ((page_pptonum(pp) & fsf_mask[szc]) != 0) {
					coal_page = NULL;
					continue;
				}
				coal_page = pp;
				coal_szc = szc;
				coal_cnt = 1;
				continue;
			}

			/*
			 * acceptable to add this to existing candidate page
			 */
			++coal_cnt;
			if (coal_cnt < fsf_pgcnt[coal_szc])
				continue;

			/*
			 * We've got enough pages to coalesce, so do it.
			 * After promoting, we clear coal_page, so it will
			 * take another pass to promote this to an even
			 * larger page.
			 */
			++ncoalesce;
			(void) page_promote_size(coal_page, coal_szc);
			coal_page = NULL;
			continue;
		} else {
			coal_page = NULL;
		}

		if (PP_ISKAS(pp) ||
		    PAGE_LOCKED(pp) ||
		    pp->p_lckcnt != 0 ||
		    pp->p_cowcnt != 0) {
			fspage = 0;
			continue;
		}


		/*
		 * Reject pages that can't be "exclusively" locked.
		 */
		if (!page_trylock(pp, SE_EXCL))
			continue;
		++nlocked;


		/*
		 * After locking the page, redo the above checks.
		 * Since we locked the page, leave out the PAGE_LOCKED() test.
		 */
		vp = pp->p_vnode;
		if (PP_ISSWAP(pp) ||
		    PP_ISFREE(pp) ||
		    vp == NULL ||
		    PP_ISKAS(pp) ||
		    (vp->v_flag & VISSWAP) != 0) {
			page_unlock(pp);
			fspage = 0;
			continue;
		}
		if (pp->p_lckcnt != 0 || pp->p_cowcnt != 0) {
			page_unlock(pp);
			continue;
		}

		fspage = 1;
		ASSERT(vp->v_type != VCHR);

		/*
		 * Check the modified bit, leaving the bit alone in hardware.
		 * It will be cleared if we do the putpage.
		 */
		if (IS_VMODSORT(vp))
			mod = hat_ismod(pp);
		else
			mod = hat_pagesync(pp,
			    HAT_SYNC_DONTZERO | HAT_SYNC_STOPON_MOD) & P_MOD;

		if (mod) {
			++nmodified;
			offset = pp->p_offset;

			/*
			 * Hold the vnode before releasing the page lock
			 * to prevent it from being freed and re-used by
			 * some other thread.
			 */
			VN_HOLD(vp);

			page_unlock(pp);

			(void) VOP_PUTPAGE(vp, offset, PAGESIZE, B_ASYNC,
			    kcred, NULL);

			VN_RELE(vp);
		} else {

			/*
			 * Catch any pages which should be on the cache list,
			 * but aren't yet.
			 */
			if (hat_page_is_mapped(pp) == 0) {
				++releases;
				(void) page_release(pp, 1);
			} else {
				page_unlock(pp);
			}
		}
	}

	/*
	 * Maintain statistics; reset every million wakeups just to
	 * avoid overflow.
	 */
	if (++fsf_cycles == 1000000) {
		fsf_cycles = 0;
		fsf_total.fsf_scan = 0;
		fsf_total.fsf_examined = 0;
		fsf_total.fsf_locked = 0;
		fsf_total.fsf_modified = 0;
		fsf_total.fsf_coalesce = 0;
		fsf_total.fsf_time = 0;
		fsf_total.fsf_releases = 0;
	} else {
		fsf_total.fsf_scan += fsf_recent.fsf_scan = nscan;
		fsf_total.fsf_examined += fsf_recent.fsf_examined = nexamined;
		fsf_total.fsf_locked += fsf_recent.fsf_locked = nlocked;
		fsf_total.fsf_modified += fsf_recent.fsf_modified = nmodified;
		fsf_total.fsf_coalesce += fsf_recent.fsf_coalesce = ncoalesce;
		fsf_total.fsf_time += fsf_recent.fsf_time = gethrtime() - timer;
		fsf_total.fsf_releases += fsf_recent.fsf_releases = releases;
	}
}
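
The free-page coalescing in fsflush_do_pages() boils down to counting a run of
contiguous, properly aligned free pages of the same size and promoting once
the run covers a full larger page. The standalone program below sketches only
that counting logic on bare page-frame numbers; the 8-pages-per-large-page
ratio and all names are illustrative assumptions, not the real fsf_pgcnt[] and
fsf_mask[] tables.

#include <stdio.h>

#define	FSF_PGCNT	8			/* small pages per large page */
#define	FSF_MASK	(FSF_PGCNT - 1)		/* alignment mask */

int
main(void)
{
	unsigned long pfns[] = { 3, 8, 9, 10, 11, 12, 13, 14, 15, 17 };
	int npfns = (int)(sizeof (pfns) / sizeof (pfns[0]));
	unsigned long coal_start = 0;
	int coal_cnt = 0;
	int i;

	for (i = 0; i < npfns; i++) {
		unsigned long pfn = pfns[i];

		if (coal_cnt == 0 || pfn != coal_start + coal_cnt) {
			/* start a new candidate run; it must be aligned */
			if ((pfn & FSF_MASK) != 0) {
				coal_cnt = 0;
				continue;
			}
			coal_start = pfn;
			coal_cnt = 1;
			continue;
		}

		if (++coal_cnt == FSF_PGCNT) {
			printf("promote run starting at pfn %lu\n", coal_start);
			/* a later pass could promote to a still larger size */
			coal_cnt = 0;
		}
	}
	return (0);
}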