Example 1
void
pvn_write_done(page_t *plist, int flags)
{
    int dfree = 0;
    int pgrec = 0;
    int pgout = 0;
    int pgpgout = 0;
    int anonpgout = 0;
    int anonfree = 0;
    int fspgout = 0;
    int fsfree = 0;
    int execpgout = 0;
    int execfree = 0;
    page_t *pp;
    struct cpu *cpup;
    struct vnode *vp = NULL;	/* for probe */
    uint_t ppattr;
    kmutex_t *vphm = NULL;

    ASSERT((flags & B_READ) == 0);

    /*
     * If we are about to start paging anyway, start freeing pages.
     */
    if (write_free && freemem < lotsfree + pages_before_pager &&
            (flags & B_ERROR) == 0) {
        flags |= B_FREE;
    }

    /*
     * Handle each page involved in the i/o operation.
     */
    while (plist != NULL) {
        pp = plist;
        ASSERT(PAGE_LOCKED(pp) && page_iolock_assert(pp));
        page_sub(&plist, pp);

        /* Kernel probe support */
        if (vp == NULL)
            vp = pp->p_vnode;

        if (((flags & B_ERROR) == 0) && IS_VMODSORT(vp)) {
            /*
             * Move page to the top of the v_page list.
             * Skip pages modified during IO.
             */
            vphm = page_vnode_mutex(vp);
            mutex_enter(vphm);
            if ((pp->p_vpnext != pp) && !hat_ismod(pp)) {
                page_vpsub(&vp->v_pages, pp);
                page_vpadd(&vp->v_pages, pp);
            }
            mutex_exit(vphm);
        }

        if (flags & B_ERROR) {
            /*
             * Write operation failed.  We don't want
             * to destroy (or free) the page unless B_FORCE
             * is set. We set the mod bit again and release
             * all locks on the page so that it will get written
             * back later when things are hopefully better.
             * If B_INVAL and B_FORCE are set we really have
             * to destroy the page.
             */
            if ((flags & (B_INVAL|B_FORCE)) == (B_INVAL|B_FORCE)) {
                page_io_unlock(pp);
                /*LINTED: constant in conditional context*/
                VN_DISPOSE(pp, B_INVAL, 0, kcred);
            } else {
                hat_setmod_only(pp);
                page_io_unlock(pp);
                page_unlock(pp);
            }
        } else if (flags & B_INVAL) {
            /*
             * XXX - Failed writes with B_INVAL set are
             * not handled appropriately.
             */
            page_io_unlock(pp);
            /*LINTED: constant in conditional context*/
            VN_DISPOSE(pp, B_INVAL, 0, kcred);
        } else if (flags & B_FREE || !hat_page_is_mapped(pp)) {
            /*
             * Update statistics for pages being paged out
             */
            if (pp->p_vnode) {
                if (IS_SWAPFSVP(pp->p_vnode)) {
                    anonpgout++;
                } else {
                    if (pp->p_vnode->v_flag & VVMEXEC) {
                        execpgout++;
                    } else {
                        fspgout++;
                    }
                }
            }
            page_io_unlock(pp);
            pgout = 1;
            pgpgout++;
            TRACE_1(TR_FAC_VM, TR_PAGE_WS_OUT,
                    "page_ws_out:pp %p", pp);

            /*
             * The page_struct_lock need not be acquired to
             * examine "p_lckcnt" and "p_cowcnt" since we'll
             * have an "exclusive" lock if the upgrade succeeds.
             */
            if (page_tryupgrade(pp) &&
                    pp->p_lckcnt == 0 && pp->p_cowcnt == 0) {
                /*
                 * Check if someone has reclaimed the
                 * page.  If ref and mod are not set, no
                 * one is using it so we can free it.
                 * The rest of the system is careful
                 * to use the NOSYNC flag to unload
                 * translations set up for i/o w/o
                 * affecting ref and mod bits.
                 *
                 * Obtain a copy of the real hardware
                 * mod bit using hat_pagesync(pp, HAT_SYNC_DONTZERO)
                 * to avoid having to flush the cache.
                 */
                ppattr = hat_pagesync(pp, HAT_SYNC_DONTZERO |
                                      HAT_SYNC_STOPON_MOD);
ck_refmod:
                if (!(ppattr & (P_REF | P_MOD))) {
                    if (hat_page_is_mapped(pp)) {
                        /*
                         * Doesn't look like the page
                         * was modified so now we
                         * really have to unload the
                         * translations.  Meanwhile
                         * another CPU could've
                         * modified it so we have to
                         * check again.  We don't loop
                         * forever here because now
                         * the translations are gone
                         * and no one can get a new one
                         * since we have the "exclusive"
                         * lock on the page.
                         */
                        (void) hat_pageunload(pp,
                                              HAT_FORCE_PGUNLOAD);
                        ppattr = hat_page_getattr(pp,
                                                  P_REF | P_MOD);
                        goto ck_refmod;
                    }
                    /*
                     * Update statistics for pages being
                     * freed
                     */
                    if (pp->p_vnode) {
                        if (IS_SWAPFSVP(pp->p_vnode)) {
                            anonfree++;
                        } else {
                            if (pp->p_vnode->v_flag
                                    & VVMEXEC) {
                                execfree++;
                            } else {
                                fsfree++;
                            }
                        }
                    }
                    /*LINTED: constant in conditional ctx*/
                    VN_DISPOSE(pp, B_FREE,
                               (flags & B_DONTNEED), kcred);
                    dfree++;
                } else {
                    page_unlock(pp);
                    pgrec++;
                    TRACE_1(TR_FAC_VM, TR_PAGE_WS_FREE,
                            "page_ws_free:pp %p", pp);
                }
            } else {
                /*
                 * Page is either `locked' in memory
                 * or was reclaimed and now has a
                 * "shared" lock, so release it.
                 */
                page_unlock(pp);
            }
        } else {
            /*
             * Neither B_FREE nor B_INVAL nor B_ERROR.
             * Just release locks.
             */
            page_io_unlock(pp);
            page_unlock(pp);
        }
    }

    CPU_STATS_ENTER_K();
    cpup = CPU;		/* get cpup now that CPU cannot change */
    CPU_STATS_ADDQ(cpup, vm, dfree, dfree);
    CPU_STATS_ADDQ(cpup, vm, pgrec, pgrec);
    CPU_STATS_ADDQ(cpup, vm, pgout, pgout);
    CPU_STATS_ADDQ(cpup, vm, pgpgout, pgpgout);
    CPU_STATS_ADDQ(cpup, vm, anonpgout, anonpgout);
    CPU_STATS_ADDQ(cpup, vm, anonfree, anonfree);
    CPU_STATS_ADDQ(cpup, vm, fspgout, fspgout);
    CPU_STATS_ADDQ(cpup, vm, fsfree, fsfree);
    CPU_STATS_ADDQ(cpup, vm, execpgout, execpgout);
    CPU_STATS_ADDQ(cpup, vm, execfree, execfree);
    CPU_STATS_EXIT_K();

    /* Kernel probe */
    TNF_PROBE_4(pageout, "vm pageio io", /* CSTYLED */,
                tnf_opaque,	vnode,			vp,
                tnf_ulong,	pages_pageout,		pgpgout,
                tnf_ulong,	pages_freed,		dfree,
                tnf_ulong,	pages_reclaimed,	pgrec);
}
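
A minimal caller sketch (not taken from the original listing): a file system's
putpage path typically issues the page I/O and, once it has completed, hands
the page list back to pvn_write_done() with the final flags, OR-ing in B_ERROR
on failure so the pages stay dirty. The helper name below is hypothetical;
only the pvn_write_done() call itself reflects the usual pattern.

/*
 * Hypothetical completion helper (illustrative only): report the outcome
 * of the write so that pvn_write_done() can unlock, free or invalidate
 * the pages and update the per-CPU pageout statistics.
 */
static void
example_write_done(page_t *plist, int flags, int err)
{
    if (err != 0)
        flags |= B_ERROR;   /* keep the pages dirty unless B_FORCE is set */
    pvn_write_done(plist, flags | B_WRITE);
}
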
Example 2
/*
 * Flags are composed of {B_ASYNC, B_INVAL, B_FREE, B_DONTNEED, B_DELWRI,
 * B_TRUNC, B_FORCE}.  B_DELWRI indicates that this page is part of a kluster
 * operation and is only to be considered if it doesn't involve any
 * waiting here.  B_TRUNC indicates that the file is being truncated
 * and so no i/o needs to be done. B_FORCE indicates that the page
 * must be destroyed so don't try writing it out.
 *
 * The caller must ensure that the page is locked.  Returns 1 if
 * the page should be written back (the "iolock" is held in this
 * case), or 0 if the page has been dealt with or has been
 * unlocked.
 */
int
pvn_getdirty(page_t *pp, int flags)
{
    ASSERT((flags & (B_INVAL | B_FREE)) ?
           PAGE_EXCL(pp) : PAGE_SHARED(pp));
    ASSERT(PP_ISFREE(pp) == 0);

    /*
     * If trying to invalidate or free a logically `locked' page,
     * forget it.  Don't need page_struct_lock to check p_lckcnt and
     * p_cowcnt as the page is exclusively locked.
     */
    if ((flags & (B_INVAL | B_FREE)) && !(flags & (B_TRUNC|B_FORCE)) &&
            (pp->p_lckcnt != 0 || pp->p_cowcnt != 0)) {
        page_unlock(pp);
        return (0);
    }

    /*
     * Now acquire the i/o lock so we can add it to the dirty
     * list (if necessary).  We avoid blocking on the i/o lock
     * in the following cases:
     *
     *	If B_DELWRI is set, which implies that this request is
     *	due to a klustering operation.
     *
     *	If this is an async (B_ASYNC) operation and we are not doing
     *	invalidation (B_INVAL) [The current i/o or fsflush will ensure
     *	that the page is written out].
     */
    if ((flags & B_DELWRI) || ((flags & (B_INVAL | B_ASYNC)) == B_ASYNC)) {
        if (!page_io_trylock(pp)) {
            page_unlock(pp);
            return (0);
        }
    } else {
        page_io_lock(pp);
    }

    /*
     * If we want to free or invalidate the page then
     * we need to unload it so that anyone who wants
     * it will have to take a minor fault to get it.
     * Otherwise, we're just writing the page back so we
     * need to sync up the hardware and software mod bit to
     * detect any future modifications.  We clear the
     * software mod bit when we put the page on the dirty
     * list.
     */
    if (flags & (B_INVAL | B_FREE)) {
        (void) hat_pageunload(pp, HAT_FORCE_PGUNLOAD);
    } else {
        (void) hat_pagesync(pp, HAT_SYNC_ZERORM);
    }

    if (!hat_ismod(pp) || (flags & B_TRUNC)) {
        /*
         * Don't need to add it to the
         * list after all.
         */
        page_io_unlock(pp);
        if (flags & B_INVAL) {
            /*LINTED: constant in conditional context*/
            VN_DISPOSE(pp, B_INVAL, 0, kcred);
        } else if (flags & B_FREE) {
            /*LINTED: constant in conditional context*/
            VN_DISPOSE(pp, B_FREE, (flags & B_DONTNEED), kcred);
        } else {
            /*
             * This is the advisory path for callers
             * of VOP_PUTPAGE() who prefer freeing the
             * page _only_ if no one else is accessing it.
             * E.g. segmap_release()
             *
             * The above hat_ismod() check is useless because:
             * (1) we may not be holding SE_EXCL lock;
             * (2) we've not unloaded _all_ translations
             *
             * Let page_release() do the heavy-lifting.
             */
            (void) page_release(pp, 1);
        }
        return (0);
    }

    /*
     * Page is dirty, get it ready for the write back
     * and add the page to the dirty list.
     */
    hat_clrrefmod(pp);

    /*
     * If we're going to free the page when we're done
     * then we can let others try to use it starting now.
     * We'll detect the fact that they used it when the
     * i/o is done and avoid freeing the page.
     */
    if (flags & B_FREE)
        page_downgrade(pp);


    TRACE_1(TR_FAC_VM, TR_PVN_GETDIRTY, "pvn_getdirty:pp %p", pp);

    return (1);
}
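
A usage sketch (assumed, not from the original source) of the contract
described in the comment above: a caller walking a vnode's dirty pages only
issues the write when pvn_getdirty() returns 1, because the page "iolock" is
then held. The putapage callback, variable names, and helper below are
illustrative.

/*
 * Hypothetical helper showing the typical call sequence around
 * pvn_getdirty(): the page is already locked as required, and
 * (*putapage)() stands in for the file system's write-back routine.
 */
static int
example_flush_one(vnode_t *vp, page_t *pp, int flags, cred_t *cred,
    int (*putapage)(vnode_t *, page_t *, u_offset_t *, size_t *, int,
    cred_t *))
{
    u_offset_t io_off;
    size_t io_len;

    if (pvn_getdirty(pp, flags) == 0)
        return (0);     /* page was clean or has already been dealt with */

    /* page is dirty: the i/o lock is held, hand it to the fs to write back */
    return ((*putapage)(vp, pp, &io_off, &io_len, flags, cred));
}
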
Example 3
/*
 * Scan page_t's and issue I/O's for modified pages.
 *
 * Also coalesces consecutive small sized free pages into the next larger
 * pagesize. This costs a tiny bit of time in fsflush, but will reduce time
 * spent scanning on later passes and for anybody allocating large pages.
 */
static void
fsflush_do_pages()
{
	vnode_t		*vp;
	ulong_t		pcount;
	hrtime_t	timer = gethrtime();
	ulong_t		releases = 0;
	ulong_t		nexamined = 0;
	ulong_t		nlocked = 0;
	ulong_t		nmodified = 0;
	ulong_t		ncoalesce = 0;
	ulong_t		cnt;
	int		mod;
	int		fspage = 1;
	u_offset_t	offset;
	uint_t		szc;

	page_t		*coal_page = NULL;  /* 1st page in group to coalesce */
	uint_t		coal_szc = 0;	    /* size code, coal_page->p_szc */
	uint_t		coal_cnt = 0;	    /* count of pages seen */

	static ulong_t	nscan = 0;
	static pgcnt_t	last_total_pages = 0;
	static page_t	*pp = NULL;

	/*
	 * Check to see if total_pages has changed.
	 */
	if (total_pages != last_total_pages) {
		last_total_pages = total_pages;
		nscan = (last_total_pages * (tune.t_fsflushr))/v.v_autoup;
	}

	if (pp == NULL)
		pp = memsegs->pages;

	pcount = 0;
	while (pcount < nscan) {

		/*
		 * move to the next page, skipping over large pages
		 * and issuing prefetches.
		 */
		if (pp->p_szc && fspage == 0) {
			pfn_t pfn;

			pfn  = page_pptonum(pp);
			cnt = page_get_pagecnt(pp->p_szc);
			cnt -= pfn & (cnt - 1);
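			/*
			 * cnt is now the number of constituent pages left
			 * from pp to the end of its large page (e.g. 59 for
			 * a 64-page large page entered at offset 5), so the
			 * page_nextn() call below moves to the next large
			 * page boundary in a single step.
			 */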
		} else
			cnt = 1;

		pp = page_nextn(pp, cnt);
		prefetch_page_r((void *)pp);
		ASSERT(pp != NULL);
		pcount += cnt;

		/*
		 * Do a bunch of dirty tests (i.e. no locking) to determine
		 * if we can quickly skip this page. These tests are repeated
		 * after acquiring the page lock.
		 */
		++nexamined;
		if (PP_ISSWAP(pp)) {
			fspage = 0;
			coal_page = NULL;
			continue;
		}

		/*
		 * skip free pages too, but try coalescing them into larger
		 * pagesizes
		 */
		if (PP_ISFREE(pp)) {
			/*
			 * skip pages with a file system identity or that
			 * are already maximum size
			 */
			fspage = 0;
			szc = pp->p_szc;
			if (pp->p_vnode != NULL || szc == fsf_npgsz - 1) {
				coal_page = NULL;
				continue;
			}

			/*
			 * If not in a coalescing candidate page or the size
			 * codes are different, start a new candidate.
			 */
			if (coal_page == NULL || coal_szc != szc) {

				/*
				 * page must be properly aligned
				 */
				if ((page_pptonum(pp) & fsf_mask[szc]) != 0) {
					coal_page = NULL;
					continue;
				}
				coal_page = pp;
				coal_szc = szc;
				coal_cnt = 1;
				continue;
			}

			/*
			 * acceptable to add this to existing candidate page
			 */
			++coal_cnt;
			if (coal_cnt < fsf_pgcnt[coal_szc])
				continue;

			/*
			 * We've got enough pages to coalesce, so do it.
			 * After promoting, we clear coal_page, so it will
			 * take another pass to promote this to an even
			 * larger page.
			 */
			++ncoalesce;
			(void) page_promote_size(coal_page, coal_szc);
			coal_page = NULL;
			continue;
		} else {
			coal_page = NULL;
		}

		if (PP_ISKAS(pp) ||
		    PAGE_LOCKED(pp) ||
		    pp->p_lckcnt != 0 ||
		    pp->p_cowcnt != 0) {
			fspage = 0;
			continue;
		}


		/*
		 * Reject pages that can't be "exclusively" locked.
		 */
		if (!page_trylock(pp, SE_EXCL))
			continue;
		++nlocked;


		/*
		 * After locking the page, redo the above checks.
		 * Since we locked the page, leave out the PAGE_LOCKED() test.
		 */
		vp = pp->p_vnode;
		if (PP_ISSWAP(pp) ||
		    PP_ISFREE(pp) ||
		    vp == NULL ||
		    PP_ISKAS(pp) ||
		    (vp->v_flag & VISSWAP) != 0) {
			page_unlock(pp);
			fspage = 0;
			continue;
		}
		if (pp->p_lckcnt != 0 || pp->p_cowcnt != 0) {
			page_unlock(pp);
			continue;
		}

		fspage = 1;
		ASSERT(vp->v_type != VCHR);

		/*
		 * Check the modified bit. Leave the bit alone in hardware;
		 * it will be cleared if we do the putpage.
		 */
		if (IS_VMODSORT(vp))
			mod = hat_ismod(pp);
		else
			mod = hat_pagesync(pp,
			    HAT_SYNC_DONTZERO | HAT_SYNC_STOPON_MOD) & P_MOD;

		if (mod) {
			++nmodified;
			offset = pp->p_offset;

			/*
			 * Hold the vnode before releasing the page lock
			 * to prevent it from being freed and re-used by
			 * some other thread.
			 */
			VN_HOLD(vp);

			page_unlock(pp);

			(void) VOP_PUTPAGE(vp, offset, PAGESIZE, B_ASYNC,
			    kcred, NULL);

			VN_RELE(vp);
		} else {

			/*
			 * Catch any pages which should be on the cache list,
			 * but aren't yet.
			 */
			if (hat_page_is_mapped(pp) == 0) {
				++releases;
				(void) page_release(pp, 1);
			} else {
				page_unlock(pp);
			}
		}
	}

	/*
	 * maintain statistics
	 * reset every million wakeups, just to avoid overflow
	 */
	if (++fsf_cycles == 1000000) {
		fsf_cycles = 0;
		fsf_total.fsf_scan = 0;
		fsf_total.fsf_examined = 0;
		fsf_total.fsf_locked = 0;
		fsf_total.fsf_modified = 0;
		fsf_total.fsf_coalesce = 0;
		fsf_total.fsf_time = 0;
		fsf_total.fsf_releases = 0;
	} else {
		fsf_total.fsf_scan += fsf_recent.fsf_scan = nscan;
		fsf_total.fsf_examined += fsf_recent.fsf_examined = nexamined;
		fsf_total.fsf_locked += fsf_recent.fsf_locked = nlocked;
		fsf_total.fsf_modified += fsf_recent.fsf_modified = nmodified;
		fsf_total.fsf_coalesce += fsf_recent.fsf_coalesce = ncoalesce;
		fsf_total.fsf_time += fsf_recent.fsf_time = gethrtime() - timer;
		fsf_total.fsf_releases += fsf_recent.fsf_releases = releases;
	}
}
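
The coalescing logic above depends on three tables that this example does not
show: fsf_npgsz, fsf_pgcnt[], and fsf_mask[]. A minimal initialization sketch
follows, assuming they are derived from the platform page sizes via
page_num_pagesizes(), page_get_pagesize(), and page_get_pagecnt(); the exact
setup in the original source may differ.

/*
 * Assumed table setup (illustrative): fsf_pgcnt[szc] is the number of
 * szc-sized pages that make up one page of the next larger size code,
 * and fsf_mask[szc] is the pfn alignment mask checked against
 * page_pptonum(pp) before a new coalescing candidate is started.
 */
static uint_t	fsf_npgsz;
static pgcnt_t	fsf_pgcnt[MMU_PAGE_SIZES];
static pgcnt_t	fsf_mask[MMU_PAGE_SIZES];

static void
example_coalesce_init(void)
{
	uint_t szc;

	fsf_npgsz = page_num_pagesizes();
	ASSERT(fsf_npgsz <= MMU_PAGE_SIZES);

	for (szc = 0; szc < fsf_npgsz - 1; szc++) {
		fsf_pgcnt[szc] =
		    page_get_pagesize(szc + 1) / page_get_pagesize(szc);
		fsf_mask[szc] = page_get_pagecnt(szc + 1) - 1;
	}
}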