/* * Walk the vp->v_pages list, for every page call the callback function * pointed by *page_check. If page_check returns non-zero, then mark the * page as modified and if VMODSORT is set, move it to the end of v_pages * list. Moving makes sense only if we have at least two pages - this also * avoids having v_pages temporarily being NULL after calling page_vpsub() * if there was just one page. */ void pvn_vplist_setdirty(vnode_t *vp, int (*page_check)(page_t *)) { page_t *pp, *next, *end; kmutex_t *vphm; int shuffle; vphm = page_vnode_mutex(vp); mutex_enter(vphm); if (vp->v_pages == NULL) { mutex_exit(vphm); return; } end = vp->v_pages->p_vpprev; shuffle = IS_VMODSORT(vp) && (vp->v_pages != end); pp = vp->v_pages; for (;;) { next = pp->p_vpnext; if (pp->p_hash != PVN_VPLIST_HASH_TAG && page_check(pp)) { /* * hat_setmod_only() in contrast to hat_setmod() does * not shuffle the pages and does not grab the mutex * page_vnode_mutex. Exactly what we need. */ hat_setmod_only(pp); if (shuffle) { page_vpsub(&vp->v_pages, pp); ASSERT(vp->v_pages != NULL); page_vpadd(&vp->v_pages->p_vpprev->p_vpnext, pp); } } /* Stop if we have just processed the last page. */ if (pp == end) break; pp = next; } mutex_exit(vphm); }
void plat_freelist_process(int mnode) { page_t *page, **freelist; page_t *bdlist[STARFIRE_MAX_BOARDS]; page_t **sortlist[STARFIRE_MAX_BOARDS]; uint32_t idx, idy, size, color, max_color, lbn; uint32_t bd_flags, bd_cnt, result, bds; kmutex_t *pcm; int mtype; /* for each page size */ for (mtype = 0; mtype < MAX_MEM_TYPES; mtype++) { for (size = 0; size < mmu_page_sizes; size++) { /* * Compute the maximum # of phys colors based on * page size. */ max_color = page_get_pagecolors(size); /* for each color */ for (color = 0; color < max_color; color++) { bd_cnt = 0; bd_flags = 0; for (idx = 0; idx < STARFIRE_MAX_BOARDS; idx++) { bdlist[idx] = NULL; sortlist[idx] = NULL; } /* find freelist */ freelist = &PAGE_FREELISTS(mnode, size, color, mtype); if (*freelist == NULL) continue; /* acquire locks */ pcm = PC_BIN_MUTEX(mnode, color, PG_FREE_LIST); mutex_enter(pcm); /* * read freelist & sort pages by logical * board number */ /* grab pages till last one. */ while (*freelist) { page = *freelist; result = page_trylock(page, SE_EXCL); ASSERT(result); /* Delete from freelist */ if (size != 0) { page_vpsub(freelist, page); } else { mach_page_sub(freelist, page); } /* detect the lbn */ lbn = PFN_2_LBN(page->p_pagenum); /* add to bdlist[lbn] */ if (size != 0) { page_vpadd(&bdlist[lbn], page); } else { mach_page_add(&bdlist[lbn], page); } /* if lbn new */ if ((bd_flags & (1 << lbn)) == 0) { bd_flags |= (1 << lbn); bd_cnt++; } page_unlock(page); } /* * Make the sortlist so * bd_cnt choices show up */ bds = 0; for (idx = 0; idx < STARFIRE_MAX_BOARDS; idx++) { if (bdlist[idx]) sortlist[bds++] = &bdlist[idx]; } /* * Set random start. */ (void) random_idx(-color); /* * now rebuild the freelist by shuffling * pages from bd lists */ while (bd_cnt) { /* * get "random" index between 0 & * bd_cnt */ ASSERT(bd_cnt && (bd_cnt < STARFIRE_MAX_BOARDS+1)); idx = random_idx(bd_cnt); page = *sortlist[idx]; result = page_trylock(page, SE_EXCL); ASSERT(result); /* Delete from sort_list */ /* & Append to freelist */ /* Big pages use vp_add - 8k don't */ if (size != 0) { page_vpsub(sortlist[idx], page); page_vpadd(freelist, page); } else { mach_page_sub(sortlist[idx], page); mach_page_add(freelist, page); } /* needed for indexing tmp lists */ lbn = PFN_2_LBN(page->p_pagenum); /* * if this was the last page on this * list? */ if (*sortlist[idx] == NULL) { /* have to find brd list */ /* idx is lbn? -- No! */ /* sortlist, brdlist */ /* have diff indexs */ bd_flags &= ~(1 << lbn); --bd_cnt; /* * redo the sortlist so only * bd_cnt choices show up */ bds = 0; for (idy = 0; idy < STARFIRE_MAX_BOARDS; idy++) { if (bdlist[idy]) { sortlist[bds++] /* CSTYLED */ = &bdlist[idy]; } } } page_unlock(page); } mutex_exit(pcm); } } } }
/* * Process a vnode's page list for all pages whose offset is >= off. * Pages are to either be free'd, invalidated, or written back to disk. * * An "exclusive" lock is acquired for each page if B_INVAL or B_FREE * is specified, otherwise they are "shared" locked. * * Flags are {B_ASYNC, B_INVAL, B_FREE, B_DONTNEED, B_TRUNC} * * Special marker page_t's are inserted in the list in order * to keep track of where we are in the list when locks are dropped. * * Note the list is circular and insertions can happen only at the * head and tail of the list. The algorithm ensures visiting all pages * on the list in the following way: * * Drop two marker pages at the end of the list. * * Move one marker page backwards towards the start of the list until * it is at the list head, processing the pages passed along the way. * * Due to race conditions when the vphm mutex is dropped, additional pages * can be added to either end of the list, so we'll continue to move * the marker and process pages until it is up against the end marker. * * There is one special exit condition. If we are processing a VMODSORT * vnode and only writing back modified pages, we can stop as soon as * we run into an unmodified page. This makes fsync(3) operations fast. */ int pvn_vplist_dirty( vnode_t *vp, u_offset_t off, int (*putapage)(vnode_t *, page_t *, u_offset_t *, size_t *, int, cred_t *), int flags, cred_t *cred) { page_t *pp; page_t *mark; /* marker page that moves toward head */ page_t *end; /* marker page at end of list */ int err = 0; int error; kmutex_t *vphm; se_t se; page_t **where_to_move; ASSERT(vp->v_type != VCHR); if (vp->v_pages == NULL) return (0); /* * Serialize vplist_dirty operations on this vnode by setting VVMLOCK. * * Don't block on VVMLOCK if B_ASYNC is set. This prevents sync() * from getting blocked while flushing pages to a dead NFS server. */ mutex_enter(&vp->v_lock); if ((vp->v_flag & VVMLOCK) && (flags & B_ASYNC)) { mutex_exit(&vp->v_lock); return (EAGAIN); } while (vp->v_flag & VVMLOCK) cv_wait(&vp->v_cv, &vp->v_lock); if (vp->v_pages == NULL) { mutex_exit(&vp->v_lock); return (0); } vp->v_flag |= VVMLOCK; mutex_exit(&vp->v_lock); /* * Set up the marker pages used to walk the list */ end = kmem_cache_alloc(marker_cache, KM_SLEEP); end->p_vnode = vp; end->p_offset = (u_offset_t)-2; mark = kmem_cache_alloc(marker_cache, KM_SLEEP); mark->p_vnode = vp; mark->p_offset = (u_offset_t)-1; /* * Grab the lock protecting the vnode's page list * note that this lock is dropped at times in the loop. */ vphm = page_vnode_mutex(vp); mutex_enter(vphm); if (vp->v_pages == NULL) goto leave; /* * insert the markers and loop through the list of pages */ page_vpadd(&vp->v_pages->p_vpprev->p_vpnext, mark); page_vpadd(&mark->p_vpnext, end); for (;;) { /* * If only doing an async write back, then we can * stop as soon as we get to start of the list. */ if (flags == B_ASYNC && vp->v_pages == mark) break; /* * otherwise stop when we've gone through all the pages */ if (mark->p_vpprev == end) break; pp = mark->p_vpprev; if (vp->v_pages == pp) where_to_move = &vp->v_pages; else where_to_move = &pp->p_vpprev->p_vpnext; ASSERT(pp->p_vnode == vp); /* * If just flushing dirty pages to disk and this vnode * is using a sorted list of pages, we can stop processing * as soon as we find an unmodified page. Since all the * modified pages are visited first. */ if (IS_VMODSORT(vp) && !(flags & (B_INVAL | B_FREE | B_TRUNC))) { if (!hat_ismod(pp) && !page_io_locked(pp)) { #ifdef DEBUG /* * For debug kernels examine what should be * all the remaining clean pages, asserting * that they are not modified. */ page_t *chk = pp; int attr; page_vpsub(&vp->v_pages, mark); page_vpadd(where_to_move, mark); do { chk = chk->p_vpprev; ASSERT(chk != end); if (chk == mark) continue; attr = hat_page_getattr(chk, P_MOD | P_REF); if ((attr & P_MOD) == 0) continue; panic("v_pages list not all clean: " "page_t*=%p vnode=%p off=%lx " "attr=0x%x last clean page_t*=%p\n", (void *)chk, (void *)chk->p_vnode, (long)chk->p_offset, attr, (void *)pp); } while (chk != vp->v_pages); #endif break; } else if (!(flags & B_ASYNC) && !hat_ismod(pp)) { /* * Couldn't get io lock, wait until IO is done. * Block only for sync IO since we don't want * to block async IO. */ mutex_exit(vphm); page_io_wait(pp); mutex_enter(vphm); continue; } } /* * Skip this page if the offset is out of the desired range. * Just move the marker and continue. */ if (pp->p_offset < off) { page_vpsub(&vp->v_pages, mark); page_vpadd(where_to_move, mark); continue; } /* * If we are supposed to invalidate or free this * page, then we need an exclusive lock. */ se = (flags & (B_INVAL | B_FREE)) ? SE_EXCL : SE_SHARED; /* * We must acquire the page lock for all synchronous * operations (invalidate, free and write). */ if ((flags & B_INVAL) != 0 || (flags & B_ASYNC) == 0) { /* * If the page_lock() drops the mutex * we must retry the loop. */ if (!page_lock(pp, se, vphm, P_NO_RECLAIM)) continue; /* * It's ok to move the marker page now. */ page_vpsub(&vp->v_pages, mark); page_vpadd(where_to_move, mark); } else { /* * update the marker page for all remaining cases */ page_vpsub(&vp->v_pages, mark); page_vpadd(where_to_move, mark); /* * For write backs, If we can't lock the page, it's * invalid or in the process of being destroyed. Skip * it, assuming someone else is writing it. */ if (!page_trylock(pp, se)) continue; } ASSERT(pp->p_vnode == vp); /* * Successfully locked the page, now figure out what to * do with it. Free pages are easily dealt with, invalidate * if desired or just go on to the next page. */ if (PP_ISFREE(pp)) { if ((flags & B_INVAL) == 0) { page_unlock(pp); continue; } /* * Invalidate (destroy) the page. */ mutex_exit(vphm); page_destroy_free(pp); mutex_enter(vphm); continue; } /* * pvn_getdirty() figures out what do do with a dirty page. * If the page is dirty, the putapage() routine will write it * and will kluster any other adjacent dirty pages it can. * * pvn_getdirty() and `(*putapage)' unlock the page. */ mutex_exit(vphm); if (pvn_getdirty(pp, flags)) { error = (*putapage)(vp, pp, NULL, NULL, flags, cred); if (!err) err = error; } mutex_enter(vphm); } page_vpsub(&vp->v_pages, mark); page_vpsub(&vp->v_pages, end); leave: /* * Release v_pages mutex, also VVMLOCK and wakeup blocked thrds */ mutex_exit(vphm); kmem_cache_free(marker_cache, mark); kmem_cache_free(marker_cache, end); mutex_enter(&vp->v_lock); vp->v_flag &= ~VVMLOCK; cv_broadcast(&vp->v_cv); mutex_exit(&vp->v_lock); return (err); }
void pvn_write_done(page_t *plist, int flags) { int dfree = 0; int pgrec = 0; int pgout = 0; int pgpgout = 0; int anonpgout = 0; int anonfree = 0; int fspgout = 0; int fsfree = 0; int execpgout = 0; int execfree = 0; page_t *pp; struct cpu *cpup; struct vnode *vp = NULL; /* for probe */ uint_t ppattr; kmutex_t *vphm = NULL; ASSERT((flags & B_READ) == 0); /* * If we are about to start paging anyway, start freeing pages. */ if (write_free && freemem < lotsfree + pages_before_pager && (flags & B_ERROR) == 0) { flags |= B_FREE; } /* * Handle each page involved in the i/o operation. */ while (plist != NULL) { pp = plist; ASSERT(PAGE_LOCKED(pp) && page_iolock_assert(pp)); page_sub(&plist, pp); /* Kernel probe support */ if (vp == NULL) vp = pp->p_vnode; if (((flags & B_ERROR) == 0) && IS_VMODSORT(vp)) { /* * Move page to the top of the v_page list. * Skip pages modified during IO. */ vphm = page_vnode_mutex(vp); mutex_enter(vphm); if ((pp->p_vpnext != pp) && !hat_ismod(pp)) { page_vpsub(&vp->v_pages, pp); page_vpadd(&vp->v_pages, pp); } mutex_exit(vphm); } if (flags & B_ERROR) { /* * Write operation failed. We don't want * to destroy (or free) the page unless B_FORCE * is set. We set the mod bit again and release * all locks on the page so that it will get written * back again later when things are hopefully * better again. * If B_INVAL and B_FORCE is set we really have * to destroy the page. */ if ((flags & (B_INVAL|B_FORCE)) == (B_INVAL|B_FORCE)) { page_io_unlock(pp); /*LINTED: constant in conditional context*/ VN_DISPOSE(pp, B_INVAL, 0, kcred); } else { hat_setmod_only(pp); page_io_unlock(pp); page_unlock(pp); } } else if (flags & B_INVAL) { /* * XXX - Failed writes with B_INVAL set are * not handled appropriately. */ page_io_unlock(pp); /*LINTED: constant in conditional context*/ VN_DISPOSE(pp, B_INVAL, 0, kcred); } else if (flags & B_FREE ||!hat_page_is_mapped(pp)) { /* * Update statistics for pages being paged out */ if (pp->p_vnode) { if (IS_SWAPFSVP(pp->p_vnode)) { anonpgout++; } else { if (pp->p_vnode->v_flag & VVMEXEC) { execpgout++; } else { fspgout++; } } } page_io_unlock(pp); pgout = 1; pgpgout++; TRACE_1(TR_FAC_VM, TR_PAGE_WS_OUT, "page_ws_out:pp %p", pp); /* * The page_struct_lock need not be acquired to * examine "p_lckcnt" and "p_cowcnt" since we'll * have an "exclusive" lock if the upgrade succeeds. */ if (page_tryupgrade(pp) && pp->p_lckcnt == 0 && pp->p_cowcnt == 0) { /* * Check if someone has reclaimed the * page. If ref and mod are not set, no * one is using it so we can free it. * The rest of the system is careful * to use the NOSYNC flag to unload * translations set up for i/o w/o * affecting ref and mod bits. * * Obtain a copy of the real hardware * mod bit using hat_pagesync(pp, HAT_DONTZERO) * to avoid having to flush the cache. */ ppattr = hat_pagesync(pp, HAT_SYNC_DONTZERO | HAT_SYNC_STOPON_MOD); ck_refmod: if (!(ppattr & (P_REF | P_MOD))) { if (hat_page_is_mapped(pp)) { /* * Doesn't look like the page * was modified so now we * really have to unload the * translations. Meanwhile * another CPU could've * modified it so we have to * check again. We don't loop * forever here because now * the translations are gone * and no one can get a new one * since we have the "exclusive" * lock on the page. */ (void) hat_pageunload(pp, HAT_FORCE_PGUNLOAD); ppattr = hat_page_getattr(pp, P_REF | P_MOD); goto ck_refmod; } /* * Update statistics for pages being * freed */ if (pp->p_vnode) { if (IS_SWAPFSVP(pp->p_vnode)) { anonfree++; } else { if (pp->p_vnode->v_flag & VVMEXEC) { execfree++; } else { fsfree++; } } } /*LINTED: constant in conditional ctx*/ VN_DISPOSE(pp, B_FREE, (flags & B_DONTNEED), kcred); dfree++; } else { page_unlock(pp); pgrec++; TRACE_1(TR_FAC_VM, TR_PAGE_WS_FREE, "page_ws_free:pp %p", pp); } } else { /* * Page is either `locked' in memory * or was reclaimed and now has a * "shared" lock, so release it. */ page_unlock(pp); } } else { /* * Neither B_FREE nor B_INVAL nor B_ERROR. * Just release locks. */ page_io_unlock(pp); page_unlock(pp); } } CPU_STATS_ENTER_K(); cpup = CPU; /* get cpup now that CPU cannot change */ CPU_STATS_ADDQ(cpup, vm, dfree, dfree); CPU_STATS_ADDQ(cpup, vm, pgrec, pgrec); CPU_STATS_ADDQ(cpup, vm, pgout, pgout); CPU_STATS_ADDQ(cpup, vm, pgpgout, pgpgout); CPU_STATS_ADDQ(cpup, vm, anonpgout, anonpgout); CPU_STATS_ADDQ(cpup, vm, anonfree, anonfree); CPU_STATS_ADDQ(cpup, vm, fspgout, fspgout); CPU_STATS_ADDQ(cpup, vm, fsfree, fsfree); CPU_STATS_ADDQ(cpup, vm, execpgout, execpgout); CPU_STATS_ADDQ(cpup, vm, execfree, execfree); CPU_STATS_EXIT_K(); /* Kernel probe */ TNF_PROBE_4(pageout, "vm pageio io", /* CSTYLED */, tnf_opaque, vnode, vp, tnf_ulong, pages_pageout, pgpgout, tnf_ulong, pages_freed, dfree, tnf_ulong, pages_reclaimed, pgrec); }