/*
 * Handle a write fault on a page that is present but write-protected.
 *
 * When can_share_swap_page() says the existing page may simply be reused,
 * the pte is rewritten in place as writable, dirty and young.  Otherwise a
 * new page is allocated, the old contents are copied into it and the pte
 * is switched to the copy through break_cow().
 *
 * The protection checks are assumed to have been done by the caller, so
 * the mapping is made writable without further verification.  The pte is
 * marked dirty here, before the faulting write is actually retried.
 *
 * Locking: entered with mm->page_table_lock held (the original notes also
 * mention the mm semaphore); every return path leaves page_table_lock
 * released.
 *
 * Returns 1 for a minor fault, or -1 when the pte refers to an invalid
 * page or when no new page could be allocated.
 */
static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
	unsigned long address, pte_t *page_table, pte_t pte)
{
	struct page *shared = pte_page(pte);
	struct page *copy;
	struct page *discard;

	if (!VALID_PAGE(shared)) {
		spin_unlock(&mm->page_table_lock);
		printk("do_wp_page: bogus page at address %08lx (page 0x%lx)\n",address,(unsigned long)shared);
		return -1;
	}

	/* Only probe for reuse if the page lock could be taken right now. */
	if (!TryLockPage(shared)) {
		int reuse = can_share_swap_page(shared);

		unlock_page(shared);
		if (reuse) {
#ifndef CONFIG_SUPERH /* Not needed for VIPT cache */
			flush_cache_page(vma, address);
#endif
			establish_pte(vma, address, page_table,
				      pte_mkyoung(pte_mkdirty(pte_mkwrite(pte))));
			spin_unlock(&mm->page_table_lock);
			return 1;	/* Minor fault */
		}
	}

	/*
	 * A private copy is required.  Take a reference on the old page
	 * before dropping the lock for the allocation and the copy.
	 */
	page_cache_get(shared);
	spin_unlock(&mm->page_table_lock);

	copy = alloc_page(GFP_HIGHUSER);
	if (!copy) {
		page_cache_release(shared);
		return -1;
	}
	copy_cow_page(shared, copy, address);

	/*
	 * The lock was dropped, so the pte has to be compared again.  If it
	 * changed, the copy is the page that gets thrown away; if it is
	 * unchanged, the copy is installed and the old page loses one more
	 * reference instead.
	 */
	spin_lock(&mm->page_table_lock);
	discard = copy;
	if (pte_same(*page_table, pte)) {
		if (PageReserved(shared))
			++mm->rss;
		break_cow(vma, copy, address, page_table);
		lru_cache_add(copy);
		discard = shared;
	}
	spin_unlock(&mm->page_table_lock);

	page_cache_release(discard);
	/* Drop the reference taken before the lock was released. */
	page_cache_release(shared);
	return 1;	/* Minor fault */
}
/*
 * Release one reference to a page and, if the page sits in the swap
 * cache and is not shared, remove its swap cache entry first.
 *
 * lock_page() cannot be used because the page_table_lock spinlock is
 * held, so the page lock is only tried; when it cannot be taken the swap
 * cache is left untouched and just the reference is dropped.
 */
void free_page_and_swap_cache(struct page *page)
{
	if (!PageSwapCache(page))
		goto drop_ref;

	/* Page lock not available: skip the swap cache cleanup. */
	if (TryLockPage(page))
		goto drop_ref;

	/* We hold the page lock; only an unshared page loses its entry. */
	if (!is_page_shared(page))
		delete_from_swap_cache_nolock(page);
	UnlockPage(page);

drop_ref:
	page_cache_release(page);
}
/*
 * Release one reference to a page, first giving
 * remove_exclusive_swap_page() a chance to drop the page's swap cache
 * entry.
 *
 * lock_page() cannot be used because the page_table_lock spinlock is
 * held, so the page lock is only tried.
 */
void free_page_and_swap_cache(struct page *page)
{
	/*
	 * PageSwapCache() is tested without the page lock.  Per the
	 * original note (Marcelo) that is fine because the state is
	 * checked again, with the lock held, further down the call chain.
	 */
	int got_lock = PageSwapCache(page) && !TryLockPage(page);

	if (got_lock) {
		remove_exclusive_swap_page(page);
		UnlockPage(page);
	}

	page_cache_release(page);
}
/*
 * try_to_swap_out - try to unmap one page from one pte.
 *
 * @mm:         address space whose rss is decremented when the pte is dropped
 * @vma:        mapping containing @address; VM_LOCKED mappings are skipped
 * @address:    virtual address used for the cache and TLB flushes
 * @page_table: the pte to examine and possibly replace
 * @page:       the page currently mapped by that pte
 * @classzone:  only pages for which memclass(page->zone, classzone) holds
 *              are considered
 *
 * Locking: mm->page_table_lock is held. mmap_sem is not held.
 *
 * Returns 0 when the pte was left in place or restored.  When the pte was
 * dropped, returns the "freeable" value computed below
 * (page_count(page) - !!page->buffers <= 2).
 */
static inline int try_to_swap_out(struct mm_struct * mm, struct vm_area_struct* vma, unsigned long address, pte_t * page_table, struct page *page, zone_t * classzone)
{
	pte_t pte;
	swp_entry_t entry;

	/* Don't look at this pte if it's been accessed recently. */
	if ((vma->vm_flags & VM_LOCKED) || ptep_test_and_clear_young(page_table)) {
		mark_page_accessed(page);
		return 0;
	}

	/* Don't bother unmapping pages that are active */
	if (PageActive(page))
		return 0;

	/* Don't bother replenishing zones not under pressure.. */
	if (!memclass(page->zone, classzone))
		return 0;

	/* The page lock is only tried; give up if it cannot be taken. */
	if (TryLockPage(page))
		return 0;

	/* From this point on, the odds are that we're going to
	 * nuke this pte, so read and clear the pte.  This hook
	 * is needed on CPUs which update the accessed and dirty
	 * bits in hardware.
	 */
	flush_cache_page(vma, address);
	pte = ptep_get_and_clear(page_table);
	flush_tlb_page(vma, address);

	/* Carry the pte's dirty bit over to the page. */
	if (pte_dirty(pte))
		set_page_dirty(page);

	/*
	 * Is the page already in the swap cache? If so, then
	 * we can just drop our reference to it without doing
	 * any IO - it's already up-to-date on disk.
	 */
	if (PageSwapCache(page)) {
		entry.val = page->index;
		swap_duplicate(entry);
set_swap_pte:
		/* Also reached by goto once a new swap entry has been set up. */
		set_pte(page_table, swp_entry_to_pte(entry));
drop_pte:
		/* Also reached by goto for pages that are simply unmapped. */
		mm->rss--;
		UnlockPage(page);
		{
			/* Computed before our own reference is released. */
			int freeable = page_count(page) - !!page->buffers <= 2;
			page_cache_release(page);
			return freeable;
		}
	}

	/*
	 * Is it a clean page? Then it must be recoverable
	 * by just paging it in again, and we can just drop
	 * it..  or if it's dirty but has backing store,
	 * just mark the page dirty and drop it.
	 *
	 * However, this won't actually free any real
	 * memory, as the page will just be in the page cache
	 * somewhere, and as such we should just continue
	 * our scan.
	 *
	 * Basically, this just makes it possible for us to do
	 * some real work in the future in "refill_inactive()".
	 */
	if (page->mapping)
		goto drop_pte;
	if (!PageDirty(page))
		goto drop_pte;

	/*
	 * Anonymous buffercache pages can be left behind by
	 * concurrent truncate and pagefault.
	 */
	if (page->buffers)
		goto preserve;

	/*
	 * This is a dirty, swappable page.  First of all,
	 * get a suitable swap entry for it, and make sure
	 * we have the swap cache set up to associate the
	 * page with that swap entry.
	 */
	for (;;) {
		entry = get_swap_page();
		if (!entry.val)
			break;
		/* Add it to the swap cache and mark it dirty
		 * (adding to the page cache will clear the dirty
		 * and uptodate bits, so we need to do it again)
		 */
		if (add_to_swap_cache(page, entry) == 0) {
			SetPageUptodate(page);
			set_page_dirty(page);
			goto set_swap_pte;
		}
		/* Raced with "speculative" read_swap_cache_async */
		swap_free(entry);
	}

	/* No swap space left */
preserve:
	/* Put the original pte back and leave the page mapped. */
	set_pte(page_table, pte);
	UnlockPage(page);
	return 0;
}
/*
 * shrink_cache - scan the tail of the inactive list and free what can be freed.
 *
 * @nr_pages:  number of pages the caller wants freed; decremented for each
 *             page released here
 * @classzone: pages whose zone does not satisfy memclass() are skipped
 * @gfp_mask:  writepage and waiting on laundered pages are only done when
 *             __GFP_FS is set; also passed to try_to_release_page()
 * @priority:  scales the scan limit (nr_inactive_pages / priority) and the
 *             mapped-page budget, and is handed to swap_out()
 *
 * Takes and releases pagemap_lru_lock itself.  When more than max_mapped
 * busy or mapped pages are met, the scan stops and swap_out() is called.
 *
 * Returns the remaining value of nr_pages.
 */
static int shrink_cache(int nr_pages, zone_t * classzone, unsigned int gfp_mask, int priority)
{
	struct list_head * entry;
	int max_scan = nr_inactive_pages / priority;
	int max_mapped = min((nr_pages << (10 - priority)), max_scan / 10);

	spin_lock(&pagemap_lru_lock);
	while (--max_scan >= 0 && (entry = inactive_list.prev) != &inactive_list) {
		struct page * page;

		/* lock depth is 1 or 2 */
		if (unlikely(current->need_resched)) {
			/* Drop the LRU lock around the reschedule. */
			spin_unlock(&pagemap_lru_lock);
			__set_current_state(TASK_RUNNING);
			schedule();
			spin_lock(&pagemap_lru_lock);
			continue;
		}

		page = list_entry(entry, struct page, lru);

		if (unlikely(!PageLRU(page)))
			BUG();
		if (unlikely(PageActive(page)))
			BUG();

		/* Rotate the page to the head of the inactive list. */
		list_del(entry);
		list_add(entry, &inactive_list);

		/*
		 * Zero page counts can happen because we unlink the pages
		 * _after_ decrementing the usage count..
		 */
		if (unlikely(!page_count(page)))
			continue;

		if (!memclass(page->zone, classzone))
			continue;

		/* Racy check to avoid trylocking when not worthwhile */
		if (!page->buffers && (page_count(page) != 1 || !page->mapping))
			goto page_mapped;

		/*
		 * The page is locked. IO in progress?
		 * Move it to the back of the list.
		 */
		if (unlikely(TryLockPage(page))) {
			if (PageLaunder(page) && (gfp_mask & __GFP_FS)) {
				/* Hold a reference while waiting without the LRU lock. */
				page_cache_get(page);
				spin_unlock(&pagemap_lru_lock);
				wait_on_page(page);
				page_cache_release(page);
				spin_lock(&pagemap_lru_lock);
			}
			continue;
		}

		if ((PageDirty(page) || DelallocPage(page)) && is_page_cache_freeable(page) && page->mapping) {
			/*
			 * It is not critical here to write it only if
			 * the page is unmapped because any direct writer
			 * like O_DIRECT would set the PG_dirty bitflag
			 * on the physical page after having successfully
			 * pinned it and after the I/O to the page is finished,
			 * so the direct writes to the page cannot get lost.
			 */
			int (*writepage)(struct page *);

			writepage = page->mapping->a_ops->writepage;
			if ((gfp_mask & __GFP_FS) && writepage) {
				ClearPageDirty(page);
				SetPageLaunder(page);
				page_cache_get(page);
				spin_unlock(&pagemap_lru_lock);

				/* The return value of writepage is not examined. */
				writepage(page);
				page_cache_release(page);

				spin_lock(&pagemap_lru_lock);
				continue;
			}
		}

		/*
		 * If the page has buffers, try to free the buffer mappings
		 * associated with this page. If we succeed we try to free
		 * the page as well.
		 */
		if (page->buffers) {
			spin_unlock(&pagemap_lru_lock);

			/* avoid to free a locked page */
			page_cache_get(page);

			if (try_to_release_page(page, gfp_mask)) {
				if (!page->mapping) {
					/*
					 * We must not allow an anon page
					 * with no buffers to be visible on
					 * the LRU, so we unlock the page after
					 * taking the lru lock
					 */
					spin_lock(&pagemap_lru_lock);
					UnlockPage(page);
					__lru_cache_del(page);

					/* effectively free the page here */
					page_cache_release(page);

					if (--nr_pages)
						continue;
					break;
				} else {
					/*
					 * The page is still in pagecache so undo the stuff
					 * before the try_to_release_page since we've not
					 * finished and we can now try the next step.
					 */
					page_cache_release(page);

					spin_lock(&pagemap_lru_lock);
				}
			} else {
				/* failed to drop the buffers so stop here */
				UnlockPage(page);
				page_cache_release(page);

				spin_lock(&pagemap_lru_lock);
				continue;
			}
		}

		spin_lock(&pagecache_lock);

		/*
		 * this is the non-racy check for busy page.
		 */
		if (!page->mapping || !is_page_cache_freeable(page)) {
			spin_unlock(&pagecache_lock);
			UnlockPage(page);
page_mapped:
			/* Also reached from the racy check above, without the page lock. */
			if (--max_mapped >= 0)
				continue;

			/*
			 * Alert! We've found too many mapped pages on the
			 * inactive list, so we start swapping out now!
			 */
			spin_unlock(&pagemap_lru_lock);
			swap_out(priority, gfp_mask, classzone);
			return nr_pages;
		}

		/*
		 * It is critical to check PageDirty _after_ we made sure
		 * the page is freeable so not in use by anybody.
		 */
		if (PageDirty(page)) {
			spin_unlock(&pagecache_lock);
			UnlockPage(page);
			continue;
		}

		/* point of no return */
		if (likely(!PageSwapCache(page))) {
			__remove_inode_page(page);
			spin_unlock(&pagecache_lock);
		} else {
			swp_entry_t swap;
			/* Remember the swap entry before the page leaves the swap cache. */
			swap.val = page->index;
			__delete_from_swap_cache(page);
			spin_unlock(&pagecache_lock);
			swap_free(swap);
		}

		__lru_cache_del(page);
		UnlockPage(page);

		/* effectively free the page here */
		page_cache_release(page);

		/* Stop as soon as the requested number of pages has been freed. */
		if (--nr_pages)
			continue;
		break;
	}
	spin_unlock(&pagemap_lru_lock);

	return nr_pages;
}
/*
 * Handle a write fault on a page that is present but write-protected.
 *
 * The copy is avoided when this mapping is the only user of the page
 * (count == 1), or when the count is 2, the page is in the swap cache and
 * is_page_shared() reports it as unshared.  In those cases the pte is
 * rewritten in place as writable, dirty and young.  Otherwise a new page
 * is allocated and break_cow() installs it.
 *
 * The protection checks are assumed to have been done by the caller, so
 * the mapping is made writable without further verification.  The pte is
 * marked dirty here, before the faulting write is actually retried.
 *
 * Locking: entered with mm->page_table_lock held; every return path
 * leaves it released.
 *
 * Returns 1 for a minor fault, or -1 when the pte refers to an invalid
 * page or when no new page could be allocated.
 */
static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
	unsigned long address, pte_t *page_table, pte_t pte)
{
	struct page *shared = pte_page(pte);
	struct page *copy;
	struct page *discard;
	int users;
	int reuse = 0;

	if (!VALID_PAGE(shared)) {
		spin_unlock(&mm->page_table_lock);
		printk("do_wp_page: bogus page at address %08lx (page 0x%lx)\n",address,(unsigned long)shared);
		return -1;
	}

	users = page_count(shared);
	if (users == 1) {
		reuse = 1;
	} else if (users == 2) {
		/*
		 * The second user may be the swap cache.  Lock the page so
		 * that no one can look it up from the swap cache while we
		 * check.  lock_page() is not possible here because
		 * page_table_lock is held, so the lock is only tried.
		 */
		if (PageSwapCache(shared) && !TryLockPage(shared)) {
			reuse = !is_page_shared(shared);
			UnlockPage(shared);
		}
	}

	if (reuse) {
		flush_cache_page(vma, address);
		establish_pte(vma, address, page_table,
			      pte_mkyoung(pte_mkdirty(pte_mkwrite(pte))));
		spin_unlock(&mm->page_table_lock);
		return 1;	/* Minor fault */
	}

	/*
	 * A private copy is required; the allocation is done with the
	 * lock dropped.
	 */
	spin_unlock(&mm->page_table_lock);
	copy = page_cache_alloc();
	if (!copy)
		return -1;
	spin_lock(&mm->page_table_lock);

	/*
	 * The lock was dropped, so the pte has to be compared again.  If it
	 * changed, the new page is thrown away; if it is unchanged, the new
	 * page is installed and the old page is released instead.
	 */
	discard = copy;
	if (pte_same(*page_table, pte)) {
		if (PageReserved(shared))
			++mm->rss;
		break_cow(vma, shared, copy, address, page_table);
		discard = shared;
	}
	spin_unlock(&mm->page_table_lock);

	page_cache_release(discard);
	return 1;	/* Minor fault */
}
int lock_kiovec(int nr, struct kiobuf *iovec[], int wait) { struct kiobuf *iobuf; int i, j; struct page *page, **ppage; int doublepage = 0; int repeat = 0; repeat: for (i = 0; i < nr; i++) { iobuf = iovec[i]; if (iobuf->locked) continue; iobuf->locked = 1; ppage = iobuf->maplist; for (j = 0; j < iobuf->nr_pages; ppage++, j++) { page = *ppage; if (!page) continue; if (TryLockPage(page)) goto retry; } } return 0; retry: /* * We couldn't lock one of the pages. Undo the locking so far, * wait on the page we got to, and try again. */ unlock_kiovec(nr, iovec); if (!wait) return -EAGAIN; /* * Did the release also unlock the page we got stuck on? */ if (!PageLocked(page)) { /* * If so, we may well have the page mapped twice * in the IO address range. Bad news. Of * course, it _might_ just be a coincidence, * but if it happens more than once, chances * are we have a double-mapped page. */ if (++doublepage >= 3) return -EINVAL; /* Try again... */ wait_on_page(page); } if (++repeat < 16) goto repeat; return -EAGAIN; }
/*
 * page_launder - scan the inactive_dirty list and try to make pages freeable.
 *
 * @gfp_mask: the second, IO-issuing pass is only entered when __GFP_IO is set
 * @sync:     when non-zero, the second pass may ask try_to_free_buffers()
 *            for synchronous IO (wait == 2) once maxlaunder is zero; it is
 *            cleared if the first pass already cleaned some pages
 *
 * The first pass (launder_loop == 0) does no writeout: dirty pages are just
 * rotated on the list.  If that pass leaves a free_shortage() and IO is
 * allowed, the list is scanned once more with writeout enabled.
 *
 * Returns cleaned_pages, which counts pages moved to the inactive_clean
 * list as well as buffer-only pages whose buffers were freed.
 */
int page_launder(int gfp_mask, int sync)
{
	int launder_loop, maxscan, cleaned_pages, maxlaunder;
	int can_get_io_locks;
	struct list_head * page_lru;
	struct page * page;

	/*
	 * We can only grab the IO locks (eg. for flushing dirty
	 * buffers to disk) if __GFP_IO is set.
	 */
	can_get_io_locks = gfp_mask & __GFP_IO;

	launder_loop = 0;
	maxlaunder = 0;
	cleaned_pages = 0;

dirty_page_rescan:
	spin_lock(&pagemap_lru_lock);
	/* Bound the scan by the list length at the start of this pass. */
	maxscan = nr_inactive_dirty_pages;
	while ((page_lru = inactive_dirty_list.prev) != &inactive_dirty_list &&
				maxscan-- > 0) {
		page = list_entry(page_lru, struct page, lru);

		/* Wrong page on list?! (list corruption, should not happen) */
		if (!PageInactiveDirty(page)) {
			printk("VM: page_launder, wrong page on list.\n");
			list_del(page_lru);
			nr_inactive_dirty_pages--;
			page->zone->inactive_dirty_pages--;
			continue;
		}

		/* Page is or was in use?  Move it to the active list. */
		if (PageTestandClearReferenced(page) || page->age > 0 ||
				(!page->buffers && page_count(page) > 1) ||
				page_ramdisk(page)) {
			del_page_from_inactive_dirty_list(page);
			add_page_to_active_list(page);
			continue;
		}

		/*
		 * The page is locked. IO in progress?
		 * Move it to the back of the list.
		 */
		if (TryLockPage(page)) {
			list_del(page_lru);
			list_add(page_lru, &inactive_dirty_list);
			continue;
		}

		/*
		 * Dirty swap-cache page? Write it out if
		 * last copy..
		 */
		if (PageDirty(page)) {
			int (*writepage)(struct page *) = page->mapping->a_ops->writepage;
			int result;

			if (!writepage)
				goto page_active;

			/* First time through? Move it to the back of the list */
			if (!launder_loop) {
				list_del(page_lru);
				list_add(page_lru, &inactive_dirty_list);
				UnlockPage(page);
				continue;
			}

			/* OK, do a physical asynchronous write to swap.  */
			ClearPageDirty(page);
			/* Hold a reference while the LRU lock is dropped for the write. */
			page_cache_get(page);
			spin_unlock(&pagemap_lru_lock);

			result = writepage(page);
			page_cache_release(page);

			/* And re-start the thing.. */
			spin_lock(&pagemap_lru_lock);
			if (result != 1)
				continue;
			/* writepage refused to do anything */
			set_page_dirty(page);
			goto page_active;
		}

		/*
		 * If the page has buffers, try to free the buffer mappings
		 * associated with this page. If we succeed we either free
		 * the page (in case it was a buffercache only page) or we
		 * move the page to the inactive_clean list.
		 *
		 * On the first round, we should free all previously cleaned
		 * buffer pages
		 */
		if (page->buffers) {
			int wait, clearedbuf;
			int freed_page = 0;
			/*
			 * Since we might be doing disk IO, we have to
			 * drop the spinlock and take an extra reference
			 * on the page so it doesn't go away from under us.
			 */
			del_page_from_inactive_dirty_list(page);
			page_cache_get(page);
			spin_unlock(&pagemap_lru_lock);

			/* Will we do (asynchronous) IO? */
			if (launder_loop && maxlaunder == 0 && sync)
				wait = 2;	/* Synchronous IO */
			else if (launder_loop && maxlaunder-- > 0)
				wait = 1;	/* Async IO */
			else
				wait = 0;	/* No IO */

			/* Try to free the page buffers. */
			clearedbuf = try_to_free_buffers(page, wait);

			/*
			 * Re-take the spinlock. Note that we cannot
			 * unlock the page yet since we're still
			 * accessing the page_struct here...
			 */
			spin_lock(&pagemap_lru_lock);

			/* The buffers were not freed. */
			if (!clearedbuf) {
				add_page_to_inactive_dirty_list(page);

			/* The page was only in the buffer cache. */
			} else if (!page->mapping) {
				atomic_dec(&buffermem_pages);
				freed_page = 1;
				cleaned_pages++;

			/* The page has more users besides the cache and us. */
			} else if (page_count(page) > 2) {
				add_page_to_active_list(page);

			/* OK, we "created" a freeable page. */
			} else /* page->mapping && page_count(page) == 2 */ {
				add_page_to_inactive_clean_list(page);
				cleaned_pages++;
			}

			/*
			 * Unlock the page and drop the extra reference.
			 * We can only do it here because we are accessing
			 * the page struct above.
			 */
			UnlockPage(page);
			page_cache_release(page);

			/*
			 * If we're freeing buffer cache pages, stop when
			 * we've got enough free memory.
			 */
			if (freed_page && !free_shortage())
				break;
			continue;
		} else if (page->mapping && !PageDirty(page)) {
			/*
			 * If a page had an extra reference in
			 * deactivate_page(), we will find it here.
			 * Now the page is really freeable, so we
			 * move it to the inactive_clean list.
			 */
			del_page_from_inactive_dirty_list(page);
			add_page_to_inactive_clean_list(page);
			UnlockPage(page);
			cleaned_pages++;
		} else {
page_active:
			/*
			 * OK, we don't know what to do with the page.
			 * It's no use keeping it here, so we move it to
			 * the active list.
			 */
			del_page_from_inactive_dirty_list(page);
			add_page_to_active_list(page);
			UnlockPage(page);
		}
	}
	spin_unlock(&pagemap_lru_lock);

	/*
	 * If we don't have enough free pages, we loop back once
	 * to queue the dirty pages for writeout. When we were called
	 * by a user process (that /needs/ a free page) and we didn't
	 * free anything yet, we wait synchronously on the writeout of
	 * MAX_SYNC_LAUNDER pages.
	 *
	 * We also wake up bdflush, since bdflush should, under most
	 * loads, flush out the dirty pages before we have to wait on
	 * IO.
	 */
	if (can_get_io_locks && !launder_loop && free_shortage()) {
		launder_loop = 1;
		/* If we cleaned pages, never do synchronous IO. */
		if (cleaned_pages)
			sync = 0;
		/* We only do a few "out of order" flushes. */
		maxlaunder = MAX_LAUNDER;
		/* Kflushd takes care of the rest. */
		wakeup_bdflush(0);
		goto dirty_page_rescan;
	}

	/*
	 * Return the number of cleaned pages: those moved to the
	 * inactive_clean list plus freed buffer-only pages.
	 */
	return cleaned_pages;
}
/**
 * reclaim_page -	reclaims one page from the inactive_clean list
 * @zone: reclaim a page from this zone
 *
 * The pages on the inactive_clean can be instantly reclaimed.
 * The tests look impressive, but most of the time we'll grab
 * the first page of the list and exit successfully.
 *
 * At most zone->inactive_clean_pages list entries are examined.
 * memory_pressure is incremented on every call, whether or not a
 * page was found.
 *
 * Returns the reclaimed page, removed from the swap or inode cache
 * and from the inactive_clean list, or NULL if no page could be
 * reclaimed.
 */
struct page * reclaim_page(zone_t * zone)
{
	struct page * page = NULL;
	struct list_head * page_lru;
	int maxscan;

	/*
	 * We only need the pagemap_lru_lock if we don't reclaim the page,
	 * but we have to grab the pagecache_lock before the pagemap_lru_lock
	 * to avoid deadlocks and most of the time we'll succeed anyway.
	 */
	spin_lock(&pagecache_lock);
	spin_lock(&pagemap_lru_lock);
	maxscan = zone->inactive_clean_pages;
	while ((page_lru = zone->inactive_clean_list.prev) !=
			&zone->inactive_clean_list && maxscan--) {
		page = list_entry(page_lru, struct page, lru);

		/* Wrong page on list?! (list corruption, should not happen) */
		if (!PageInactiveClean(page)) {
			printk("VM: reclaim_page, wrong page on list.\n");
			list_del(page_lru);
			page->zone->inactive_clean_pages--;
			continue;
		}

		/* Page is or was in use?  Move it to the active list. */
		if (PageTestandClearReferenced(page) || page->age > 0 ||
				(!page->buffers && page_count(page) > 1)) {
			del_page_from_inactive_clean_list(page);
			add_page_to_active_list(page);
			continue;
		}

		/*
		 * The page is dirty, or locked, move to inactive_dirty list.
		 * TryLockPage() is evaluated last, so when this test falls
		 * through we are holding the page lock.
		 */
		if (page->buffers || PageDirty(page) || TryLockPage(page)) {
			del_page_from_inactive_clean_list(page);
			add_page_to_inactive_dirty_list(page);
			continue;
		}

		/* OK, remove the page from the caches. */
		if (PageSwapCache(page)) {
			__delete_from_swap_cache(page);
			goto found_page;
		}

		if (page->mapping) {
			__remove_inode_page(page);
			goto found_page;
		}

		/* We should never ever get here. */
		printk(KERN_ERR "VM: reclaim_page, found unknown page\n");
		list_del(page_lru);
		zone->inactive_clean_pages--;
		UnlockPage(page);
	}
	/* Reset page pointer, maybe we encountered an unfreeable page. */
	page = NULL;
	goto out;

found_page:
	del_page_from_inactive_clean_list(page);
	UnlockPage(page);
	page->age = PAGE_AGE_START;
	/* Only reported; the page is returned regardless of its count. */
	if (page_count(page) != 1)
		printk("VM: reclaim_page, found page with count %d!\n",
				page_count(page));
out:
	spin_unlock(&pagemap_lru_lock);
	spin_unlock(&pagecache_lock);
	memory_pressure++;
	return page;
}
/*
 * The swap-out functions return 1 if they successfully
 * threw something out, and we got a free page. It returns
 * zero if it couldn't do anything, and any other value
 * indicates it decreased rss, but the page was shared.
 *
 * NOTE! If it sleeps, it *must* return 1 to make sure we
 * don't continue with the swap-out. Otherwise we may be
 * using a process that no longer actually exists (it might
 * have died while we slept).
 *
 * NOTE(review): the paragraphs above describe the general contract of
 * the swap-out functions.  Every return path in this function returns 0,
 * and the gfp_mask parameter is not used in its body.
 *
 * @mm:         address space; swap_cnt is decremented (if non-zero) for
 *              each present, valid, non-reserved page looked at, and rss
 *              is decremented when a pte is dropped
 * @vma:        mapping containing @address
 * @address:    virtual address used for the cache and TLB flushes
 * @page_table: the pte to examine and possibly replace
 * @gfp_mask:   unused here
 */
static int try_to_swap_out(struct mm_struct * mm, struct vm_area_struct* vma, unsigned long address, pte_t * page_table, int gfp_mask)
{
	pte_t pte;
	swp_entry_t entry;
	struct page * page;
	int onlist;

	pte = *page_table;
	if (!pte_present(pte))
		goto out_failed;
	page = pte_page(pte);
	if ((!VALID_PAGE(page)) || PageReserved(page))
		goto out_failed;

	if (mm->swap_cnt)
		mm->swap_cnt--;

	onlist = PageActive(page);
	/* Don't look at this pte if it's been accessed recently. */
	if (ptep_test_and_clear_young(page_table)) {
		age_page_up(page);
		goto out_failed;
	}
	if (!onlist)
		/* The page is still mapped, so it can't be freeable... */
		age_page_down_ageonly(page);

	/*
	 * If the page is in active use by us, or if the page
	 * is in active use by others, don't unmap it or
	 * (worse) start unneeded IO.
	 */
	if (page->age > 0)
		goto out_failed;

	/* The page lock is only tried; give up if it cannot be taken. */
	if (TryLockPage(page))
		goto out_failed;

	/* From this point on, the odds are that we're going to
	 * nuke this pte, so read and clear the pte.  This hook
	 * is needed on CPUs which update the accessed and dirty
	 * bits in hardware.
	 */
	pte = ptep_get_and_clear(page_table);

	/*
	 * Is the page already in the swap cache? If so, then
	 * we can just drop our reference to it without doing
	 * any IO - it's already up-to-date on disk.
	 *
	 * Return 0, as we didn't actually free any real
	 * memory, and we should just continue our scan.
	 */
	if (PageSwapCache(page)) {
		entry.val = page->index;
		if (pte_dirty(pte))
			set_page_dirty(page);
set_swap_pte:
		/* Also reached by goto once a new swap entry has been set up. */
		swap_duplicate(entry);
		set_pte(page_table, swp_entry_to_pte(entry));
drop_pte:
		/* Also reached by goto for pages that are simply unmapped. */
		UnlockPage(page);
		mm->rss--;
		flush_tlb_page(vma, address);
		deactivate_page(page);
		page_cache_release(page);
out_failed:
		/* Shared exit for the early bail-outs above as well. */
		return 0;
	}

	/*
	 * Is it a clean page? Then it must be recoverable
	 * by just paging it in again, and we can just drop
	 * it..
	 *
	 * However, this won't actually free any real
	 * memory, as the page will just be in the page cache
	 * somewhere, and as such we should just continue
	 * our scan.
	 *
	 * Basically, this just makes it possible for us to do
	 * some real work in the future in "refill_inactive()".
	 */
	flush_cache_page(vma, address);
	if (!pte_dirty(pte))
		goto drop_pte;

	/*
	 * Ok, it's really dirty. That means that
	 * we should either create a new swap cache
	 * entry for it, or we should write it back
	 * to its own backing store.
	 */
	if (page->mapping) {
		set_page_dirty(page);
		goto drop_pte;
	}

	/*
	 * This is a dirty, swappable page.  First of all,
	 * get a suitable swap entry for it, and make sure
	 * we have the swap cache set up to associate the
	 * page with that swap entry.
	 */
	entry = get_swap_page();
	if (!entry.val)
		goto out_unlock_restore; /* No swap space left */

	/* Add it to the swap cache and mark it dirty */
	add_to_swap_cache(page, entry);
	set_page_dirty(page);
	goto set_swap_pte;

out_unlock_restore:
	/* Put the original pte back and leave the page mapped. */
	set_pte(page_table, pte);
	UnlockPage(page);
	return 0;
}
/*
 * shrink_cache - scan the tail of the inactive list and free what can be freed.
 *
 * @nr_pages:       number of pages the caller wants freed; decremented for
 *                  each page released here and by what kmem_cache_reap()
 *                  returns
 * @classzone:      pages whose zone does not satisfy memclass() are skipped;
 *                  its inactive/active counters bound the scan
 * @gfp_mask:       writepage and waiting on laundered pages are only done
 *                  when __GFP_FS is set; also passed to the shrinkers
 * @failed_swapout: in/out flag; swap_out() is only attempted while it is
 *                  zero, and it is set when swap_out() returns 0
 *
 * Locking: this function never acquires pagemap_lru_lock before its first
 * use but releases it on every return path, so the caller presumably holds
 * it on entry - NOTE(review): confirm against the caller.
 *
 * Returns the remaining value of nr_pages.
 */
static int shrink_cache(int nr_pages, zone_t * classzone, unsigned int gfp_mask, int * failed_swapout)
{
	struct list_head * entry;
	int max_scan = (classzone->nr_inactive_pages + classzone->nr_active_pages) / vm_cache_scan_ratio;
	int max_mapped = vm_mapped_ratio * nr_pages;

	while (max_scan && classzone->nr_inactive_pages && (entry = inactive_list.prev) != &inactive_list) {
		struct page * page;

		if (unlikely(current->need_resched)) {
			/* Drop the LRU lock around the reschedule. */
			spin_unlock(&pagemap_lru_lock);
			__set_current_state(TASK_RUNNING);
			schedule();
			spin_lock(&pagemap_lru_lock);
			continue;
		}

		page = list_entry(entry, struct page, lru);

		BUG_ON(!PageLRU(page));
		BUG_ON(PageActive(page));

		/* Rotate the page to the head of the inactive list. */
		list_del(entry);
		list_add(entry, &inactive_list);

		/*
		 * Zero page counts can happen because we unlink the pages
		 * _after_ decrementing the usage count..
		 */
		if (unlikely(!page_count(page)))
			continue;

		if (!memclass(page_zone(page), classzone))
			continue;

		/* Only pages that passed the two checks above use up scan budget. */
		max_scan--;

		/* Racy check to avoid trylocking when not worthwhile */
		if (!page->buffers && (page_count(page) != 1 || !page->mapping))
			goto page_mapped;

		/*
		 * The page is locked. IO in progress?
		 * Move it to the back of the list.
		 */
		if (unlikely(TryLockPage(page))) {
			if (PageLaunder(page) && (gfp_mask & __GFP_FS)) {
				/* Hold a reference while waiting without the LRU lock. */
				page_cache_get(page);
				spin_unlock(&pagemap_lru_lock);
				wait_on_page(page);
				page_cache_release(page);
				spin_lock(&pagemap_lru_lock);
			}
			continue;
		}

		if (PageDirty(page) && is_page_cache_freeable(page) && page->mapping) {
			/*
			 * It is not critical here to write it only if
			 * the page is unmapped because any direct writer
			 * like O_DIRECT would set the PG_dirty bitflag
			 * on the physical page after having successfully
			 * pinned it and after the I/O to the page is finished,
			 * so the direct writes to the page cannot get lost.
			 */
			int (*writepage)(struct page *);

			writepage = page->mapping->a_ops->writepage;
			if ((gfp_mask & __GFP_FS) && writepage) {
				ClearPageDirty(page);
				SetPageLaunder(page);
				page_cache_get(page);
				spin_unlock(&pagemap_lru_lock);

				/* The return value of writepage is not examined. */
				writepage(page);
				page_cache_release(page);

				spin_lock(&pagemap_lru_lock);
				continue;
			}
		}

		/*
		 * If the page has buffers, try to free the buffer mappings
		 * associated with this page. If we succeed we try to free
		 * the page as well.
		 */
		if (page->buffers) {
			spin_unlock(&pagemap_lru_lock);

			/* avoid to free a locked page */
			page_cache_get(page);

			if (try_to_release_page(page, gfp_mask)) {
				if (!page->mapping) {
					/*
					 * We must not allow an anon page
					 * with no buffers to be visible on
					 * the LRU, so we unlock the page after
					 * taking the lru lock
					 */
					spin_lock(&pagemap_lru_lock);
					UnlockPage(page);
					__lru_cache_del(page);

					/* effectively free the page here */
					page_cache_release(page);

					if (--nr_pages)
						continue;
					break;
				} else {
					/*
					 * The page is still in pagecache so undo the stuff
					 * before the try_to_release_page since we've not
					 * finished and we can now try the next step.
					 */
					page_cache_release(page);

					spin_lock(&pagemap_lru_lock);
				}
			} else {
				/* failed to drop the buffers so stop here */
				UnlockPage(page);
				page_cache_release(page);

				spin_lock(&pagemap_lru_lock);
				continue;
			}
		}

		spin_lock(&pagecache_lock);

		/*
		 * This is the non-racy check for busy page.
		 * It is critical to check PageDirty _after_ we made sure
		 * the page is freeable so not in use by anybody.
		 * At this point we're guaranteed that page->buffers is NULL,
		 * nobody can refill page->buffers under us because we still
		 * hold the page lock.
		 */
		if (!page->mapping || page_count(page) > 1) {
			spin_unlock(&pagecache_lock);
			UnlockPage(page);
page_mapped:
			/* Also reached from the racy check above, without the page lock. */
			if (--max_mapped < 0) {
				/*
				 * Mapped-page budget exhausted: drop the LRU
				 * lock and try the other reclaim sources.
				 */
				spin_unlock(&pagemap_lru_lock);

				nr_pages -= kmem_cache_reap(gfp_mask);
				if (nr_pages <= 0)
					goto out;

				shrink_dcache_memory(vm_vfs_scan_ratio, gfp_mask);
				shrink_icache_memory(vm_vfs_scan_ratio, gfp_mask);
#ifdef CONFIG_QUOTA
				shrink_dqcache_memory(vm_vfs_scan_ratio, gfp_mask);
#endif

				if (!*failed_swapout)
					*failed_swapout = !swap_out(classzone);

				/* Start a fresh budget for the pages still wanted. */
				max_mapped = nr_pages * vm_mapped_ratio;

				spin_lock(&pagemap_lru_lock);
				refill_inactive(nr_pages, classzone);
			}
			continue;

		}
		if (PageDirty(page)) {
			spin_unlock(&pagecache_lock);
			UnlockPage(page);
			continue;
		}

		__lru_cache_del(page);

		/* point of no return */
		if (likely(!PageSwapCache(page))) {
			__remove_inode_page(page);
			spin_unlock(&pagecache_lock);
		} else {
			swp_entry_t swap;
			/* Remember the swap entry before the page leaves the swap cache. */
			swap.val = page->index;
			__delete_from_swap_cache(page);
			spin_unlock(&pagecache_lock);
			swap_free(swap);
		}

		UnlockPage(page);

		/* effectively free the page here */
		page_cache_release(page);

		/* Stop as soon as the requested number of pages has been freed. */
		if (--nr_pages)
			continue;
		break;
	}
	spin_unlock(&pagemap_lru_lock);

out:
	/* Reached directly, with the LRU lock already dropped, from the reap path. */
	return nr_pages;
}