/**
 * vxfs_immed_readpage - read part of an immed inode into pagecache
 * @fp:		file context (unused)
 * @pp:		page frame to fill in.
 *
 * Description:
 *   vxfs_immed_readpage reads a part of the immed area of the
 *   file that hosts @pp into the pagecache.
 *
 * Returns:
 *   Zero on success, else a negative error code.
 *
 * Locking status:
 *   @pp is locked and will be unlocked.
 */
static int
vxfs_immed_readpage(struct file *fp, struct page *pp)
{
	struct vxfs_inode_info	*vip = VXFS_INO(pp->mapping->host);
	u_int64_t		offset = pp->index << PAGE_CACHE_SHIFT;
	caddr_t			kaddr;

	kaddr = kmap(pp);
	memcpy(kaddr, vip->vii_immed.vi_immed + offset, PAGE_CACHE_SIZE);
	kunmap(pp);

	flush_dcache_page(pp);
	SetPageUptodate(pp);
	UnlockPage(pp);

	return 0;
}
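/*
 * A minimal sketch (not verbatim from the source) of how a readpage helper
 * like the one above is typically exposed: it is plugged into the inode's
 * address_space_operations table, and the page cache invokes it with the
 * page already locked.  The table name vxfs_immed_aops is an assumption
 * for illustration.
 */
struct address_space_operations vxfs_immed_aops = {
	readpage:	vxfs_immed_readpage,
};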
/*
 * The swap lock map insists that pages be in the page cache!
 * Therefore we can't use it.  Later, when we can remove the need for
 * the lock map, we can reduce the number of functions exported.
 */
void rw_swap_page_nolock(int rw, swp_entry_t entry, char *buf)
{
	struct page *page = virt_to_page(buf);

	if (!PageLocked(page))
		PAGE_BUG(page);
	if (PageSwapCache(page))
		PAGE_BUG(page);
	if (page->mapping)
		PAGE_BUG(page);
	/* needs sync_page to wait for I/O completion */
	page->mapping = &swapper_space;
	if (!rw_swap_page_base(rw, entry, page))
		UnlockPage(page);
	wait_on_page(page);
	page->mapping = NULL;
}
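/*
 * Hedged usage sketch, not verbatim from the source: the typical caller is
 * swapon-style setup code that reads the swap header (slot 0 of the device)
 * into a page-sized buffer it already holds locked.  The helper name
 * read_swap_header() and the "type"/"swap_header" parameters are
 * illustrative assumptions.
 */
static void read_swap_header(int type, union swap_header *swap_header)
{
	/* rw_swap_page_nolock() expects the buffer's page to be locked */
	lock_page(virt_to_page(swap_header));
	rw_swap_page_nolock(READ, SWP_ENTRY(type, 0), (char *) swap_header);
	/* on return the I/O has completed and the page has been unlocked */
}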
/*
 * Write an mmapped page to the server.
 */
int
nfs_writepage(struct page *page)
{
	struct inode *inode;
	unsigned long end_index;
	unsigned offset = PAGE_CACHE_SIZE;
	int err;
	struct address_space *mapping = page->mapping;

	if (!mapping)
		BUG();
	inode = mapping->host;
	if (!inode)
		BUG();
	end_index = inode->i_size >> PAGE_CACHE_SHIFT;

	/* Ensure we've flushed out any previous writes */
	nfs_wb_page(inode, page);

	/* easy case */
	if (page->index < end_index)
		goto do_it;

	/* things got complicated... */
	offset = inode->i_size & (PAGE_CACHE_SIZE - 1);
	/* OK, are we completely out? */
	err = -EIO;
	if (page->index >= end_index + 1 || !offset)
		goto out;
do_it:
	lock_kernel();
	if (NFS_SERVER(inode)->wsize >= PAGE_CACHE_SIZE && !IS_SYNC(inode)) {
		err = nfs_writepage_async(NULL, inode, page, 0, offset);
		if (err >= 0)
			err = 0;
	} else {
		err = nfs_writepage_sync(NULL, inode, page, 0, offset);
		if (err == offset)
			err = 0;
	}
	unlock_kernel();
out:
	UnlockPage(page);
	return err;
}
/* Releases the page */
void sysv_set_link(struct sysv_dir_entry *de, struct page *page,
	struct inode *inode)
{
	struct inode *dir = (struct inode *)page->mapping->host;
	unsigned from = (char *)de - (char *)page_address(page);
	unsigned to = from + SYSV_DIRSIZE;
	int err;

	lock_page(page);
	err = page->mapping->a_ops->prepare_write(NULL, page, from, to);
	if (err)
		BUG();
	de->inode = cpu_to_fs16(inode->i_sb, inode->i_ino);
	err = dir_commit_chunk(page, from, to);
	UnlockPage(page);
	dir_put_page(page);
	dir->i_mtime = dir->i_ctime = CURRENT_TIME;
	mark_inode_dirty(dir);
}
/**
Write data through a single page. If the page is not found or not valid
anymore, read the media onto the iActive page first, then write data through
the iActive page.
@param aPos      the starting position of the media address to be written.
@param aData     the starting address in RAM of the content to be written.
@param aDataLen  the length of the content to be written.
@pre             aDataLen should be no more than the page size.
*/
void CDynamicDirCache::WriteDataOntoSinglePageL(TInt64 aPos, const TUint8* aData, TUint32 aDataLen)
	{
	ASSERT(aDataLen <= iPageSizeInBytes);

	//-- the data section is in the cache page entirely, take data directly from the cache
	TDynamicDirCachePage* pPage = FindPageByPos(aPos);
	if (pPage)
		{
		// lock page before writing
		if (LockPage(pPage) != NULL)
			{
			//-- update cache
			Mem::Copy(pPage->PtrInPage(aPos), aData, aDataLen);
			}
		else
			{
			ASSERT(pPage->PageType() == TDynamicDirCachePage::EUnlocked);
			DeQueue(pPage);
			LookupTblRemove(pPage->StartPos());
			DecommitPage(pPage);
			delete pPage;
			pPage = NULL;
			}
		}

	// if page not found or page data not valid anymore, use active page to read data in
	if (!pPage)
		{
		pPage = UpdateActivePageL(aPos);
		//-- update cache
		Mem::Copy(pPage->PtrInPage(aPos), aData, aDataLen);
		}

	// make sure the page is unlocked after use
	if (pPage->PageType() == TDynamicDirCachePage::EUnlocked)
		{
		UnlockPage(pPage);
		}

	// always make writing events MRU
	MakePageMRU(aPos);
	return;
	}
void unmap_kiobuf (struct kiobuf *iobuf)
{
	int i;
	struct page *map;

	for (i = 0; i < iobuf->nr_pages; i++) {
		map = iobuf->maplist[i];
		if (map) {
			if (iobuf->locked)
				UnlockPage(map);
			/* FIXME: cache flush missing for rw==READ
			 * FIXME: call the correct reference counting function
			 */
			page_cache_release(map);
		}
	}

	iobuf->nr_pages = 0;
	iobuf->locked = 0;
}
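/*
 * Hedged lifecycle sketch (illustrative, not verbatim from the source):
 * a raw-I/O style caller typically maps a user buffer into a kiobuf,
 * optionally locks the pages for the duration of the transfer, and then
 * tears everything down with unmap_kiobuf().  do_raw_transfer() and the
 * elided transfer step are assumptions for illustration.
 */
static int do_raw_transfer(int rw, unsigned long uaddr, size_t len)
{
	struct kiobuf *iobuf;
	int err;

	err = alloc_kiovec(1, &iobuf);
	if (err)
		return err;

	err = map_user_kiobuf(rw, iobuf, uaddr, len);
	if (err)
		goto out_free;

	err = lock_kiovec(1, &iobuf, 1);	/* wait for the page locks */
	if (err)
		goto out_unmap;

	/* ... perform the actual I/O against iobuf->maplist[] here ... */

	unlock_kiovec(1, &iobuf);
out_unmap:
	unmap_kiobuf(iobuf);	/* drops the page references */
out_free:
	free_kiovec(1, &iobuf);
	return err;
}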
static int cramfs_readpage(struct file *file, struct page * page)
{
	struct inode *inode = page->mapping->host;
	u32 maxblock, bytes_filled;
	void *pgdata;

	maxblock = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
	bytes_filled = 0;
	if (page->index < maxblock) {
		struct super_block *sb = inode->i_sb;
		u32 blkptr_offset = OFFSET(inode) + page->index * 4;
		u32 start_offset, compr_len;

		start_offset = OFFSET(inode) + maxblock * 4;
		down(&read_mutex);
		if (page->index)
			start_offset = *(u32 *) cramfs_read(sb, blkptr_offset - 4, 4);
		compr_len = (*(u32 *) cramfs_read(sb, blkptr_offset, 4) - start_offset);
		up(&read_mutex);
		pgdata = kmap(page);
		if (compr_len == 0)
			; /* hole */
		else if (compr_len > (PAGE_CACHE_SIZE << 1))
			printk(KERN_ERR "cramfs: bad compressed blocksize %u\n", compr_len);
		else {
			down(&read_mutex);
			bytes_filled = cramfs_uncompress_block(pgdata,
				 PAGE_CACHE_SIZE,
				 cramfs_read(sb, start_offset, compr_len),
				 compr_len);
			up(&read_mutex);
		}
	} else
		pgdata = kmap(page);
	memset(pgdata + bytes_filled, 0, PAGE_CACHE_SIZE - bytes_filled);
	kunmap(page);
	flush_dcache_page(page);
	SetPageUptodate(page);
	UnlockPage(page);
	return 0;
}
int sysv_delete_entry(struct sysv_dir_entry *de, struct page *page)
{
	struct address_space *mapping = page->mapping;
	struct inode *inode = (struct inode *)mapping->host;
	char *kaddr = (char *)page_address(page);
	unsigned from = (char *)de - kaddr;
	unsigned to = from + SYSV_DIRSIZE;
	int err;

	lock_page(page);
	err = mapping->a_ops->prepare_write(NULL, page, from, to);
	if (err)
		BUG();
	de->inode = 0;
	err = dir_commit_chunk(page, from, to);
	UnlockPage(page);
	dir_put_page(page);
	inode->i_ctime = inode->i_mtime = CURRENT_TIME;
	mark_inode_dirty(inode);
	return err;
}
/**
Implementation of pure virtual function.
@see MWTCacheInterface::PosCached()
*/
TUint32 CDynamicDirCache::PosCached(const TInt64& aPos, TInt64& aCachedPosStart)
	{
	const TInt64 pageStartMedPos = CalcPageStartPos(aPos);

	// only search the page in the lookup table
	// NOTE: we don't take the active page into account here,
	// this is to avoid pulling next pages recursively
	TDynamicDirCachePage* pPage = LookupTblFind(pageStartMedPos);

	// then check if the page is still valid if the page is on the Unlocked Page Queue
	if (pPage && pPage->PageType() == TDynamicDirCachePage::EUnlocked)
		{
		if (LockPage(pPage) != NULL)
			{
//			__PRINT1(_L("CDynamicDirCache::PosCached: page(0x%lx) found on Unlocked Queue!"), aPos);
			// have to unlock it before returning, otherwise there will be a memory leak
			UnlockPage(pPage);
			aCachedPosStart = pPage->StartPos();
			return pPage->PageSizeInBytes();
			}
		else	// if the unlocked page is not valid anymore, remove it
			{
			DeQueue(pPage);
			LookupTblRemove(pPage->StartPos());
			DecommitPage(pPage);
			delete pPage;
			pPage = NULL;
			}
		}
	// otherwise the page is already locked or is the valid active page
	else if (pPage)
		{
		__PRINT1(_L("CDynamicDirCache::PosCached: page(0x%lx) on Locked Queue!"), aPos);
		aCachedPosStart = pPage->StartPos();
		return pPage->PageSizeInBytes();
		}

	// page is not found or not valid anymore
	return 0;
	}
static int do_swap_page(struct mm_struct * mm,
	struct vm_area_struct * vma, unsigned long address,
	pte_t * page_table, swp_entry_t entry, int write_access)
{
	struct page *page = lookup_swap_cache(entry);
	pte_t pte;

	if (!page) {
		lock_kernel();
		swapin_readahead(entry);
		page = read_swap_cache(entry);
		unlock_kernel();
		if (!page)
			return -1;

		flush_page_to_ram(page);
		flush_icache_page(vma, page);
	}

	mm->rss++;

	pte = mk_pte(page, vma->vm_page_prot);

	/*
	 * Freeze the "shared"ness of the page, ie page_count + swap_count.
	 * Must lock page before transferring our swap count to already
	 * obtained page count.
	 */
	lock_page(page);
	swap_free(entry);
	if (write_access && !is_page_shared(page))
		pte = pte_mkwrite(pte_mkdirty(pte));
	UnlockPage(page);

	set_pte(page_table, pte);

	/* No need to invalidate - it was non-present before */
	update_mmu_cache(vma, address, pte);
	return 1;	/* Minor fault */
}
/*
 * Free the swap entry like above, but also try to
 * free the page cache entry if it is the last user.
 */
void free_swap_and_cache(swp_entry_t entry)
{
	struct swap_info_struct * p;
	struct page *page = NULL;

	p = swap_info_get(entry);
	if (p) {
		if (swap_entry_free(p, SWP_OFFSET(entry)) == 1)
			page = find_trylock_page(&swapper_space, entry.val);
		swap_info_put(p);
	}
	if (page) {
		page_cache_get(page);
		/* Only cache user (+us), or swap space full? Free it! */
		if (page_count(page) - !!page->buffers == 2 || vm_swap_full()) {
			delete_from_swap_cache(page);
			SetPageDirty(page);
		}
		UnlockPage(page);
		page_cache_release(page);
	}
}
int unlock_kiovec(int nr, struct kiobuf *iovec[])
{
	struct kiobuf *iobuf;
	int i, j;
	struct page *page, **ppage;

	for (i = 0; i < nr; i++) {
		iobuf = iovec[i];

		if (!iobuf->locked)
			continue;
		iobuf->locked = 0;

		ppage = iobuf->maplist;
		for (j = 0; j < iobuf->nr_pages; ppage++, j++) {
			page = *ppage;
			if (!page)
				continue;
			UnlockPage(page);
		}
	}
	return 0;
}
/**
Check whether the number of (locked pages + iActive page) has exceeded the minimum
allowed page count, and whether the number of unlocked pages has exceeded the maximum
allowed page count; trim the queues accordingly.
*/
void CDynamicDirCache::CheckThresholds()
	{
	while (iLockedQCount + 1 > iMinSizeInPages)
		{
		TDynamicDirCachePage* movePage = iLockedQ.Last();
		UnlockPage(movePage);
		DeQueue(movePage);
		TInt err = LookupTblRemove(movePage->StartPos());
		ASSERT(err == KErrNone);

		// if it is a valid page, add onto unlocked queue
		if (movePage->StartPos() != 0)
			{
			ASSERT(movePage->IsValid());
			AddFirstOntoQueue(movePage, TDynamicDirCachePage::EUnlocked);
			err = LookupTblAdd(movePage);
			ASSERT(err == KErrNone);
			}
		else	// reserved page, delete
			{
			DecommitPage(movePage);
			delete movePage;
			}
		}

	// if unlocked queue exceeds limit, delete LRU page
	// note: all pages on unlocked queue should be valid
	while (iUnlockedQCount > iMaxSizeInPages - iMinSizeInPages)
		{
		TDynamicDirCachePage* removePage = iUnlockedQ.Last();
		ASSERT(removePage->StartPos() != 0 && removePage->IsValid());
		DeQueue(removePage);
		LookupTblRemove(removePage->StartPos());
		DecommitPage(removePage);
		delete removePage;
		}
	}
/* * Move tuples from pending pages into regular GIN structure. * * On first glance it looks completely not crash-safe. But if we crash * after posting entries to the main index and before removing them from the * pending list, it's okay because when we redo the posting later on, nothing * bad will happen. * * fill_fsm indicates that ginInsertCleanup should add deleted pages * to FSM otherwise caller is responsible to put deleted pages into * FSM. * * If stats isn't null, we count deleted pending pages into the counts. */ void ginInsertCleanup(GinState *ginstate, bool full_clean, bool fill_fsm, IndexBulkDeleteResult *stats) { Relation index = ginstate->index; Buffer metabuffer, buffer; Page metapage, page; GinMetaPageData *metadata; MemoryContext opCtx, oldCtx; BuildAccumulator accum; KeyArray datums; BlockNumber blkno, blknoFinish; bool cleanupFinish = false; bool fsm_vac = false; Size workMemory; bool inVacuum = (stats == NULL); /* * We would like to prevent concurrent cleanup process. For that we will * lock metapage in exclusive mode using LockPage() call. Nobody other * will use that lock for metapage, so we keep possibility of concurrent * insertion into pending list */ if (inVacuum) { /* * We are called from [auto]vacuum/analyze or gin_clean_pending_list() * and we would like to wait concurrent cleanup to finish. */ LockPage(index, GIN_METAPAGE_BLKNO, ExclusiveLock); workMemory = (IsAutoVacuumWorkerProcess() && autovacuum_work_mem != -1) ? autovacuum_work_mem : maintenance_work_mem; } else { /* * We are called from regular insert and if we see concurrent cleanup * just exit in hope that concurrent process will clean up pending * list. */ if (!ConditionalLockPage(index, GIN_METAPAGE_BLKNO, ExclusiveLock)) return; workMemory = work_mem; } metabuffer = ReadBuffer(index, GIN_METAPAGE_BLKNO); LockBuffer(metabuffer, GIN_SHARE); metapage = BufferGetPage(metabuffer); metadata = GinPageGetMeta(metapage); if (metadata->head == InvalidBlockNumber) { /* Nothing to do */ UnlockReleaseBuffer(metabuffer); UnlockPage(index, GIN_METAPAGE_BLKNO, ExclusiveLock); return; } /* * Remember a tail page to prevent infinite cleanup if other backends add * new tuples faster than we can cleanup. */ blknoFinish = metadata->tail; /* * Read and lock head of pending list */ blkno = metadata->head; buffer = ReadBuffer(index, blkno); LockBuffer(buffer, GIN_SHARE); page = BufferGetPage(buffer); LockBuffer(metabuffer, GIN_UNLOCK); /* * Initialize. All temporary space will be in opCtx */ opCtx = AllocSetContextCreate(CurrentMemoryContext, "GIN insert cleanup temporary context", ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE); oldCtx = MemoryContextSwitchTo(opCtx); initKeyArray(&datums, 128); ginInitBA(&accum); accum.ginstate = ginstate; /* * At the top of this loop, we have pin and lock on the current page of * the pending list. However, we'll release that before exiting the loop. * Note we also have pin but not lock on the metapage. */ for (;;) { Assert(!GinPageIsDeleted(page)); /* * Are we walk through the page which as we remember was a tail when * we start our cleanup? But if caller asks us to clean up whole * pending list then ignore old tail, we will work until list becomes * empty. */ if (blkno == blknoFinish && full_clean == false) cleanupFinish = true; /* * read page's datums into accum */ processPendingPage(&accum, &datums, page, FirstOffsetNumber); vacuum_delay_point(); /* * Is it time to flush memory to disk? 
Flush if we are at the end of * the pending list, or if we have a full row and memory is getting * full. */ if (GinPageGetOpaque(page)->rightlink == InvalidBlockNumber || (GinPageHasFullRow(page) && (accum.allocatedMemory >= workMemory * 1024L))) { ItemPointerData *list; uint32 nlist; Datum key; GinNullCategory category; OffsetNumber maxoff, attnum; /* * Unlock current page to increase performance. Changes of page * will be checked later by comparing maxoff after completion of * memory flush. */ maxoff = PageGetMaxOffsetNumber(page); LockBuffer(buffer, GIN_UNLOCK); /* * Moving collected data into regular structure can take * significant amount of time - so, run it without locking pending * list. */ ginBeginBAScan(&accum); while ((list = ginGetBAEntry(&accum, &attnum, &key, &category, &nlist)) != NULL) { ginEntryInsert(ginstate, attnum, key, category, list, nlist, NULL); vacuum_delay_point(); } /* * Lock the whole list to remove pages */ LockBuffer(metabuffer, GIN_EXCLUSIVE); LockBuffer(buffer, GIN_SHARE); Assert(!GinPageIsDeleted(page)); /* * While we left the page unlocked, more stuff might have gotten * added to it. If so, process those entries immediately. There * shouldn't be very many, so we don't worry about the fact that * we're doing this with exclusive lock. Insertion algorithm * guarantees that inserted row(s) will not continue on next page. * NOTE: intentionally no vacuum_delay_point in this loop. */ if (PageGetMaxOffsetNumber(page) != maxoff) { ginInitBA(&accum); processPendingPage(&accum, &datums, page, maxoff + 1); ginBeginBAScan(&accum); while ((list = ginGetBAEntry(&accum, &attnum, &key, &category, &nlist)) != NULL) ginEntryInsert(ginstate, attnum, key, category, list, nlist, NULL); } /* * Remember next page - it will become the new list head */ blkno = GinPageGetOpaque(page)->rightlink; UnlockReleaseBuffer(buffer); /* shiftList will do exclusive * locking */ /* * remove read pages from pending list, at this point all content * of read pages is in regular structure */ shiftList(index, metabuffer, blkno, fill_fsm, stats); /* At this point, some pending pages have been freed up */ fsm_vac = true; Assert(blkno == metadata->head); LockBuffer(metabuffer, GIN_UNLOCK); /* * if we removed the whole pending list or we cleanup tail (which * we remembered on start our cleanup process) then just exit */ if (blkno == InvalidBlockNumber || cleanupFinish) break; /* * release memory used so far and reinit state */ MemoryContextReset(opCtx); initKeyArray(&datums, datums.maxvalues); ginInitBA(&accum); } else { blkno = GinPageGetOpaque(page)->rightlink; UnlockReleaseBuffer(buffer); } /* * Read next page in pending list */ vacuum_delay_point(); buffer = ReadBuffer(index, blkno); LockBuffer(buffer, GIN_SHARE); page = BufferGetPage(buffer); } UnlockPage(index, GIN_METAPAGE_BLKNO, ExclusiveLock); ReleaseBuffer(metabuffer); /* * As pending list pages can have a high churn rate, it is desirable to * recycle them immediately to the FreeSpace Map when ordinary backends * clean the list. */ if (fsm_vac && fill_fsm) IndexFreeSpaceMapVacuum(index); /* Clean up temporary space */ MemoryContextSwitchTo(oldCtx); MemoryContextDelete(opCtx); }
/* * We completely avoid races by reading each swap page in advance, * and then search for the process using it. All the necessary * page table adjustments can then be made atomically. */ static int try_to_unuse(unsigned int type) { struct swap_info_struct * si = &swap_info[type]; struct mm_struct *start_mm; unsigned short *swap_map; unsigned short swcount; struct page *page; swp_entry_t entry; int i = 0; int retval = 0; int reset_overflow = 0; /* * When searching mms for an entry, a good strategy is to * start at the first mm we freed the previous entry from * (though actually we don't notice whether we or coincidence * freed the entry). Initialize this start_mm with a hold. * * A simpler strategy would be to start at the last mm we * freed the previous entry from; but that would take less * advantage of mmlist ordering (now preserved by swap_out()), * which clusters forked address spaces together, most recent * child immediately after parent. If we race with dup_mmap(), * we very much want to resolve parent before child, otherwise * we may miss some entries: using last mm would invert that. */ start_mm = &init_mm; atomic_inc(&init_mm.mm_users); /* * Keep on scanning until all entries have gone. Usually, * one pass through swap_map is enough, but not necessarily: * mmput() removes mm from mmlist before exit_mmap() and its * zap_page_range(). That's not too bad, those entries are * on their way out, and handled faster there than here. * do_munmap() behaves similarly, taking the range out of mm's * vma list before zap_page_range(). But unfortunately, when * unmapping a part of a vma, it takes the whole out first, * then reinserts what's left after (might even reschedule if * open() method called) - so swap entries may be invisible * to swapoff for a while, then reappear - but that is rare. */ while ((i = find_next_to_unuse(si, i))) { /* * Get a page for the entry, using the existing swap * cache page if there is one. Otherwise, get a clean * page and read the swap into it. */ swap_map = &si->swap_map[i]; entry = SWP_ENTRY(type, i); page = read_swap_cache_async(entry); if (!page) { /* * Either swap_duplicate() failed because entry * has been freed independently, and will not be * reused since sys_swapoff() already disabled * allocation from here, or alloc_page() failed. */ if (!*swap_map) continue; retval = -ENOMEM; break; } /* * Don't hold on to start_mm if it looks like exiting. */ if (atomic_read(&start_mm->mm_users) == 1) { mmput(start_mm); start_mm = &init_mm; atomic_inc(&init_mm.mm_users); } /* * Wait for and lock page. When do_swap_page races with * try_to_unuse, do_swap_page can handle the fault much * faster than try_to_unuse can locate the entry. This * apparently redundant "wait_on_page" lets try_to_unuse * defer to do_swap_page in such a case - in some tests, * do_swap_page and try_to_unuse repeatedly compete. */ wait_on_page(page); lock_page(page); /* * Remove all references to entry, without blocking. * Whenever we reach init_mm, there's no address space * to search, but use it as a reminder to search shmem. 
*/ swcount = *swap_map; if (swcount > 1) { flush_page_to_ram(page); if (start_mm == &init_mm) shmem_unuse(entry, page); else unuse_process(start_mm, entry, page); } if (*swap_map > 1) { int set_start_mm = (*swap_map >= swcount); struct list_head *p = &start_mm->mmlist; struct mm_struct *new_start_mm = start_mm; struct mm_struct *mm; spin_lock(&mmlist_lock); while (*swap_map > 1 && (p = p->next) != &start_mm->mmlist) { mm = list_entry(p, struct mm_struct, mmlist); swcount = *swap_map; if (mm == &init_mm) { set_start_mm = 1; shmem_unuse(entry, page); } else unuse_process(mm, entry, page); if (set_start_mm && *swap_map < swcount) { new_start_mm = mm; set_start_mm = 0; } } atomic_inc(&new_start_mm->mm_users); spin_unlock(&mmlist_lock); mmput(start_mm); start_mm = new_start_mm; } /* * How could swap count reach 0x7fff when the maximum * pid is 0x7fff, and there's no way to repeat a swap * page within an mm (except in shmem, where it's the * shared object which takes the reference count)? * We believe SWAP_MAP_MAX cannot occur in Linux 2.4. * * If that's wrong, then we should worry more about * exit_mmap() and do_munmap() cases described above: * we might be resetting SWAP_MAP_MAX too early here. * We know "Undead"s can happen, they're okay, so don't * report them; but do report if we reset SWAP_MAP_MAX. */ if (*swap_map == SWAP_MAP_MAX) { swap_list_lock(); swap_device_lock(si); nr_swap_pages++; *swap_map = 1; swap_device_unlock(si); swap_list_unlock(); reset_overflow = 1; } /* * If a reference remains (rare), we would like to leave * the page in the swap cache; but try_to_swap_out could * then re-duplicate the entry once we drop page lock, * so we might loop indefinitely; also, that page could * not be swapped out to other storage meanwhile. So: * delete from cache even if there's another reference, * after ensuring that the data has been saved to disk - * since if the reference remains (rarer), it will be * read from disk into another page. Splitting into two * pages would be incorrect if swap supported "shared * private" pages, but they are handled by tmpfs files. * Note shmem_unuse already deleted its from swap cache. */ if ((*swap_map > 1) && PageDirty(page) && PageSwapCache(page)) { rw_swap_page(WRITE, page); lock_page(page); } if (PageSwapCache(page)) delete_from_swap_cache(page); /* * So we could skip searching mms once swap count went * to 1, we did not mark any present ptes as dirty: must * mark page dirty so try_to_swap_out will preserve it. */ SetPageDirty(page); UnlockPage(page); page_cache_release(page); /* * Make sure that we aren't completely killing * interactive performance. Interruptible check on * signal_pending() would be nice, but changes the spec? */ if (current->need_resched) schedule(); }
int jffs2_do_readpage_unlock(struct inode *inode, struct page *pg)
{
	int ret = jffs2_do_readpage_nolock(inode, pg);
	UnlockPage(pg);
	return ret;
}
static int yaffs_writepage(struct page *page) #endif { struct address_space *mapping = page->mapping; loff_t offset = (loff_t) page->index << PAGE_CACHE_SHIFT; struct inode *inode; unsigned long end_index; char *buffer; yaffs_Object *obj; int nWritten = 0; unsigned nBytes; if (!mapping) BUG(); inode = mapping->host; if (!inode) BUG(); if (offset > inode->i_size) { T(YAFFS_TRACE_OS, (KERN_DEBUG "yaffs_writepage at %08x, inode size = %08x!!!\n", (unsigned)(page->index << PAGE_CACHE_SHIFT), (unsigned)inode->i_size)); T(YAFFS_TRACE_OS, (KERN_DEBUG " -> don't care!!\n")); unlock_page(page); return 0; } end_index = inode->i_size >> PAGE_CACHE_SHIFT; if (page->index < end_index) { nBytes = PAGE_CACHE_SIZE; } else { nBytes = inode->i_size & (PAGE_CACHE_SIZE - 1); } get_page(page); buffer = kmap(page); obj = yaffs_InodeToObject(inode); yaffs_GrossLock(obj->myDev); T(YAFFS_TRACE_OS, (KERN_DEBUG "yaffs_writepage at %08x, size %08x\n", (unsigned)(page->index << PAGE_CACHE_SHIFT), nBytes)); T(YAFFS_TRACE_OS, (KERN_DEBUG "writepag0: obj = %05x, ino = %05x\n", (int)obj->variant.fileVariant.fileSize, (int)inode->i_size)); nWritten = yaffs_WriteDataToFile(obj, buffer, page->index << PAGE_CACHE_SHIFT, nBytes, 0); T(YAFFS_TRACE_OS, (KERN_DEBUG "writepag1: obj = %05x, ino = %05x\n", (int)obj->variant.fileVariant.fileSize, (int)inode->i_size)); yaffs_GrossUnlock(obj->myDev); kunmap(page); SetPageUptodate(page); UnlockPage(page); put_page(page); return (nWritten == nBytes) ? 0 : -ENOSPC; }
static int yaffs_readpage_unlock(struct file *f, struct page *pg)
{
	int ret = yaffs_readpage_nolock(f, pg);
	UnlockPage(pg);
	return ret;
}
int lock_kiovec(int nr, struct kiobuf *iovec[], int wait)
{
	struct kiobuf *iobuf;
	int i, j;
	struct page *page, **ppage;
	int doublepage = 0;
	int repeat = 0;

repeat:

	for (i = 0; i < nr; i++) {
		iobuf = iovec[i];

		if (iobuf->locked)
			continue;

		ppage = iobuf->maplist;
		for (j = 0; j < iobuf->nr_pages; ppage++, j++) {
			page = *ppage;
			if (!page)
				continue;

			if (TryLockPage(page)) {
				while (j--) {
					struct page *tmp = *--ppage;
					if (tmp)
						UnlockPage(tmp);
				}
				goto retry;
			}
		}
		iobuf->locked = 1;
	}

	return 0;

retry:

	/*
	 * We couldn't lock one of the pages.  Undo the locking so far,
	 * wait on the page we got to, and try again.
	 */

	unlock_kiovec(nr, iovec);
	if (!wait)
		return -EAGAIN;

	/*
	 * Did the release also unlock the page we got stuck on?
	 */
	if (!PageLocked(page)) {
		/*
		 * If so, we may well have the page mapped twice
		 * in the IO address range.  Bad news.  Of
		 * course, it _might_ just be a coincidence,
		 * but if it happens more than once, chances
		 * are we have a double-mapped page.
		 */
		if (++doublepage >= 3)
			return -EINVAL;

		/* Try again... */
		wait_on_page(page);
	}

	if (++repeat < 16)
		goto repeat;
	return -EAGAIN;
}
/*
 * This must be called only on pages that have
 * been verified to be in the swap cache and locked.
 */
void delete_from_swap_cache(struct page *page)
{
	lock_page(page);
	delete_from_swap_cache_nolock(page);
	UnlockPage(page);
}
static int shrink_cache(int nr_pages, zone_t * classzone, unsigned int gfp_mask, int * failed_swapout) { struct list_head * entry; int max_scan = (classzone->nr_inactive_pages + classzone->nr_active_pages) / vm_cache_scan_ratio; int max_mapped = vm_mapped_ratio * nr_pages; while (max_scan && classzone->nr_inactive_pages && (entry = inactive_list.prev) != &inactive_list) { struct page * page; if (unlikely(current->need_resched)) { spin_unlock(&pagemap_lru_lock); __set_current_state(TASK_RUNNING); schedule(); spin_lock(&pagemap_lru_lock); continue; } page = list_entry(entry, struct page, lru); BUG_ON(!PageLRU(page)); BUG_ON(PageActive(page)); list_del(entry); list_add(entry, &inactive_list); /* * Zero page counts can happen because we unlink the pages * _after_ decrementing the usage count.. */ if (unlikely(!page_count(page))) continue; if (!memclass(page_zone(page), classzone)) continue; max_scan--; /* Racy check to avoid trylocking when not worthwhile */ if (!page->buffers && (page_count(page) != 1 || !page->mapping)) goto page_mapped; /* * The page is locked. IO in progress? * Move it to the back of the list. */ if (unlikely(TryLockPage(page))) { if (PageLaunder(page) && (gfp_mask & __GFP_FS)) { page_cache_get(page); spin_unlock(&pagemap_lru_lock); wait_on_page(page); page_cache_release(page); spin_lock(&pagemap_lru_lock); } continue; } if (PageDirty(page) && is_page_cache_freeable(page) && page->mapping) { /* * It is not critical here to write it only if * the page is unmapped beause any direct writer * like O_DIRECT would set the PG_dirty bitflag * on the phisical page after having successfully * pinned it and after the I/O to the page is finished, * so the direct writes to the page cannot get lost. */ int (*writepage)(struct page *); writepage = page->mapping->a_ops->writepage; if ((gfp_mask & __GFP_FS) && writepage) { ClearPageDirty(page); SetPageLaunder(page); page_cache_get(page); spin_unlock(&pagemap_lru_lock); writepage(page); page_cache_release(page); spin_lock(&pagemap_lru_lock); continue; } } /* * If the page has buffers, try to free the buffer mappings * associated with this page. If we succeed we try to free * the page as well. */ if (page->buffers) { spin_unlock(&pagemap_lru_lock); /* avoid to free a locked page */ page_cache_get(page); if (try_to_release_page(page, gfp_mask)) { if (!page->mapping) { /* * We must not allow an anon page * with no buffers to be visible on * the LRU, so we unlock the page after * taking the lru lock */ spin_lock(&pagemap_lru_lock); UnlockPage(page); __lru_cache_del(page); /* effectively free the page here */ page_cache_release(page); if (--nr_pages) continue; break; } else { /* * The page is still in pagecache so undo the stuff * before the try_to_release_page since we've not * finished and we can now try the next step. */ page_cache_release(page); spin_lock(&pagemap_lru_lock); } } else { /* failed to drop the buffers so stop here */ UnlockPage(page); page_cache_release(page); spin_lock(&pagemap_lru_lock); continue; } } spin_lock(&pagecache_lock); /* * This is the non-racy check for busy page. * It is critical to check PageDirty _after_ we made sure * the page is freeable so not in use by anybody. * At this point we're guaranteed that page->buffers is NULL, * nobody can refill page->buffers under us because we still * hold the page lock. 
*/ if (!page->mapping || page_count(page) > 1) { spin_unlock(&pagecache_lock); UnlockPage(page); page_mapped: if (--max_mapped < 0) { spin_unlock(&pagemap_lru_lock); nr_pages -= kmem_cache_reap(gfp_mask); if (nr_pages <= 0) goto out; shrink_dcache_memory(vm_vfs_scan_ratio, gfp_mask); shrink_icache_memory(vm_vfs_scan_ratio, gfp_mask); #ifdef CONFIG_QUOTA shrink_dqcache_memory(vm_vfs_scan_ratio, gfp_mask); #endif if (!*failed_swapout) *failed_swapout = !swap_out(classzone); max_mapped = nr_pages * vm_mapped_ratio; spin_lock(&pagemap_lru_lock); refill_inactive(nr_pages, classzone); } continue; } if (PageDirty(page)) { spin_unlock(&pagecache_lock); UnlockPage(page); continue; } __lru_cache_del(page); /* point of no return */ if (likely(!PageSwapCache(page))) { __remove_inode_page(page); spin_unlock(&pagecache_lock); } else { swp_entry_t swap; swap.val = page->index; __delete_from_swap_cache(page); spin_unlock(&pagecache_lock); swap_free(swap); } UnlockPage(page); /* effectively free the page here */ page_cache_release(page); if (--nr_pages) continue; break; } spin_unlock(&pagemap_lru_lock); out: return nr_pages; }
/** Implementation of pure virtual function. @see MWTCacheInterface::MakePageMRU() */ void CDynamicDirCache::MakePageMRU(TInt64 aPos) { __PRINT1(_L("MakePageMRU (%lx)"), aPos); // __PRINT4(_L("Current Cache State: iLockedQCount=%d, iUnlockedQCount=%d, iLookupTbl=%d, iMaxSizeInPages=%d"), iLockedQCount, iUnlockedQCount, iLookupTable.Count(), iMaxSizeInPages); // check the MRU page first, if it is already the MRU page, we can return immediately TInt64 pageStartMedPos = CalcPageStartPos(aPos); if (!iLockedQ.IsEmpty()) { if (iLockedQ.First()->StartPos() == pageStartMedPos) { return; } } TDynamicDirCachePage* pPage = FindPageByPos(aPos); if (pPage) { ASSERT(pPage->IsValid()); // lock page before make it MRU if (pPage->PageType() == TDynamicDirCachePage::EUnlocked) { ASSERT(!pPage->IsLocked()); if (LockPage(pPage) == NULL) { DeQueue(pPage); LookupTblRemove(pPage->StartPos()); DecommitPage(pPage); delete pPage; pPage = NULL; } } else { // error checking: page should either be locked or active ASSERT(LockPage(pPage) != NULL); } } // if page not found or page data not valid anymore, use active page to read data if (!pPage) { TRAPD(err, pPage = UpdateActivePageL(aPos)); if (err != KErrNone) { // problem occurred reading active page, return immediately. return; } } // by now, the page is either locked or active page ASSERT(pPage && pPage->IsValid() && pPage->IsLocked()); switch (pPage->PageType()) { // if the page is the active page, we will need to find a new active page for replacement case TDynamicDirCachePage::EActivePage: { TDynamicDirCachePage* newAP = NULL; // if there is more cache room available, try to create a new page first if (!CacheIsFull()) { // allocate and lock a new page TRAPD(err, newAP = AllocateAndLockNewPageL(0)); // if any error ocurrs, return immediately if (err != KErrNone) { // unlock the page that was originally unlocked before leave if (pPage->PageType() == TDynamicDirCachePage::EUnlocked) { UnlockPage(pPage); } return; } if (newAP) { // replace the active page with the new page newAP->SetPageType(TDynamicDirCachePage::EActivePage); iActivePage = newAP; } } // if cache has grown to its max size, or new page allocation failed if (!newAP) { // try to lock the LRU page on the unlocked page queque first if (!iUnlockedQ.IsEmpty()) { newAP = iUnlockedQ.Last(); ASSERT(newAP->IsValid()); if (LockPage(newAP) != NULL) { // deque, reset pos, set new type DeQueue(newAP); LookupTblRemove(newAP->StartPos()); ResetPagePos(newAP); newAP->SetPageType(TDynamicDirCachePage::EActivePage); // replace active page iActivePage = newAP; } // if falied locking the LRU page from unclocked queque, // delete it else { DeQueue(newAP); LookupTblRemove(newAP->StartPos()); DecommitPage(newAP); delete newAP; newAP = NULL; } } } // if still have not found new active page // grab the LRU page from Locked Page Queue for active page if (!newAP) { ASSERT(!iLockedQ.IsEmpty()); newAP = iLockedQ.Last(); // deque, reset pos, set new type DeQueue(newAP); LookupTblRemove(newAP->StartPos()); ResetPagePos(newAP); newAP->SetPageType(TDynamicDirCachePage::EActivePage); // replace active page iActivePage = newAP; } // we should always be able to find a locked page for active page ASSERT(newAP != NULL); // make original page (i.e. 
former active page) MRU // add onto locked queue AddFirstOntoQueue(pPage, TDynamicDirCachePage::ELocked); // add onto lookuptbl, as active page is not on lookup tbl originally LookupTblAdd(pPage); // check cache limit CheckThresholds(); return; } case TDynamicDirCachePage::EUnlocked: { // if page was originally on Unlocked Page Queque, remove it from Unlocked Page Queue, add it // to the Locked Page Queue and make it MRU DeQueue(pPage); AddFirstOntoQueue(pPage, TDynamicDirCachePage::ELocked); // check cache limit CheckThresholds(); return; } case TDynamicDirCachePage::ELocked: { // otherwise the page was on Locked Page Queue, make it MRU // no need to check cache limit if (pPage != iLockedQ.First()) { DeQueue(pPage); AddFirstOntoQueue(pPage, TDynamicDirCachePage::ELocked); return; } break; } default: ASSERT(0); } }
static int shrink_cache(int nr_pages, zone_t * classzone, unsigned int gfp_mask, int priority) { struct list_head * entry; int max_scan = nr_inactive_pages / priority; int max_mapped = min((nr_pages << (10 - priority)), max_scan / 10); spin_lock(&pagemap_lru_lock); while (--max_scan >= 0 && (entry = inactive_list.prev) != &inactive_list) { struct page * page; /* lock depth is 1 or 2 */ if (unlikely(current->need_resched)) { spin_unlock(&pagemap_lru_lock); __set_current_state(TASK_RUNNING); schedule(); spin_lock(&pagemap_lru_lock); continue; } page = list_entry(entry, struct page, lru); if (unlikely(!PageLRU(page))) BUG(); if (unlikely(PageActive(page))) BUG(); list_del(entry); list_add(entry, &inactive_list); /* * Zero page counts can happen because we unlink the pages * _after_ decrementing the usage count.. */ if (unlikely(!page_count(page))) continue; if (!memclass(page->zone, classzone)) continue; /* Racy check to avoid trylocking when not worthwhile */ if (!page->buffers && (page_count(page) != 1 || !page->mapping)) goto page_mapped; /* * The page is locked. IO in progress? * Move it to the back of the list. */ if (unlikely(TryLockPage(page))) { if (PageLaunder(page) && (gfp_mask & __GFP_FS)) { page_cache_get(page); spin_unlock(&pagemap_lru_lock); wait_on_page(page); page_cache_release(page); spin_lock(&pagemap_lru_lock); } continue; } if ((PageDirty(page) || DelallocPage(page)) && is_page_cache_freeable(page) && page->mapping) { /* * It is not critical here to write it only if * the page is unmapped beause any direct writer * like O_DIRECT would set the PG_dirty bitflag * on the phisical page after having successfully * pinned it and after the I/O to the page is finished, * so the direct writes to the page cannot get lost. */ int (*writepage)(struct page *); writepage = page->mapping->a_ops->writepage; if ((gfp_mask & __GFP_FS) && writepage) { ClearPageDirty(page); SetPageLaunder(page); page_cache_get(page); spin_unlock(&pagemap_lru_lock); writepage(page); page_cache_release(page); spin_lock(&pagemap_lru_lock); continue; } } /* * If the page has buffers, try to free the buffer mappings * associated with this page. If we succeed we try to free * the page as well. */ if (page->buffers) { spin_unlock(&pagemap_lru_lock); /* avoid to free a locked page */ page_cache_get(page); if (try_to_release_page(page, gfp_mask)) { if (!page->mapping) { /* * We must not allow an anon page * with no buffers to be visible on * the LRU, so we unlock the page after * taking the lru lock */ spin_lock(&pagemap_lru_lock); UnlockPage(page); __lru_cache_del(page); /* effectively free the page here */ page_cache_release(page); if (--nr_pages) continue; break; } else { /* * The page is still in pagecache so undo the stuff * before the try_to_release_page since we've not * finished and we can now try the next step. */ page_cache_release(page); spin_lock(&pagemap_lru_lock); } } else { /* failed to drop the buffers so stop here */ UnlockPage(page); page_cache_release(page); spin_lock(&pagemap_lru_lock); continue; } } spin_lock(&pagecache_lock); /* * this is the non-racy check for busy page. */ if (!page->mapping || !is_page_cache_freeable(page)) { spin_unlock(&pagecache_lock); UnlockPage(page); page_mapped: if (--max_mapped >= 0) continue; /* * Alert! We've found too many mapped pages on the * inactive list, so we start swapping out now! 
*/ spin_unlock(&pagemap_lru_lock); swap_out(priority, gfp_mask, classzone); return nr_pages; } /* * It is critical to check PageDirty _after_ we made sure * the page is freeable* so not in use by anybody. */ if (PageDirty(page)) { spin_unlock(&pagecache_lock); UnlockPage(page); continue; } /* point of no return */ if (likely(!PageSwapCache(page))) { __remove_inode_page(page); spin_unlock(&pagecache_lock); } else { swp_entry_t swap; swap.val = page->index; __delete_from_swap_cache(page); spin_unlock(&pagecache_lock); swap_free(swap); } __lru_cache_del(page); UnlockPage(page); /* effectively free the page here */ page_cache_release(page); if (--nr_pages) continue; break; } spin_unlock(&pagemap_lru_lock); return nr_pages; }
int page_launder(int gfp_mask, int sync) { int launder_loop, maxscan, cleaned_pages, maxlaunder; int can_get_io_locks; struct list_head * page_lru; struct page * page; /* * We can only grab the IO locks (eg. for flushing dirty * buffers to disk) if __GFP_IO is set. */ can_get_io_locks = gfp_mask & __GFP_IO; launder_loop = 0; maxlaunder = 0; cleaned_pages = 0; dirty_page_rescan: spin_lock(&pagemap_lru_lock); maxscan = nr_inactive_dirty_pages; while ((page_lru = inactive_dirty_list.prev) != &inactive_dirty_list && maxscan-- > 0) { page = list_entry(page_lru, struct page, lru); /* Wrong page on list?! (list corruption, should not happen) */ if (!PageInactiveDirty(page)) { printk("VM: page_launder, wrong page on list.\n"); list_del(page_lru); nr_inactive_dirty_pages--; page->zone->inactive_dirty_pages--; continue; } /* Page is or was in use? Move it to the active list. */ if (PageTestandClearReferenced(page) || page->age > 0 || (!page->buffers && page_count(page) > 1) || page_ramdisk(page)) { del_page_from_inactive_dirty_list(page); add_page_to_active_list(page); continue; } /* * The page is locked. IO in progress? * Move it to the back of the list. */ if (TryLockPage(page)) { list_del(page_lru); list_add(page_lru, &inactive_dirty_list); continue; } /* * Dirty swap-cache page? Write it out if * last copy.. */ if (PageDirty(page)) { int (*writepage)(struct page *) = page->mapping->a_ops->writepage; int result; if (!writepage) goto page_active; /* First time through? Move it to the back of the list */ if (!launder_loop) { list_del(page_lru); list_add(page_lru, &inactive_dirty_list); UnlockPage(page); continue; } /* OK, do a physical asynchronous write to swap. */ ClearPageDirty(page); page_cache_get(page); spin_unlock(&pagemap_lru_lock); result = writepage(page); page_cache_release(page); /* And re-start the thing.. */ spin_lock(&pagemap_lru_lock); if (result != 1) continue; /* writepage refused to do anything */ set_page_dirty(page); goto page_active; } /* * If the page has buffers, try to free the buffer mappings * associated with this page. If we succeed we either free * the page (in case it was a buffercache only page) or we * move the page to the inactive_clean list. * * On the first round, we should free all previously cleaned * buffer pages */ if (page->buffers) { int wait, clearedbuf; int freed_page = 0; /* * Since we might be doing disk IO, we have to * drop the spinlock and take an extra reference * on the page so it doesn't go away from under us. */ del_page_from_inactive_dirty_list(page); page_cache_get(page); spin_unlock(&pagemap_lru_lock); /* Will we do (asynchronous) IO? */ if (launder_loop && maxlaunder == 0 && sync) wait = 2; /* Synchrounous IO */ else if (launder_loop && maxlaunder-- > 0) wait = 1; /* Async IO */ else wait = 0; /* No IO */ /* Try to free the page buffers. */ clearedbuf = try_to_free_buffers(page, wait); /* * Re-take the spinlock. Note that we cannot * unlock the page yet since we're still * accessing the page_struct here... */ spin_lock(&pagemap_lru_lock); /* The buffers were not freed. */ if (!clearedbuf) { add_page_to_inactive_dirty_list(page); /* The page was only in the buffer cache. */ } else if (!page->mapping) { atomic_dec(&buffermem_pages); freed_page = 1; cleaned_pages++; /* The page has more users besides the cache and us. */ } else if (page_count(page) > 2) { add_page_to_active_list(page); /* OK, we "created" a freeable page. 
*/ } else /* page->mapping && page_count(page) == 2 */ { add_page_to_inactive_clean_list(page); cleaned_pages++; } /* * Unlock the page and drop the extra reference. * We can only do it here because we ar accessing * the page struct above. */ UnlockPage(page); page_cache_release(page); /* * If we're freeing buffer cache pages, stop when * we've got enough free memory. */ if (freed_page && !free_shortage()) break; continue; } else if (page->mapping && !PageDirty(page)) { /* * If a page had an extra reference in * deactivate_page(), we will find it here. * Now the page is really freeable, so we * move it to the inactive_clean list. */ del_page_from_inactive_dirty_list(page); add_page_to_inactive_clean_list(page); UnlockPage(page); cleaned_pages++; } else { page_active: /* * OK, we don't know what to do with the page. * It's no use keeping it here, so we move it to * the active list. */ del_page_from_inactive_dirty_list(page); add_page_to_active_list(page); UnlockPage(page); } } spin_unlock(&pagemap_lru_lock); /* * If we don't have enough free pages, we loop back once * to queue the dirty pages for writeout. When we were called * by a user process (that /needs/ a free page) and we didn't * free anything yet, we wait synchronously on the writeout of * MAX_SYNC_LAUNDER pages. * * We also wake up bdflush, since bdflush should, under most * loads, flush out the dirty pages before we have to wait on * IO. */ if (can_get_io_locks && !launder_loop && free_shortage()) { launder_loop = 1; /* If we cleaned pages, never do synchronous IO. */ if (cleaned_pages) sync = 0; /* We only do a few "out of order" flushes. */ maxlaunder = MAX_LAUNDER; /* Kflushd takes care of the rest. */ wakeup_bdflush(0); goto dirty_page_rescan; } /* Return the number of pages moved to the inactive_clean list. */ return cleaned_pages; }
/** * reclaim_page - reclaims one page from the inactive_clean list * @zone: reclaim a page from this zone * * The pages on the inactive_clean can be instantly reclaimed. * The tests look impressive, but most of the time we'll grab * the first page of the list and exit successfully. */ struct page * reclaim_page(zone_t * zone) { struct page * page = NULL; struct list_head * page_lru; int maxscan; /* * We only need the pagemap_lru_lock if we don't reclaim the page, * but we have to grab the pagecache_lock before the pagemap_lru_lock * to avoid deadlocks and most of the time we'll succeed anyway. */ spin_lock(&pagecache_lock); spin_lock(&pagemap_lru_lock); maxscan = zone->inactive_clean_pages; while ((page_lru = zone->inactive_clean_list.prev) != &zone->inactive_clean_list && maxscan--) { page = list_entry(page_lru, struct page, lru); /* Wrong page on list?! (list corruption, should not happen) */ if (!PageInactiveClean(page)) { printk("VM: reclaim_page, wrong page on list.\n"); list_del(page_lru); page->zone->inactive_clean_pages--; continue; } /* Page is or was in use? Move it to the active list. */ if (PageTestandClearReferenced(page) || page->age > 0 || (!page->buffers && page_count(page) > 1)) { del_page_from_inactive_clean_list(page); add_page_to_active_list(page); continue; } /* The page is dirty, or locked, move to inactive_dirty list. */ if (page->buffers || PageDirty(page) || TryLockPage(page)) { del_page_from_inactive_clean_list(page); add_page_to_inactive_dirty_list(page); continue; } /* OK, remove the page from the caches. */ if (PageSwapCache(page)) { __delete_from_swap_cache(page); goto found_page; } if (page->mapping) { __remove_inode_page(page); goto found_page; } /* We should never ever get here. */ printk(KERN_ERR "VM: reclaim_page, found unknown page\n"); list_del(page_lru); zone->inactive_clean_pages--; UnlockPage(page); } /* Reset page pointer, maybe we encountered an unfreeable page. */ page = NULL; goto out; found_page: del_page_from_inactive_clean_list(page); UnlockPage(page); page->age = PAGE_AGE_START; if (page_count(page) != 1) printk("VM: reclaim_page, found page with count %d!\n", page_count(page)); out: spin_unlock(&pagemap_lru_lock); spin_unlock(&pagecache_lock); memory_pressure++; return page; }
/* * The swap-out functions return 1 if they successfully * threw something out, and we got a free page. It returns * zero if it couldn't do anything, and any other value * indicates it decreased rss, but the page was shared. * * NOTE! If it sleeps, it *must* return 1 to make sure we * don't continue with the swap-out. Otherwise we may be * using a process that no longer actually exists (it might * have died while we slept). */ static int try_to_swap_out(struct mm_struct * mm, struct vm_area_struct* vma, unsigned long address, pte_t * page_table, int gfp_mask) { pte_t pte; swp_entry_t entry; struct page * page; int onlist; pte = *page_table; if (!pte_present(pte)) goto out_failed; page = pte_page(pte); if ((!VALID_PAGE(page)) || PageReserved(page)) goto out_failed; if (mm->swap_cnt) mm->swap_cnt--; onlist = PageActive(page); /* Don't look at this pte if it's been accessed recently. */ if (ptep_test_and_clear_young(page_table)) { age_page_up(page); goto out_failed; } if (!onlist) /* The page is still mapped, so it can't be freeable... */ age_page_down_ageonly(page); /* * If the page is in active use by us, or if the page * is in active use by others, don't unmap it or * (worse) start unneeded IO. */ if (page->age > 0) goto out_failed; if (TryLockPage(page)) goto out_failed; /* From this point on, the odds are that we're going to * nuke this pte, so read and clear the pte. This hook * is needed on CPUs which update the accessed and dirty * bits in hardware. */ pte = ptep_get_and_clear(page_table); /* * Is the page already in the swap cache? If so, then * we can just drop our reference to it without doing * any IO - it's already up-to-date on disk. * * Return 0, as we didn't actually free any real * memory, and we should just continue our scan. */ if (PageSwapCache(page)) { entry.val = page->index; if (pte_dirty(pte)) set_page_dirty(page); set_swap_pte: swap_duplicate(entry); set_pte(page_table, swp_entry_to_pte(entry)); drop_pte: UnlockPage(page); mm->rss--; flush_tlb_page(vma, address); deactivate_page(page); page_cache_release(page); out_failed: return 0; } /* * Is it a clean page? Then it must be recoverable * by just paging it in again, and we can just drop * it.. * * However, this won't actually free any real * memory, as the page will just be in the page cache * somewhere, and as such we should just continue * our scan. * * Basically, this just makes it possible for us to do * some real work in the future in "refill_inactive()". */ flush_cache_page(vma, address); if (!pte_dirty(pte)) goto drop_pte; /* * Ok, it's really dirty. That means that * we should either create a new swap cache * entry for it, or we should write it back * to its own backing store. */ if (page->mapping) { set_page_dirty(page); goto drop_pte; } /* * This is a dirty, swappable page. First of all, * get a suitable swap entry for it, and make sure * we have the swap cache set up to associate the * page with that swap entry. */ entry = get_swap_page(); if (!entry.val) goto out_unlock_restore; /* No swap space left */ /* Add it to the swap cache and mark it dirty */ add_to_swap_cache(page, entry); set_page_dirty(page); goto set_swap_pte; out_unlock_restore: set_pte(page_table, pte); UnlockPage(page); return 0; }
/* * This routine handles present pages, when users try to write * to a shared page. It is done by copying the page to a new address * and decrementing the shared-page counter for the old page. * * Goto-purists beware: the only reason for goto's here is that it results * in better assembly code.. The "default" path will see no jumps at all. * * Note that this routine assumes that the protection checks have been * done by the caller (the low-level page fault routine in most cases). * Thus we can safely just mark it writable once we've done any necessary * COW. * * We also mark the page dirty at this point even though the page will * change only once the write actually happens. This avoids a few races, * and potentially makes it more efficient. * * We enter with the page table read-lock held, and need to exit without * it. */ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma, unsigned long address, pte_t *page_table, pte_t pte) { struct page *old_page, *new_page; old_page = pte_page(pte); if (!VALID_PAGE(old_page)) goto bad_wp_page; /* * We can avoid the copy if: * - we're the only user (count == 1) * - the only other user is the swap cache, * and the only swap cache user is itself, * in which case we can just continue to * use the same swap cache (it will be * marked dirty). */ switch (page_count(old_page)) { case 2: /* * Lock the page so that no one can look it up from * the swap cache, grab a reference and start using it. * Can not do lock_page, holding page_table_lock. */ if (!PageSwapCache(old_page) || TryLockPage(old_page)) break; if (is_page_shared(old_page)) { UnlockPage(old_page); break; } UnlockPage(old_page); /* FallThrough */ case 1: flush_cache_page(vma, address); establish_pte(vma, address, page_table, pte_mkyoung(pte_mkdirty(pte_mkwrite(pte)))); spin_unlock(&mm->page_table_lock); return 1; /* Minor fault */ } /* * Ok, we need to copy. Oh, well.. */ spin_unlock(&mm->page_table_lock); new_page = page_cache_alloc(); if (!new_page) return -1; spin_lock(&mm->page_table_lock); /* * Re-check the pte - we dropped the lock */ if (pte_same(*page_table, pte)) { if (PageReserved(old_page)) ++mm->rss; break_cow(vma, old_page, new_page, address, page_table); /* Free the old page.. */ new_page = old_page; } spin_unlock(&mm->page_table_lock); page_cache_release(new_page); return 1; /* Minor fault */ bad_wp_page: spin_unlock(&mm->page_table_lock); printk("do_wp_page: bogus page at address %08lx (page 0x%lx)\n",address,(unsigned long)old_page); return -1; }
/*
 *	_hash_droplock() -- Release an lmgr lock.
 */
void
_hash_droplock(Relation rel, BlockNumber whichlock, int access)
{
	if (USELOCKING(rel))
		UnlockPage(rel, whichlock, access);
}
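/*
 * A hedged sketch of the matching acquire side, reconstructed from the
 * calling convention above rather than copied from the source: the same
 * USELOCKING() test guards LockPage(), so acquire and release stay
 * symmetric for relations that skip lmgr locking.
 */
void
_hash_getlock(Relation rel, BlockNumber whichlock, int access)
{
	if (USELOCKING(rel))
		LockPage(rel, whichlock, access);
}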
/* mm->page_table_lock is held. mmap_sem is not held */ static inline int try_to_swap_out(struct mm_struct * mm, struct vm_area_struct* vma, unsigned long address, pte_t * page_table, struct page *page, zone_t * classzone) { pte_t pte; swp_entry_t entry; /* Don't look at this pte if it's been accessed recently. */ if ((vma->vm_flags & VM_LOCKED) || ptep_test_and_clear_young(page_table)) { mark_page_accessed(page); return 0; } /* Don't bother unmapping pages that are active */ if (PageActive(page)) return 0; /* Don't bother replenishing zones not under pressure.. */ if (!memclass(page->zone, classzone)) return 0; if (TryLockPage(page)) return 0; /* From this point on, the odds are that we're going to * nuke this pte, so read and clear the pte. This hook * is needed on CPUs which update the accessed and dirty * bits in hardware. */ flush_cache_page(vma, address); pte = ptep_get_and_clear(page_table); flush_tlb_page(vma, address); if (pte_dirty(pte)) set_page_dirty(page); /* * Is the page already in the swap cache? If so, then * we can just drop our reference to it without doing * any IO - it's already up-to-date on disk. */ if (PageSwapCache(page)) { entry.val = page->index; swap_duplicate(entry); set_swap_pte: set_pte(page_table, swp_entry_to_pte(entry)); drop_pte: mm->rss--; UnlockPage(page); { int freeable = page_count(page) - !!page->buffers <= 2; page_cache_release(page); return freeable; } } /* * Is it a clean page? Then it must be recoverable * by just paging it in again, and we can just drop * it.. or if it's dirty but has backing store, * just mark the page dirty and drop it. * * However, this won't actually free any real * memory, as the page will just be in the page cache * somewhere, and as such we should just continue * our scan. * * Basically, this just makes it possible for us to do * some real work in the future in "refill_inactive()". */ if (page->mapping) goto drop_pte; if (!PageDirty(page)) goto drop_pte; /* * Anonymous buffercache pages can be left behind by * concurrent truncate and pagefault. */ if (page->buffers) goto preserve; /* * This is a dirty, swappable page. First of all, * get a suitable swap entry for it, and make sure * we have the swap cache set up to associate the * page with that swap entry. */ for (;;) { entry = get_swap_page(); if (!entry.val) break; /* Add it to the swap cache and mark it dirty * (adding to the page cache will clear the dirty * and uptodate bits, so we need to do it again) */ if (add_to_swap_cache(page, entry) == 0) { SetPageUptodate(page); set_page_dirty(page); goto set_swap_pte; } /* Raced with "speculative" read_swap_cache_async */ swap_free(entry); } /* No swap space left */ preserve: set_pte(page_table, pte); UnlockPage(page); return 0; }
static int rd_blkdev_pagecache_IO(int rw, struct buffer_head * sbh, int minor) { struct address_space * mapping; unsigned long index; int offset, size, err; err = -EIO; err = 0; mapping = rd_bdev[minor]->bd_inode->i_mapping; index = sbh->b_rsector >> (PAGE_CACHE_SHIFT - 9); offset = (sbh->b_rsector << 9) & ~PAGE_CACHE_MASK; size = sbh->b_size; do { int count; struct page ** hash; struct page * page; char * src, * dst; int unlock = 0; count = PAGE_CACHE_SIZE - offset; if (count > size) count = size; size -= count; hash = page_hash(mapping, index); page = __find_get_page(mapping, index, hash); if (!page) { page = grab_cache_page(mapping, index); err = -ENOMEM; if (!page) goto out; err = 0; if (!Page_Uptodate(page)) { memset(kmap(page), 0, PAGE_CACHE_SIZE); kunmap(page); SetPageUptodate(page); } unlock = 1; } index++; if (rw == READ) { src = kmap(page); src += offset; dst = bh_kmap(sbh); } else { dst = kmap(page); dst += offset; src = bh_kmap(sbh); } offset = 0; memcpy(dst, src, count); kunmap(page); bh_kunmap(sbh); if (rw == READ) { flush_dcache_page(page); } else { SetPageDirty(page); } if (unlock) UnlockPage(page); __free_page(page); } while (size); out: return err; }