/* mm->page_table_lock is held. mmap_sem is not held */
static inline int try_to_swap_out(struct mm_struct * mm, struct vm_area_struct* vma, unsigned long address, pte_t * page_table, struct page *page, zone_t * classzone)
{
	pte_t pte;
	swp_entry_t entry;

	/* Don't look at this pte if it's been accessed recently. */
	if ((vma->vm_flags & VM_LOCKED) || ptep_test_and_clear_young(page_table)) {
		mark_page_accessed(page);
		return 0;
	}

	/* Don't bother unmapping pages that are active */
	if (PageActive(page))
		return 0;

	/* Don't bother replenishing zones not under pressure.. */
	if (!memclass(page_zone(page), classzone))
		return 0;

	if (TryLockPage(page))
		return 0;

	/* From this point on, the odds are that we're going to
	 * nuke this pte, so read and clear the pte.  This hook
	 * is needed on CPUs which update the accessed and dirty
	 * bits in hardware.
	 */
	flush_cache_page(vma, address);
	pte = ptep_get_and_clear(page_table);
	flush_tlb_page(vma, address);

	if (pte_dirty(pte))
		set_page_dirty(page);

	/*
	 * Is the page already in the swap cache? If so, then
	 * we can just drop our reference to it without doing
	 * any IO - it's already up-to-date on disk.
	 */
	if (PageSwapCache(page)) {
		entry.val = page->index;
		swap_duplicate(entry);
set_swap_pte:
		set_pte(page_table, swp_entry_to_pte(entry));
drop_pte:
		mm->rss--;
		UnlockPage(page);
		{
			int freeable = page_count(page) - !!page->buffers <= 2;
			page_cache_release(page);
			return freeable;
		}
	}

	/*
	 * Is it a clean page? Then it must be recoverable
	 * by just paging it in again, and we can just drop
	 * it..  or if it's dirty but has backing store,
	 * just mark the page dirty and drop it.
	 *
	 * However, this won't actually free any real
	 * memory, as the page will just be in the page cache
	 * somewhere, and as such we should just continue
	 * our scan.
	 *
	 * Basically, this just makes it possible for us to do
	 * some real work in the future in "refill_inactive()".
	 */
	if (page->mapping)
		goto drop_pte;
	if (!PageDirty(page))
		goto drop_pte;

	/*
	 * Anonymous buffercache pages can be left behind by
	 * concurrent truncate and pagefault.
	 */
	if (page->buffers)
		goto preserve;

	/*
	 * This is a dirty, swappable page.  First of all,
	 * get a suitable swap entry for it, and make sure
	 * we have the swap cache set up to associate the
	 * page with that swap entry.
	 */
	for (;;) {
		entry = get_swap_page();
		if (!entry.val)
			break;
		/* Add it to the swap cache and mark it dirty
		 * (adding to the page cache will clear the dirty
		 * and uptodate bits, so we need to do it again)
		 */
		if (add_to_swap_cache(page, entry) == 0) {
			SetPageUptodate(page);
			set_page_dirty(page);
			goto set_swap_pte;
		}
		/* Raced with "speculative" read_swap_cache_async */
		swap_free(entry);
	}

	/* No swap space left */
preserve:
	set_pte(page_table, pte);
	UnlockPage(page);
	return 0;
}
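/*
 * Example (not part of the original file): a minimal, hypothetical sketch
 * of how a 2.4-era caller might walk the ptes of a vma and feed each
 * present page to try_to_swap_out(), with mm->page_table_lock held by the
 * caller as the locking comment above requires.  The name scan_pte_range
 * and the count bookkeeping are illustrative only.
 */
static int scan_pte_range(struct mm_struct *mm, struct vm_area_struct *vma,
			  pte_t *pte, unsigned long address, unsigned long end,
			  zone_t *classzone, int count)
{
	do {
		if (pte_present(*pte)) {
			struct page *page = pte_page(*pte);

			/* try_to_swap_out() returns 1 if the page became freeable */
			if (VALID_PAGE(page) && !PageReserved(page))
				count -= try_to_swap_out(mm, vma, address,
							 pte, page, classzone);
		}
		address += PAGE_SIZE;
		pte++;
	} while (address && (address < end) && count > 0);
	return count;
}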
/*
 * Read a directory, using filldir to fill the dirent memory.
 * smb_proc_readdir does the actual reading from the smb server.
 *
 * The cache code is almost directly taken from ncpfs
 */
static int smb_readdir(struct file *filp, void *dirent, filldir_t filldir)
{
	struct dentry *dentry = filp->f_path.dentry;
	struct inode *dir = dentry->d_inode;
	struct smb_sb_info *server = server_from_dentry(dentry);
	union smb_dir_cache *cache = NULL;
	struct smb_cache_control ctl;
	struct page *page = NULL;
	int result;

	ctl.page  = NULL;
	ctl.cache = NULL;

	VERBOSE("reading %s/%s, f_pos=%d\n",
		DENTRY_PATH(dentry), (int) filp->f_pos);

	result = 0;

	lock_kernel();

	switch ((unsigned int) filp->f_pos) {
	case 0:
		if (filldir(dirent, ".", 1, 0, dir->i_ino, DT_DIR) < 0)
			goto out;
		filp->f_pos = 1;
		/* fallthrough */
	case 1:
		if (filldir(dirent, "..", 2, 1, parent_ino(dentry), DT_DIR) < 0)
			goto out;
		filp->f_pos = 2;
	}

	/*
	 * Make sure our inode is up-to-date.
	 */
	result = smb_revalidate_inode(dentry);
	if (result)
		goto out;

	page = grab_cache_page(&dir->i_data, 0);
	if (!page)
		goto read_really;

	ctl.cache = cache = kmap(page);
	ctl.head  = cache->head;

	if (!PageUptodate(page) || !ctl.head.eof) {
		VERBOSE("%s/%s, page uptodate=%d, eof=%d\n",
			DENTRY_PATH(dentry), PageUptodate(page), ctl.head.eof);
		goto init_cache;
	}

	if (filp->f_pos == 2) {
		if (jiffies - ctl.head.time >= SMB_MAX_AGE(server))
			goto init_cache;

		/*
		 * N.B. ncpfs checks mtime of dentry too here, we don't.
		 *   1. common smb servers do not update mtime on dir changes
		 *   2. it requires an extra smb request
		 *      (revalidate has the same timeout as ctl.head.time)
		 *
		 * Instead smbfs invalidates its own cache on local changes
		 * and remote changes are not seen until timeout.
		 */
	}

	if (filp->f_pos > ctl.head.end)
		goto finished;

	ctl.fpos = filp->f_pos + (SMB_DIRCACHE_START - 2);
	ctl.ofs  = ctl.fpos / SMB_DIRCACHE_SIZE;
	ctl.idx  = ctl.fpos % SMB_DIRCACHE_SIZE;

	for (;;) {
		if (ctl.ofs != 0) {
			ctl.page = find_lock_page(&dir->i_data, ctl.ofs);
			if (!ctl.page)
				goto invalid_cache;
			ctl.cache = kmap(ctl.page);
			if (!PageUptodate(ctl.page))
				goto invalid_cache;
		}
		while (ctl.idx < SMB_DIRCACHE_SIZE) {
			struct dentry *dent;
			int res;

			dent = smb_dget_fpos(ctl.cache->dentry[ctl.idx],
					     dentry, filp->f_pos);
			if (!dent)
				goto invalid_cache;

			res = filldir(dirent, dent->d_name.name,
				      dent->d_name.len, filp->f_pos,
				      dent->d_inode->i_ino, DT_UNKNOWN);
			dput(dent);
			if (res)
				goto finished;
			filp->f_pos += 1;
			ctl.idx += 1;
			if (filp->f_pos > ctl.head.end)
				goto finished;
		}
		if (ctl.page) {
			kunmap(ctl.page);
			SetPageUptodate(ctl.page);
			unlock_page(ctl.page);
			page_cache_release(ctl.page);
			ctl.page = NULL;
		}
		ctl.idx  = 0;
		ctl.ofs += 1;
	}
invalid_cache:
	if (ctl.page) {
		kunmap(ctl.page);
		unlock_page(ctl.page);
		page_cache_release(ctl.page);
		ctl.page = NULL;
	}
	ctl.cache = cache;
init_cache:
	smb_invalidate_dircache_entries(dentry);
	ctl.head.time = jiffies;
	ctl.head.eof = 0;
	ctl.fpos = 2;
	ctl.ofs = 0;
	ctl.idx = SMB_DIRCACHE_START;
	ctl.filled = 0;
	ctl.valid  = 1;
read_really:
	result = server->ops->readdir(filp, dirent, filldir, &ctl);
	if (result == -ERESTARTSYS && page)
		ClearPageUptodate(page);
	if (ctl.idx == -1)
		goto invalid_cache;	/* retry */
	ctl.head.end = ctl.fpos - 1;
	ctl.head.eof = ctl.valid;
finished:
	if (page) {
		cache->head = ctl.head;
		kunmap(page);
		if (result != -ERESTARTSYS)
			SetPageUptodate(page);
		unlock_page(page);
		page_cache_release(page);
	}
	if (ctl.page) {
		kunmap(ctl.page);
		SetPageUptodate(ctl.page);
		unlock_page(ctl.page);
		page_cache_release(ctl.page);
	}
out:
	unlock_kernel();
	return result;
}
static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry,
			struct inode *new_dir, struct dentry *new_dentry)
{
	struct inode *old_inode = d_inode(old_dentry);
	struct inode *new_inode = d_inode(new_dentry);
	struct page *dir_page = NULL;
	struct nilfs_dir_entry *dir_de = NULL;
	struct page *old_page;
	struct nilfs_dir_entry *old_de;
	struct nilfs_transaction_info ti;
	int err;

	err = nilfs_transaction_begin(old_dir->i_sb, &ti, 1);
	if (unlikely(err))
		return err;

	err = -ENOENT;
	old_de = nilfs_find_entry(old_dir, &old_dentry->d_name, &old_page);
	if (!old_de)
		goto out;

	if (S_ISDIR(old_inode->i_mode)) {
		err = -EIO;
		dir_de = nilfs_dotdot(old_inode, &dir_page);
		if (!dir_de)
			goto out_old;
	}

	if (new_inode) {
		struct page *new_page;
		struct nilfs_dir_entry *new_de;

		err = -ENOTEMPTY;
		if (dir_de && !nilfs_empty_dir(new_inode))
			goto out_dir;

		err = -ENOENT;
		new_de = nilfs_find_entry(new_dir, &new_dentry->d_name, &new_page);
		if (!new_de)
			goto out_dir;
		nilfs_set_link(new_dir, new_de, new_page, old_inode);
		nilfs_mark_inode_dirty(new_dir);
		new_inode->i_ctime = CURRENT_TIME;
		if (dir_de)
			drop_nlink(new_inode);
		drop_nlink(new_inode);
		nilfs_mark_inode_dirty(new_inode);
	} else {
		err = nilfs_add_link(new_dentry, old_inode);
		if (err)
			goto out_dir;
		if (dir_de) {
			inc_nlink(new_dir);
			nilfs_mark_inode_dirty(new_dir);
		}
	}

	/*
	 * Like most other Unix systems, set the ctime for inodes on a
	 * rename.
	 */
	old_inode->i_ctime = CURRENT_TIME;

	nilfs_delete_entry(old_de, old_page);

	if (dir_de) {
		nilfs_set_link(old_inode, dir_de, dir_page, new_dir);
		drop_nlink(old_dir);
	}
	nilfs_mark_inode_dirty(old_dir);
	nilfs_mark_inode_dirty(old_inode);

	err = nilfs_transaction_commit(old_dir->i_sb);
	return err;

out_dir:
	if (dir_de) {
		kunmap(dir_page);
		page_cache_release(dir_page);
	}
out_old:
	kunmap(old_page);
	page_cache_release(old_page);
out:
	nilfs_transaction_abort(old_dir->i_sb);
	return err;
}
/**
 * write_one_page - write out a single page and optionally wait on I/O
 * @page: the page to write
 * @wait: if true, wait on writeout
 *
 * The page must be locked by the caller and will be unlocked upon return.
 *
 * write_one_page() returns a negative error code if I/O failed.
 */
int write_one_page(struct page *page, int wait)
{
	struct address_space *mapping = page->mapping;
	int ret = 0;
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = 1,
	};

	BUG_ON(!PageLocked(page));

	if (wait)
		wait_on_page_writeback(page);

	if (clear_page_dirty_for_io(page)) {
		page_cache_get(page);
		ret = mapping->a_ops->writepage(page, &wbc);
		if (ret == 0 && wait) {
			wait_on_page_writeback(page);
			if (PageError(page))
				ret = -EIO;
		}
		page_cache_release(page);
	} else {
		unlock_page(page);
	}
	return ret;
}
EXPORT_SYMBOL(write_one_page);

/*
 * For address_spaces which do not use buffers nor write back.
 */
int __set_page_dirty_no_writeback(struct page *page)
{
	if (!PageDirty(page))
		return !TestSetPageDirty(page);
	return 0;
}

/*
 * Helper function for set_page_dirty family.
 * NOTE: This relies on being atomic wrt interrupts.
 */
void account_page_dirtied(struct page *page, struct address_space *mapping)
{
	if (mapping_cap_account_dirty(mapping)) {
		__inc_zone_page_state(page, NR_FILE_DIRTY);
		__inc_zone_page_state(page, NR_DIRTIED);
		__inc_bdi_stat(mapping->backing_dev_info, BDI_RECLAIMABLE);
		task_dirty_inc(current);
		task_io_account_write(PAGE_CACHE_SIZE);
	}
}
EXPORT_SYMBOL(account_page_dirtied);

/*
 * Helper function for set_page_writeback family.
 * NOTE: Unlike account_page_dirtied this does not rely on being atomic
 * wrt interrupts.
 */
void account_page_writeback(struct page *page)
{
	inc_zone_page_state(page, NR_WRITEBACK);
	inc_zone_page_state(page, NR_WRITTEN);
}
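/*
 * Example (not from the original source): a hypothetical sketch of a
 * filesystem flushing one metadata page with write_one_page().  What
 * matters is the locking contract documented above: the caller locks the
 * page, and write_one_page() unlocks it on every path.
 */
static int flush_meta_page(struct page *page)
{
	lock_page(page);
	/* write_one_page() unlocks the page; wait != 0 also waits for I/O */
	return write_one_page(page, 1);
}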
/*
 * Attempts to free an entry by adding a page to the swap cache,
 * decompressing the entry data into the page, and issuing a
 * bio write to write the page back to the swap device.
 *
 * This can be thought of as a "resumed writeback" of the page
 * to the swap device.  We are basically resuming the same swap
 * writeback path that was intercepted with the frontswap_store()
 * in the first place.  After the page has been decompressed into
 * the swap cache, the compressed version stored by zswap can be
 * freed.
 */
static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
{
	struct zswap_header *zhdr;
	swp_entry_t swpentry;
	struct zswap_tree *tree;
	pgoff_t offset;
	struct zswap_entry *entry;
	struct page *page;
	u8 *src, *dst;
	unsigned int dlen;
	int ret;
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_NONE,
	};

	/* extract swpentry from data */
	zhdr = zpool_map_handle(pool, handle, ZPOOL_MM_RO);
	swpentry = zhdr->swpentry; /* here */
	zpool_unmap_handle(pool, handle);
	tree = zswap_trees[swp_type(swpentry)];
	offset = swp_offset(swpentry);

	/* find and ref zswap entry */
	spin_lock(&tree->lock);
	entry = zswap_entry_find_get(&tree->rbroot, offset);
	if (!entry) {
		/* entry was invalidated */
		spin_unlock(&tree->lock);
		return 0;
	}
	spin_unlock(&tree->lock);
	BUG_ON(offset != entry->offset);

	/* try to allocate swap cache page */
	switch (zswap_get_swap_cache_page(swpentry, &page)) {
	case ZSWAP_SWAPCACHE_FAIL: /* no memory or invalidate happened */
		ret = -ENOMEM;
		goto fail;

	case ZSWAP_SWAPCACHE_EXIST:
		/* page is already in the swap cache, ignore for now */
		page_cache_release(page);
		ret = -EEXIST;
		goto fail;

	case ZSWAP_SWAPCACHE_NEW: /* page is locked */
		/* decompress */
		dlen = PAGE_SIZE;
		src = (u8 *)zpool_map_handle(zswap_pool, entry->handle,
				ZPOOL_MM_RO) + sizeof(struct zswap_header);
		dst = kmap_atomic(page);
		ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src,
				entry->length, dst, &dlen);
		kunmap_atomic(dst);
		zpool_unmap_handle(zswap_pool, entry->handle);
		BUG_ON(ret);
		BUG_ON(dlen != PAGE_SIZE);

		/* page is up to date */
		SetPageUptodate(page);
	}

	/* move it to the tail of the inactive list after end_writeback */
	SetPageReclaim(page);

	/* start writeback */
	__swap_writepage(page, &wbc, end_swap_bio_write);
	page_cache_release(page);
	zswap_written_back_pages++;

	spin_lock(&tree->lock);
	/* drop local reference */
	zswap_entry_put(tree, entry);

	/*
	 * There are two possible situations for entry here:
	 * (1) refcount is 1 (normal case), entry is valid and on the tree
	 * (2) refcount is 0, entry is freed and not on the tree
	 *     because invalidate happened during writeback
	 * search the tree and free the entry if we find it
	 */
	if (entry == zswap_rb_search(&tree->rbroot, offset))
		zswap_entry_put(tree, entry);
	spin_unlock(&tree->lock);

	goto end;

	/*
	 * If we get here due to ZSWAP_SWAPCACHE_EXIST, a load may be
	 * happening concurrently.  It is safe and okay to not free the
	 * entry; if we do free the entry in the following put, it is
	 * also okay to return !0.
	 */
fail:
	spin_lock(&tree->lock);
	zswap_entry_put(tree, entry);
	spin_unlock(&tree->lock);

end:
	return ret;
}
static int mcopy_atomic_pte(struct mm_struct *dst_mm,
			    pmd_t *dst_pmd,
			    struct vm_area_struct *dst_vma,
			    unsigned long dst_addr,
			    unsigned long src_addr,
			    struct page **pagep)
{
	struct mem_cgroup *memcg;
	pte_t _dst_pte, *dst_pte;
	spinlock_t *ptl;
	void *page_kaddr;
	int ret;
	struct page *page;

	if (!*pagep) {
		ret = -ENOMEM;
		page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, dst_vma, dst_addr);
		if (!page)
			goto out;

		page_kaddr = kmap_atomic(page);
		ret = copy_from_user(page_kaddr,
				     (const void __user *) src_addr,
				     PAGE_SIZE);
		kunmap_atomic(page_kaddr);

		/* fallback to copy_from_user outside mmap_sem */
		if (unlikely(ret)) {
			ret = -EFAULT;
			*pagep = page;
			/* don't free the page */
			goto out;
		}
	} else {
		page = *pagep;
		*pagep = NULL;
	}

	/*
	 * The memory barrier inside __SetPageUptodate makes sure that
	 * preceding stores to the page contents become visible before
	 * the set_pte_at() write.
	 */
	__SetPageUptodate(page);

	ret = -ENOMEM;
	if (mem_cgroup_try_charge(page, dst_mm, GFP_KERNEL, &memcg, false))
		goto out_release;

	_dst_pte = mk_pte(page, dst_vma->vm_page_prot);
	if (dst_vma->vm_flags & VM_WRITE)
		_dst_pte = pte_mkwrite(pte_mkdirty(_dst_pte));

	ret = -EEXIST;
	dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
	if (!pte_none(*dst_pte))
		goto out_release_uncharge_unlock;

	inc_mm_counter(dst_mm, MM_ANONPAGES);
	page_add_new_anon_rmap(page, dst_vma, dst_addr, false);
	mem_cgroup_commit_charge(page, memcg, false, false);
	lru_cache_add_active_or_unevictable(page, dst_vma);

	set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);

	/* No need to invalidate - it was non-present before */
	update_mmu_cache(dst_vma, dst_addr, dst_pte);

	pte_unmap_unlock(dst_pte, ptl);
	ret = 0;
out:
	return ret;
out_release_uncharge_unlock:
	pte_unmap_unlock(dst_pte, ptl);
	mem_cgroup_cancel_charge(page, memcg, false);
out_release:
	page_cache_release(page);
	goto out;
}
/*
 * Prepare the write for the inline data.
 * If the data can be written into the inode, we just read
 * the page and make it uptodate, and start the journal.
 * Otherwise read the page, make it dirty so that it can be
 * handled in writepages (the i_disksize update is left to the
 * normal ext4_da_write_end).
 */
int ext4_da_write_inline_data_begin(struct address_space *mapping,
				    struct inode *inode,
				    loff_t pos, unsigned len,
				    unsigned flags,
				    struct page **pagep,
				    void **fsdata)
{
	int ret, inline_size;
	handle_t *handle;
	struct page *page;
	struct ext4_iloc iloc;

	ret = ext4_get_inode_loc(inode, &iloc);
	if (ret)
		return ret;

	handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		handle = NULL;
		goto out;
	}

	inline_size = ext4_get_max_inline_size(inode);

	ret = -ENOSPC;
	if (inline_size >= pos + len) {
		ret = ext4_prepare_inline_data(handle, inode, pos + len);
		if (ret && ret != -ENOSPC)
			goto out;
	}

	if (ret == -ENOSPC) {
		ret = ext4_da_convert_inline_data_to_extent(mapping,
							    inode,
							    flags,
							    fsdata);
		goto out;
	}

	/*
	 * We cannot recurse into the filesystem as the transaction
	 * is already started.
	 */
	flags |= AOP_FLAG_NOFS;

	page = grab_cache_page_write_begin(mapping, 0, flags);
	if (!page) {
		ret = -ENOMEM;
		goto out;
	}

	down_read(&EXT4_I(inode)->xattr_sem);
	if (!ext4_has_inline_data(inode)) {
		ret = 0;
		goto out_release_page;
	}

	if (!PageUptodate(page)) {
		ret = ext4_read_inline_page(inode, page);
		if (ret < 0)
			goto out_release_page;
	}

	up_read(&EXT4_I(inode)->xattr_sem);
	*pagep = page;
	handle = NULL;
	brelse(iloc.bh);
	return 1;
out_release_page:
	up_read(&EXT4_I(inode)->xattr_sem);
	unlock_page(page);
	page_cache_release(page);
out:
	if (handle)
		ext4_journal_stop(handle);
	brelse(iloc.bh);
	return ret;
}
int rtR0MemObjNativeLockUser(PPRTR0MEMOBJINTERNAL ppMem, RTR3PTR R3Ptr, size_t cb,
                             uint32_t fAccess, RTR0PROCESS R0Process)
{
    const int cPages = cb >> PAGE_SHIFT;
    struct task_struct *pTask = rtR0ProcessToLinuxTask(R0Process);
    struct vm_area_struct **papVMAs;
    PRTR0MEMOBJLNX pMemLnx;
    int rc = VERR_NO_MEMORY;
    NOREF(fAccess);

    /*
     * Check for valid task and size overflows.
     */
    if (!pTask)
        return VERR_NOT_SUPPORTED;
    if (((size_t)cPages << PAGE_SHIFT) != cb)
        return VERR_OUT_OF_RANGE;

    /*
     * Allocate the memory object and a temporary buffer for the VMAs.
     */
    pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(RT_OFFSETOF(RTR0MEMOBJLNX, apPages[cPages]),
                                            RTR0MEMOBJTYPE_LOCK, (void *)R3Ptr, cb);
    if (!pMemLnx)
        return VERR_NO_MEMORY;

    papVMAs = (struct vm_area_struct **)RTMemAlloc(sizeof(*papVMAs) * cPages);
    if (papVMAs)
    {
        down_read(&pTask->mm->mmap_sem);

        /*
         * Get user pages.
         */
        rc = get_user_pages(pTask,                  /* Task for fault accounting. */
                            pTask->mm,              /* Whose pages. */
                            R3Ptr,                  /* Where from. */
                            cPages,                 /* How many pages. */
                            1,                      /* Write to memory. */
                            0,                      /* force. */
                            &pMemLnx->apPages[0],   /* Page array. */
                            papVMAs);               /* vmas */
        if (rc == cPages)
        {
            /*
             * Flush dcache (required?), protect against fork and _really_ pin the page
             * table entries. get_user_pages() will protect against swapping out the
             * pages but it will NOT protect against removing page table entries. This
             * can be achieved with
             *   - using mlock / mmap(..., MAP_LOCKED, ...) from userland. This requires
             *     an appropriate limit set up with setrlimit(..., RLIMIT_MEMLOCK, ...).
             *     Usual Linux distributions support only a limited size of locked pages
             *     (e.g. 32KB).
             *   - setting the PageReserved bit (as we do in rtR0MemObjLinuxAllocPages()),
             *     or by
             *   - setting the VM_LOCKED flag. This is the same as doing mlock() without
             *     a range check.
             */
            /** @todo The Linux fork() protection will require more work if this API
             * is to be used for anything but locking VM pages. */
            while (rc-- > 0)
            {
                flush_dcache_page(pMemLnx->apPages[rc]);
                papVMAs[rc]->vm_flags |= (VM_DONTCOPY | VM_LOCKED);
            }

            up_read(&pTask->mm->mmap_sem);

            RTMemFree(papVMAs);

            pMemLnx->Core.u.Lock.R0Process = R0Process;
            pMemLnx->cPages = cPages;
            Assert(!pMemLnx->fMappedToRing0);
            *ppMem = &pMemLnx->Core;

            return VINF_SUCCESS;
        }

        /*
         * Failed - we need to unlock any pages that we succeeded to lock.
         */
        while (rc-- > 0)
        {
            if (!PageReserved(pMemLnx->apPages[rc]))
                SetPageDirty(pMemLnx->apPages[rc]);
            page_cache_release(pMemLnx->apPages[rc]);
        }

        up_read(&pTask->mm->mmap_sem);

        RTMemFree(papVMAs);
        rc = VERR_LOCK_FAILED;
    }

    rtR0MemObjDelete(&pMemLnx->Core);
    return rc;
}
int j4fs_write_begin(struct file *filp, struct address_space *mapping,
		     loff_t pos, unsigned len, unsigned flags,
		     struct page **pagep, void **fsdata)
{
	struct page *pg = NULL;
	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
	uint32_t offset = pos & (PAGE_CACHE_SIZE - 1);
	uint32_t to = offset + len;
	int ret = 0;
	int space_held = 0;

	if (j4fs_panic == 1) {
		T(J4FS_TRACE_ALWAYS, ("%s %d: j4fs panic\n", __FUNCTION__, __LINE__));
		return -ENOSPC;
	}

	T(J4FS_TRACE_FS, ("start j4fs_write_begin\n"));

	if (to > PAGE_CACHE_SIZE) {
		/* pos is loff_t (64-bit) and index is pgoff_t, so cast for %d */
		T(J4FS_TRACE_ALWAYS, ("%s %d: page size overflow(pos,index,offset,len,to)=(%d,%d,%d,%d,%d)\n",
				      __FUNCTION__, __LINE__, (int)pos, (int)index, offset, len, to));
		j4fs_panic("page size overflow");
		return -ENOSPC;
	}

	/* Get a page */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 28)
	pg = grab_cache_page_write_begin(mapping, index, flags);
#else
	pg = __grab_cache_page(mapping, index);
#endif

	*pagep = pg;
	if (!pg) {
		ret = -ENOMEM;
		goto out;
	}

	/* Get fs space */
	space_held = j4fs_hold_space(PAGE_CACHE_SIZE);
	if (!space_held) {
		ret = -ENOSPC;
		goto out;
	}

	/* Update page if required */
	if (!Page_Uptodate(pg) && (offset || to < PAGE_CACHE_SIZE))
		ret = j4fs_readpage_nolock(filp, pg);
	if (ret)
		goto out;

	/* Happy path return */
	T(J4FS_TRACE_FS, ("end j4fs_write_begin - ok\n"));
	return 0;

out:
	T(J4FS_TRACE_FS, ("end j4fs_write_begin fail returning %d\n", ret));
	if (pg) {
		unlock_page(pg);
		page_cache_release(pg);
	}
	return ret;
}
/*
 * Locate a page of swap in physical memory, reserving swap cache space
 * and reading the disk if it is not already cached.
 * A failure return means that either the page allocation failed or that
 * the swap entry is no longer in use.
 */
struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
			struct vm_area_struct *vma, unsigned long addr)
{
	struct page *found_page, *new_page = NULL;
	int err;

	do {
		/*
		 * First check the swap cache.  Since this is normally
		 * called after lookup_swap_cache() failed, re-calling
		 * that would confuse statistics.
		 */
		found_page = find_get_page(&swapper_space, entry.val);
		if (found_page)
			break;

		/*
		 * Get a new page to read into from swap.
		 */
		if (!new_page) {
			new_page = alloc_page_vma(gfp_mask, vma, addr);
			if (!new_page)
				break;		/* Out of memory */
		}

		/*
		 * call radix_tree_preload() while we can wait.
		 */
		err = radix_tree_preload(gfp_mask & GFP_KERNEL);
		if (err)
			break;

		/*
		 * Swap entry may have been freed since our caller observed it.
		 */
		err = swapcache_prepare(entry);
		if (err == -EEXIST) {
			radix_tree_preload_end();
			/*
			 * We might race against get_swap_page() and stumble
			 * across a SWAP_HAS_CACHE swap_map entry whose page
			 * has not been brought into the swapcache yet, while
			 * the other end is scheduled away waiting on discard
			 * I/O completion at scan_swap_map().
			 *
			 * In order to avoid turning this transitory state
			 * into a permanent loop around this -EEXIST case
			 * if !CONFIG_PREEMPT and the I/O completion happens
			 * to be waiting on the CPU waitqueue where we are now
			 * busy looping, we just conditionally invoke the
			 * scheduler here, if there are some more important
			 * tasks to run.
			 */
			cond_resched();
			continue;
		}
		if (err) {		/* swp entry is obsolete ? */
			radix_tree_preload_end();
			break;
		}

		/* May fail (-ENOMEM) if radix-tree node allocation failed. */
		__set_page_locked(new_page);
		SetPageSwapBacked(new_page);
		err = __add_to_swap_cache(new_page, entry);
		if (likely(!err)) {
			radix_tree_preload_end();
			/*
			 * Initiate read into locked page and return.
			 */
			lru_cache_add_anon(new_page);
			swap_readpage(new_page);
			return new_page;
		}
		radix_tree_preload_end();
		ClearPageSwapBacked(new_page);
		__clear_page_locked(new_page);
		/*
		 * add_to_swap_cache() doesn't return -EEXIST, so we can safely
		 * clear SWAP_HAS_CACHE flag.
		 */
		swapcache_free(entry, NULL);
	} while (err != -ENOMEM);

	if (new_page)
		page_cache_release(new_page);
	return found_page;
}
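/*
 * Example (not from the original source): a hypothetical sketch of the
 * fault-side caller.  A do_swap_page()-style handler that misses in
 * lookup_swap_cache() would pull the page in via read_swap_cache_async()
 * and wait for the read to complete; error handling is simplified and the
 * helper name is illustrative.
 */
static struct page *swapin_page(swp_entry_t entry, gfp_t gfp,
				struct vm_area_struct *vma, unsigned long addr)
{
	struct page *page = lookup_swap_cache(entry);

	if (!page)
		page = read_swap_cache_async(entry, gfp, vma, addr);
	if (page)
		wait_on_page_locked(page);	/* I/O completion unlocks it */
	return page;
}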
int rtR0MemObjNativeFree(RTR0MEMOBJ pMem)
{
    PRTR0MEMOBJLNX pMemLnx = (PRTR0MEMOBJLNX)pMem;

    /*
     * Release any memory that we've allocated or locked.
     */
    switch (pMemLnx->Core.enmType)
    {
        case RTR0MEMOBJTYPE_LOW:
        case RTR0MEMOBJTYPE_PAGE:
        case RTR0MEMOBJTYPE_CONT:
        case RTR0MEMOBJTYPE_PHYS:
        case RTR0MEMOBJTYPE_PHYS_NC:
            rtR0MemObjLinuxVUnmap(pMemLnx);
            rtR0MemObjLinuxFreePages(pMemLnx);
            break;

        case RTR0MEMOBJTYPE_LOCK:
            if (pMemLnx->Core.u.Lock.R0Process != NIL_RTR0PROCESS)
            {
                struct task_struct *pTask = rtR0ProcessToLinuxTask(pMemLnx->Core.u.Lock.R0Process);
                size_t iPage;
                Assert(pTask);
                if (pTask && pTask->mm)
                    down_read(&pTask->mm->mmap_sem);

                iPage = pMemLnx->cPages;
                while (iPage-- > 0)
                {
                    if (!PageReserved(pMemLnx->apPages[iPage]))
                        SetPageDirty(pMemLnx->apPages[iPage]);
                    page_cache_release(pMemLnx->apPages[iPage]);
                }

                if (pTask && pTask->mm)
                    up_read(&pTask->mm->mmap_sem);
            }
            /* else: kernel memory - nothing to do here. */
            break;

        case RTR0MEMOBJTYPE_RES_VIRT:
            Assert(pMemLnx->Core.pv);
            if (pMemLnx->Core.u.ResVirt.R0Process != NIL_RTR0PROCESS)
            {
                struct task_struct *pTask = rtR0ProcessToLinuxTask(pMemLnx->Core.u.Lock.R0Process);
                Assert(pTask);
                if (pTask && pTask->mm)
                {
                    MY_DO_MUNMAP(pTask->mm, (unsigned long)pMemLnx->Core.pv, pMemLnx->Core.cb);
                }
            }
            else
            {
                vunmap(pMemLnx->Core.pv);

                Assert(pMemLnx->cPages == 1 && pMemLnx->apPages[0] != NULL);
                __free_page(pMemLnx->apPages[0]);
                pMemLnx->apPages[0] = NULL;
                pMemLnx->cPages = 0;
            }
            pMemLnx->Core.pv = NULL;
            break;

        case RTR0MEMOBJTYPE_MAPPING:
            Assert(pMemLnx->cPages == 0);
            Assert(pMemLnx->Core.pv);
            if (pMemLnx->Core.u.ResVirt.R0Process != NIL_RTR0PROCESS)
            {
                struct task_struct *pTask = rtR0ProcessToLinuxTask(pMemLnx->Core.u.Lock.R0Process);
                Assert(pTask);
                if (pTask && pTask->mm)
                {
                    MY_DO_MUNMAP(pTask->mm, (unsigned long)pMemLnx->Core.pv, pMemLnx->Core.cb);
                }
            }
            else
                vunmap(pMemLnx->Core.pv);
            pMemLnx->Core.pv = NULL;
            break;

        default:
            AssertMsgFailed(("enmType=%d\n", pMemLnx->Core.enmType));
            return VERR_INTERNAL_ERROR;
    }
    return VINF_SUCCESS;
}
/*
 * We completely avoid races by reading each swap page in advance,
 * and then search for the process using it.  All the necessary
 * page table adjustments can then be made atomically.
 */
static int try_to_unuse(unsigned int type)
{
	struct swap_info_struct * si = &swap_info[type];
	struct mm_struct *start_mm;
	unsigned short *swap_map;
	unsigned short swcount;
	struct page *page;
	swp_entry_t entry;
	int i = 0;
	int retval = 0;
	int reset_overflow = 0;

	/*
	 * When searching mms for an entry, a good strategy is to
	 * start at the first mm we freed the previous entry from
	 * (though actually we don't notice whether we or coincidence
	 * freed the entry).  Initialize this start_mm with a hold.
	 *
	 * A simpler strategy would be to start at the last mm we
	 * freed the previous entry from; but that would take less
	 * advantage of mmlist ordering (now preserved by swap_out()),
	 * which clusters forked address spaces together, most recent
	 * child immediately after parent.  If we race with dup_mmap(),
	 * we very much want to resolve parent before child, otherwise
	 * we may miss some entries: using last mm would invert that.
	 */
	start_mm = &init_mm;
	atomic_inc(&init_mm.mm_users);

	/*
	 * Keep on scanning until all entries have gone.  Usually,
	 * one pass through swap_map is enough, but not necessarily:
	 * mmput() removes mm from mmlist before exit_mmap() and its
	 * zap_page_range().  That's not too bad, those entries are
	 * on their way out, and handled faster there than here.
	 * do_munmap() behaves similarly, taking the range out of mm's
	 * vma list before zap_page_range().  But unfortunately, when
	 * unmapping a part of a vma, it takes the whole out first,
	 * then reinserts what's left after (might even reschedule if
	 * open() method called) - so swap entries may be invisible
	 * to swapoff for a while, then reappear - but that is rare.
	 */
	while ((i = find_next_to_unuse(si, i))) {
		/*
		 * Get a page for the entry, using the existing swap
		 * cache page if there is one.  Otherwise, get a clean
		 * page and read the swap into it.
		 */
		swap_map = &si->swap_map[i];
		entry = SWP_ENTRY(type, i);
		page = read_swap_cache_async(entry);
		if (!page) {
			/*
			 * Either swap_duplicate() failed because entry
			 * has been freed independently, and will not be
			 * reused since sys_swapoff() already disabled
			 * allocation from here, or alloc_page() failed.
			 */
			if (!*swap_map)
				continue;
			retval = -ENOMEM;
			break;
		}

		/*
		 * Don't hold on to start_mm if it looks like exiting.
		 */
		if (atomic_read(&start_mm->mm_users) == 1) {
			mmput(start_mm);
			start_mm = &init_mm;
			atomic_inc(&init_mm.mm_users);
		}

		/*
		 * Wait for and lock page.  When do_swap_page races with
		 * try_to_unuse, do_swap_page can handle the fault much
		 * faster than try_to_unuse can locate the entry.  This
		 * apparently redundant "wait_on_page" lets try_to_unuse
		 * defer to do_swap_page in such a case - in some tests,
		 * do_swap_page and try_to_unuse repeatedly compete.
		 */
		wait_on_page(page);
		lock_page(page);

		/*
		 * Remove all references to entry, without blocking.
		 * Whenever we reach init_mm, there's no address space
		 * to search, but use it as a reminder to search shmem.
		 */
		swcount = *swap_map;
		if (swcount > 1) {
			flush_page_to_ram(page);
			if (start_mm == &init_mm)
				shmem_unuse(entry, page);
			else
				unuse_process(start_mm, entry, page);
		}
		if (*swap_map > 1) {
			int set_start_mm = (*swap_map >= swcount);
			struct list_head *p = &start_mm->mmlist;
			struct mm_struct *new_start_mm = start_mm;
			struct mm_struct *mm;

			spin_lock(&mmlist_lock);
			while (*swap_map > 1 &&
					(p = p->next) != &start_mm->mmlist) {
				mm = list_entry(p, struct mm_struct, mmlist);
				swcount = *swap_map;
				if (mm == &init_mm) {
					set_start_mm = 1;
					shmem_unuse(entry, page);
				} else
					unuse_process(mm, entry, page);
				if (set_start_mm && *swap_map < swcount) {
					new_start_mm = mm;
					set_start_mm = 0;
				}
			}
			atomic_inc(&new_start_mm->mm_users);
			spin_unlock(&mmlist_lock);
			mmput(start_mm);
			start_mm = new_start_mm;
		}

		/*
		 * How could swap count reach 0x7fff when the maximum
		 * pid is 0x7fff, and there's no way to repeat a swap
		 * page within an mm (except in shmem, where it's the
		 * shared object which takes the reference count)?
		 * We believe SWAP_MAP_MAX cannot occur in Linux 2.4.
		 *
		 * If that's wrong, then we should worry more about
		 * exit_mmap() and do_munmap() cases described above:
		 * we might be resetting SWAP_MAP_MAX too early here.
		 * We know "Undead"s can happen, they're okay, so don't
		 * report them; but do report if we reset SWAP_MAP_MAX.
		 */
		if (*swap_map == SWAP_MAP_MAX) {
			swap_list_lock();
			swap_device_lock(si);
			nr_swap_pages++;
			*swap_map = 1;
			swap_device_unlock(si);
			swap_list_unlock();
			reset_overflow = 1;
		}

		/*
		 * If a reference remains (rare), we would like to leave
		 * the page in the swap cache; but try_to_swap_out could
		 * then re-duplicate the entry once we drop page lock,
		 * so we might loop indefinitely; also, that page could
		 * not be swapped out to other storage meanwhile.  So:
		 * delete from cache even if there's another reference,
		 * after ensuring that the data has been saved to disk -
		 * since if the reference remains (rarer), it will be
		 * read from disk into another page.  Splitting into two
		 * pages would be incorrect if swap supported "shared
		 * private" pages, but they are handled by tmpfs files.
		 * Note shmem_unuse already deleted its page from the
		 * swap cache.
		 */
		if ((*swap_map > 1) && PageDirty(page) && PageSwapCache(page)) {
			rw_swap_page(WRITE, page);
			lock_page(page);
		}
		if (PageSwapCache(page))
			delete_from_swap_cache(page);

		/*
		 * So we could skip searching mms once swap count went
		 * to 1, we did not mark any present ptes as dirty: must
		 * mark page dirty so try_to_swap_out will preserve it.
		 */
		SetPageDirty(page);
		UnlockPage(page);
		page_cache_release(page);

		/*
		 * Make sure that we aren't completely killing
		 * interactive performance.  Interruptible check on
		 * signal_pending() would be nice, but changes the spec?
		 */
		if (current->need_resched)
			schedule();
	}
int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr,
			      sector_t pblocknr, struct buffer_head **pbh,
			      int newblk)
{
	struct buffer_head *bh;
	struct inode *inode = NILFS_BTNC_I(btnc);
	int err;

	bh = nilfs_grab_buffer(inode, btnc, blocknr, 1 << BH_NILFS_Node);
	if (unlikely(!bh))
		return -ENOMEM;

	err = -EEXIST; /* internal code */
	if (newblk) {
		if (unlikely(buffer_mapped(bh) || buffer_uptodate(bh) ||
			     buffer_dirty(bh))) {
			brelse(bh);
			BUG();
		}
		memset(bh->b_data, 0, 1 << inode->i_blkbits);
		bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev;
		bh->b_blocknr = blocknr;
		set_buffer_mapped(bh);
		set_buffer_uptodate(bh);
		goto found;
	}

	if (buffer_uptodate(bh) || buffer_dirty(bh))
		goto found;

	if (pblocknr == 0) {
		pblocknr = blocknr;
		if (inode->i_ino != NILFS_DAT_INO) {
			struct inode *dat =
				nilfs_dat_inode(NILFS_I_NILFS(inode));

			/* blocknr is a virtual block number */
			err = nilfs_dat_translate(dat, blocknr, &pblocknr);
			if (unlikely(err)) {
				brelse(bh);
				goto out_locked;
			}
		}
	}
	lock_buffer(bh);
	if (buffer_uptodate(bh)) {
		unlock_buffer(bh);
		err = -EEXIST; /* internal code */
		goto found;
	}
	set_buffer_mapped(bh);
	bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev;
	bh->b_blocknr = pblocknr; /* set block address for read */
	bh->b_end_io = end_buffer_read_sync;
	get_bh(bh);
	submit_bh(READ, bh);
	bh->b_blocknr = blocknr; /* set back to the given block address */
	err = 0;
found:
	*pbh = bh;

out_locked:
	unlock_page(bh->b_page);
	page_cache_release(bh->b_page);
	return err;
}
/*
 * Create dentry/inode for this file and add it to the dircache.
 */
int
smb_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
	       struct smb_cache_control *ctrl, struct qstr *qname,
	       struct smb_fattr *entry)
{
	struct dentry *newdent, *dentry = filp->f_dentry;
	struct inode *newino, *inode = dentry->d_inode;
	struct smb_cache_control ctl = *ctrl;
	int valid = 0;
	int hashed = 0;
	ino_t ino = 0;

	qname->hash = full_name_hash(qname->name, qname->len);

	if (dentry->d_op && dentry->d_op->d_hash)
		if (dentry->d_op->d_hash(dentry, qname) != 0)
			goto end_advance;

	newdent = d_lookup(dentry, qname);

	if (!newdent) {
		newdent = d_alloc(dentry, qname);
		if (!newdent)
			goto end_advance;
	} else {
		hashed = 1;
		memcpy((char *) newdent->d_name.name, qname->name,
		       newdent->d_name.len);
	}

	if (!newdent->d_inode) {
		smb_renew_times(newdent);
		entry->f_ino = iunique(inode->i_sb, 2);
		newino = smb_iget(inode->i_sb, entry);
		if (newino) {
			smb_new_dentry(newdent);
			d_instantiate(newdent, newino);
			if (!hashed)
				d_rehash(newdent);
		}
	} else
		smb_set_inode_attr(newdent->d_inode, entry);

	if (newdent->d_inode) {
		ino = newdent->d_inode->i_ino;
		newdent->d_fsdata = (void *) ctl.fpos;
		smb_new_dentry(newdent);
	}

	if (ctl.idx >= SMB_DIRCACHE_SIZE) {
		if (ctl.page) {
			kunmap(ctl.page);
			SetPageUptodate(ctl.page);
			unlock_page(ctl.page);
			page_cache_release(ctl.page);
		}
		ctl.cache = NULL;
		ctl.idx  -= SMB_DIRCACHE_SIZE;
		ctl.ofs  += 1;
		ctl.page  = grab_cache_page(&inode->i_data, ctl.ofs);
		if (ctl.page)
			ctl.cache = kmap(ctl.page);
	}
	if (ctl.cache) {
		ctl.cache->dentry[ctl.idx] = newdent;
		valid = 1;
	}
	dput(newdent);

end_advance:
	if (!valid)
		ctl.valid = 0;
	if (!ctl.filled && (ctl.fpos == filp->f_pos)) {
		if (!ino)
			ino = find_inode_number(dentry, qname);
		if (!ino)
			ino = iunique(inode->i_sb, 2);
		ctl.filled = filldir(dirent, qname->name, qname->len,
				     filp->f_pos, ino, DT_UNKNOWN);
		if (!ctl.filled)
			filp->f_pos += 1;
	}
	ctl.fpos += 1;
	ctl.idx  += 1;
	*ctrl = ctl;
	return (ctl.valid || !ctl.filled);
}
/*
 * Perform a free_page(), also freeing any swap cache associated with
 * this page if it is the last user of the page.
 */
void free_page_and_swap_cache(struct page *page)
{
	free_swap_cache(page);
	page_cache_release(page);
}
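/*
 * Example (not from the original source): a hypothetical helper that
 * releases an array of pages gathered during an unmap, dropping any swap
 * cache each page might still hold.  The kernel's real batch variant does
 * this with pagevec batching; this loop is purely illustrative.
 */
static void release_pages_and_swap_cache(struct page **pages, int nr)
{
	int i;

	for (i = 0; i < nr; i++)
		free_page_and_swap_cache(pages[i]);
}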
/*
 * Returns a pointer to a buffer containing at least LEN bytes of
 * filesystem starting at byte offset OFFSET into the filesystem.
 */
static void *cramfs_read(struct super_block *sb, unsigned int offset, unsigned int len)
{
	struct address_space *mapping = sb->s_bdev->bd_inode->i_mapping;
	struct page *pages[BLKS_PER_BUF];
	unsigned i, blocknr, buffer, unread;
	unsigned long devsize;
	char *data;

	if (!len)
		return NULL;
	blocknr = offset >> PAGE_CACHE_SHIFT;
	offset &= PAGE_CACHE_SIZE - 1;

	/* Check if an existing buffer already has the data.. */
	for (i = 0; i < READ_BUFFERS; i++) {
		unsigned int blk_offset;

		if (buffer_dev[i] != sb)
			continue;
		if (blocknr < buffer_blocknr[i])
			continue;
		blk_offset = (blocknr - buffer_blocknr[i]) << PAGE_CACHE_SHIFT;
		blk_offset += offset;
		if (blk_offset + len > BUFFER_SIZE)
			continue;
		return read_buffers[i] + blk_offset;
	}

	devsize = mapping->host->i_size >> PAGE_CACHE_SHIFT;

	/* Ok, read in BLKS_PER_BUF pages completely first. */
	unread = 0;
	for (i = 0; i < BLKS_PER_BUF; i++) {
		struct page *page = NULL;

		if (blocknr + i < devsize) {
			page = read_mapping_page(mapping, blocknr + i, NULL);
			/* synchronous error? */
			if (IS_ERR(page))
				page = NULL;
		}
		pages[i] = page;
	}

	for (i = 0; i < BLKS_PER_BUF; i++) {
		struct page *page = pages[i];
		if (page) {
			wait_on_page_locked(page);
			if (!PageUptodate(page)) {
				/* asynchronous error */
				page_cache_release(page);
				pages[i] = NULL;
			}
		}
	}

	buffer = next_buffer;
	next_buffer = NEXT_BUFFER(buffer);
	buffer_blocknr[buffer] = blocknr;
	buffer_dev[buffer] = sb;

	data = read_buffers[buffer];
	for (i = 0; i < BLKS_PER_BUF; i++) {
		struct page *page = pages[i];
		if (page) {
			memcpy(data, kmap(page), PAGE_CACHE_SIZE);
			kunmap(page);
			page_cache_release(page);
		} else
			memset(data, 0, PAGE_CACHE_SIZE);
		data += PAGE_CACHE_SIZE;
	}
	return read_buffers[buffer] + offset;
}
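/*
 * Example (not from the original source): a hypothetical caller.  cramfs
 * uses cramfs_read() like this to pull metadata out of the block device
 * mapping; the returned pointer is a borrowed window into an internal
 * read buffer that a later cramfs_read() may recycle, so the caller
 * copies what it needs.  The helper name is illustrative.
 */
static int cramfs_peek_super(struct super_block *sb, struct cramfs_super *out)
{
	void *data = cramfs_read(sb, 0, sizeof(*out));

	if (!data)
		return -EINVAL;
	memcpy(out, data, sizeof(*out));	/* buffer is only borrowed */
	return 0;
}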
static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
					      unsigned long dst_start,
					      unsigned long src_start,
					      unsigned long len,
					      bool zeropage)
{
	struct vm_area_struct *dst_vma;
	ssize_t err;
	pmd_t *dst_pmd;
	unsigned long src_addr, dst_addr;
	long copied;
	struct page *page;

	/*
	 * Sanitize the command parameters:
	 */
	BUG_ON(dst_start & ~PAGE_MASK);
	BUG_ON(len & ~PAGE_MASK);

	/* Does the address range wrap, or is the span zero-sized? */
	BUG_ON(src_start + len <= src_start);
	BUG_ON(dst_start + len <= dst_start);

	src_addr = src_start;
	dst_addr = dst_start;
	copied = 0;
	page = NULL;
retry:
	down_read(&dst_mm->mmap_sem);

	/*
	 * Make sure the vma is not shared, that the dst range is
	 * both valid and fully within a single existing vma.
	 */
	err = -EINVAL;
	dst_vma = find_vma(dst_mm, dst_start);
	if (!dst_vma || (dst_vma->vm_flags & VM_SHARED))
		goto out_unlock;
	if (dst_start < dst_vma->vm_start ||
	    dst_start + len > dst_vma->vm_end)
		goto out_unlock;

	/*
	 * Be strict and only allow __mcopy_atomic on userfaultfd
	 * registered ranges to prevent userland errors going
	 * unnoticed. As far as the VM consistency is concerned, it
	 * would be perfectly safe to remove this check, but there's
	 * no useful usage for __mcopy_atomic outside of userfaultfd
	 * registered ranges. This is after all why these are ioctls
	 * belonging to the userfaultfd and not syscalls.
	 */
	if (!dst_vma->vm_userfaultfd_ctx.ctx)
		goto out_unlock;

	/*
	 * FIXME: only allow copying on anonymous vmas, tmpfs should
	 * be added.
	 */
	if (dst_vma->vm_ops)
		goto out_unlock;

	/*
	 * Ensure the dst_vma has an anon_vma or this page
	 * would get a NULL anon_vma when moved in the
	 * dst_vma.
	 */
	err = -ENOMEM;
	if (unlikely(anon_vma_prepare(dst_vma)))
		goto out_unlock;

	while (src_addr < src_start + len) {
		pmd_t dst_pmdval;

		BUG_ON(dst_addr >= dst_start + len);

		dst_pmd = mm_alloc_pmd(dst_mm, dst_addr);
		if (unlikely(!dst_pmd)) {
			err = -ENOMEM;
			break;
		}

		dst_pmdval = pmd_read_atomic(dst_pmd);
		/*
		 * If the dst_pmd is mapped as THP don't
		 * override it and just be strict.
		 */
		if (unlikely(pmd_trans_huge(dst_pmdval))) {
			err = -EEXIST;
			break;
		}
		if (unlikely(pmd_none(dst_pmdval)) &&
		    unlikely(__pte_alloc(dst_mm, dst_pmd, dst_addr))) {
			err = -ENOMEM;
			break;
		}
		/* If a huge pmd materialized from under us fail */
		if (unlikely(pmd_trans_huge(*dst_pmd))) {
			err = -EFAULT;
			break;
		}

		BUG_ON(pmd_none(*dst_pmd));
		BUG_ON(pmd_trans_huge(*dst_pmd));

		if (!zeropage)
			err = mcopy_atomic_pte(dst_mm, dst_pmd, dst_vma,
					       dst_addr, src_addr, &page);
		else
			err = mfill_zeropage_pte(dst_mm, dst_pmd, dst_vma,
						 dst_addr);

		cond_resched();

		if (unlikely(err == -EFAULT)) {
			void *page_kaddr;

			up_read(&dst_mm->mmap_sem);
			BUG_ON(!page);

			page_kaddr = kmap(page);
			err = copy_from_user(page_kaddr,
					     (const void __user *) src_addr,
					     PAGE_SIZE);
			kunmap(page);
			if (unlikely(err)) {
				err = -EFAULT;
				goto out;
			}
			goto retry;
		} else
			BUG_ON(page);

		if (!err) {
			dst_addr += PAGE_SIZE;
			src_addr += PAGE_SIZE;
			copied += PAGE_SIZE;

			if (fatal_signal_pending(current))
				err = -EINTR;
		}
		if (err)
			break;
	}

out_unlock:
	up_read(&dst_mm->mmap_sem);
out:
	if (page)
		page_cache_release(page);
	BUG_ON(copied < 0);
	BUG_ON(err > 0);
	BUG_ON(!copied && !err);
	return copied ? copied : err;
}
int ttm_tt_swapout(struct ttm_tt *ttm, struct file *persistant_swap_storage)
{
	struct address_space *swap_space;
	struct file *swap_storage;
	struct page *from_page;
	struct page *to_page;
	void *from_virtual;
	void *to_virtual;
	int i;

	BUG_ON(ttm->state != tt_unbound && ttm->state != tt_unpopulated);
	BUG_ON(ttm->caching_state != tt_cached);

	/*
	 * For user buffers, just unpin the pages, as there should be
	 * vma references.
	 */
	if (ttm->page_flags & TTM_PAGE_FLAG_USER) {
		ttm_tt_free_user_pages(ttm);
		ttm->page_flags |= TTM_PAGE_FLAG_SWAPPED;
		ttm->swap_storage = NULL;
		return 0;
	}

	if (!persistant_swap_storage) {
		swap_storage = shmem_file_setup("ttm swap",
						ttm->num_pages << PAGE_SHIFT,
						0);
		if (unlikely(IS_ERR(swap_storage))) {
			printk(KERN_ERR "Failed allocating swap storage.\n");
			return -ENOMEM;
		}
	} else
		swap_storage = persistant_swap_storage;

	swap_space = swap_storage->f_path.dentry->d_inode->i_mapping;

	for (i = 0; i < ttm->num_pages; ++i) {
		from_page = ttm->pages[i];
		if (unlikely(from_page == NULL))
			continue;
		to_page = read_mapping_page(swap_space, i, NULL);
		if (unlikely(to_page == NULL))
			goto out_err;

		preempt_disable();
		from_virtual = kmap_atomic(from_page, KM_USER0);
		to_virtual = kmap_atomic(to_page, KM_USER1);
		memcpy(to_virtual, from_virtual, PAGE_SIZE);
		kunmap_atomic(to_virtual, KM_USER1);
		kunmap_atomic(from_virtual, KM_USER0);
		preempt_enable();
		set_page_dirty(to_page);
		mark_page_accessed(to_page);
		page_cache_release(to_page);
	}

	ttm_tt_free_alloced_pages(ttm);
	ttm->swap_storage = swap_storage;
	ttm->page_flags |= TTM_PAGE_FLAG_SWAPPED;
	if (persistant_swap_storage)
		ttm->page_flags |= TTM_PAGE_FLAG_PERSISTANT_SWAP;

	return 0;
out_err:
	if (!persistant_swap_storage)
		fput(swap_storage);

	return -ENOMEM;
}
/*
 * Try to write data in the inode.
 * If the inode has inline data, check whether the new write can also fit
 * in the inode.  If not, create the page and the handle, move the data to
 * the page, mark it uptodate, and let the later code create extents for it.
 */
int ext4_try_to_write_inline_data(struct address_space *mapping,
				  struct inode *inode,
				  loff_t pos, unsigned len,
				  unsigned flags,
				  struct page **pagep)
{
	int ret;
	handle_t *handle;
	struct page *page;
	struct ext4_iloc iloc;

	if (pos + len > ext4_get_max_inline_size(inode))
		goto convert;

	ret = ext4_get_inode_loc(inode, &iloc);
	if (ret)
		return ret;

	/*
	 * The possible write could happen in the inode,
	 * so try to reserve the space in inode first.
	 */
	handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		handle = NULL;
		goto out;
	}

	ret = ext4_prepare_inline_data(handle, inode, pos + len);
	if (ret && ret != -ENOSPC)
		goto out;

	/* We don't have space in inline inode, so convert it to extent. */
	if (ret == -ENOSPC) {
		ext4_journal_stop(handle);
		brelse(iloc.bh);
		goto convert;
	}

	flags |= AOP_FLAG_NOFS;

	page = grab_cache_page_write_begin(mapping, 0, flags);
	if (!page) {
		ret = -ENOMEM;
		goto out;
	}

	*pagep = page;
	down_read(&EXT4_I(inode)->xattr_sem);
	if (!ext4_has_inline_data(inode)) {
		ret = 0;
		unlock_page(page);
		page_cache_release(page);
		goto out_up_read;
	}

	if (!PageUptodate(page)) {
		ret = ext4_read_inline_page(inode, page);
		if (ret < 0)
			goto out_up_read;
	}

	ret = 1;
	handle = NULL;
out_up_read:
	up_read(&EXT4_I(inode)->xattr_sem);
out:
	if (handle)
		ext4_journal_stop(handle);
	brelse(iloc.bh);
	return ret;
convert:
	return ext4_convert_inline_data_to_extent(mapping, inode, flags);
}
static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry,
		      struct inode *new_dir, struct dentry *new_dentry)
{
	struct inode *old_inode = old_dentry->d_inode;
	struct inode *new_inode = new_dentry->d_inode;
	struct page *dir_page = NULL;
	struct ufs_dir_entry * dir_de = NULL;
	struct page *old_page;
	struct ufs_dir_entry *old_de;
	int err = -ENOENT;

	dquot_initialize(old_dir);
	dquot_initialize(new_dir);

	old_de = ufs_find_entry(old_dir, &old_dentry->d_name, &old_page);
	if (!old_de)
		goto out;

	if (S_ISDIR(old_inode->i_mode)) {
		err = -EIO;
		dir_de = ufs_dotdot(old_inode, &dir_page);
		if (!dir_de)
			goto out_old;
	}

	if (new_inode) {
		struct page *new_page;
		struct ufs_dir_entry *new_de;

		err = -ENOTEMPTY;
		if (dir_de && !ufs_empty_dir(new_inode))
			goto out_dir;

		err = -ENOENT;
		new_de = ufs_find_entry(new_dir, &new_dentry->d_name, &new_page);
		if (!new_de)
			goto out_dir;
		inode_inc_link_count(old_inode);
		ufs_set_link(new_dir, new_de, new_page, old_inode);
		new_inode->i_ctime = CURRENT_TIME_SEC;
		if (dir_de)
			drop_nlink(new_inode);
		inode_dec_link_count(new_inode);
	} else {
		if (dir_de) {
			err = -EMLINK;
			if (new_dir->i_nlink >= UFS_LINK_MAX)
				goto out_dir;
		}
		inode_inc_link_count(old_inode);
		err = ufs_add_link(new_dentry, old_inode);
		if (err) {
			inode_dec_link_count(old_inode);
			goto out_dir;
		}
		if (dir_de)
			inode_inc_link_count(new_dir);
	}

	/*
	 * Like most other Unix systems, set the ctime for inodes on a
	 * rename.
	 * inode_dec_link_count() will mark the inode dirty.
	 */
	old_inode->i_ctime = CURRENT_TIME_SEC;

	ufs_delete_entry(old_dir, old_de, old_page);
	inode_dec_link_count(old_inode);

	if (dir_de) {
		ufs_set_link(old_inode, dir_de, dir_page, new_dir);
		inode_dec_link_count(old_dir);
	}
	return 0;

out_dir:
	if (dir_de) {
		kunmap(dir_page);
		page_cache_release(dir_page);
	}
out_old:
	kunmap(old_page);
	page_cache_release(old_page);
out:
	return err;
}
static int sysv_rename(struct inode * old_dir, struct dentry * old_dentry,
		       struct inode * new_dir, struct dentry * new_dentry)
{
	struct inode * old_inode = old_dentry->d_inode;
	struct inode * new_inode = new_dentry->d_inode;
	struct page * dir_page = NULL;
	struct sysv_dir_entry * dir_de = NULL;
	struct page * old_page;
	struct sysv_dir_entry * old_de;
	int err = -ENOENT;

	old_de = sysv_find_entry(old_dentry, &old_page);
	if (!old_de)
		goto out;

	if (S_ISDIR(old_inode->i_mode)) {
		err = -EIO;
		dir_de = sysv_dotdot(old_inode, &dir_page);
		if (!dir_de)
			goto out_old;
	}

	if (new_inode) {
		struct page * new_page;
		struct sysv_dir_entry * new_de;

		err = -ENOTEMPTY;
		if (dir_de && !sysv_empty_dir(new_inode))
			goto out_dir;

		err = -ENOENT;
		new_de = sysv_find_entry(new_dentry, &new_page);
		if (!new_de)
			goto out_dir;
		sysv_set_link(new_de, new_page, old_inode);
		new_inode->i_ctime = CURRENT_TIME_SEC;
		if (dir_de)
			drop_nlink(new_inode);
		inode_dec_link_count(new_inode);
	} else {
		err = sysv_add_link(new_dentry, old_inode);
		if (err)
			goto out_dir;
		if (dir_de)
			inode_inc_link_count(new_dir);
	}

	sysv_delete_entry(old_de, old_page);
	mark_inode_dirty(old_inode);

	if (dir_de) {
		sysv_set_link(dir_de, dir_page, new_dir);
		inode_dec_link_count(old_dir);
	}
	return 0;

out_dir:
	if (dir_de) {
		kunmap(dir_page);
		page_cache_release(dir_page);
	}
out_old:
	kunmap(old_page);
	page_cache_release(old_page);
out:
	return err;
}
/* Internal operations on file data */
static inline void reiserfs_put_page(struct page *page)
{
	kunmap(page);
	page_cache_release(page);
}
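/*
 * Example (not from the original source): the usual pairing.  A page that
 * was looked up and kmapped by a reiserfs_get_page()-style helper is
 * released with reiserfs_put_page() once the caller is done with the
 * mapping.  The helper below is a hypothetical sketch.
 */
static int copy_page_prefix(struct page *page, char *buf, size_t n)
{
	/* assumes the caller obtained 'page' already kmapped */
	memcpy(buf, page_address(page), n);
	reiserfs_put_page(page);	/* kunmap + drop the reference */
	return 0;
}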
/*
 * zswap_get_swap_cache_page
 *
 * This is an adaptation of read_swap_cache_async()
 *
 * This function tries to find a page with the given swap entry
 * in the swapper_space address space (the swap cache).  If the page
 * is found, it is returned in retpage.  Otherwise, a page is allocated,
 * added to the swap cache, and returned in retpage.
 *
 * On success, the swap cache page is returned in retpage
 * Returns ZSWAP_SWAPCACHE_EXIST if page was already in the swap cache
 * Returns ZSWAP_SWAPCACHE_NEW if the new page needs to be populated,
 *     the new page is added to swapcache and locked
 * Returns ZSWAP_SWAPCACHE_FAIL on error
 */
static int zswap_get_swap_cache_page(swp_entry_t entry,
				struct page **retpage)
{
	struct page *found_page, *new_page = NULL;
	struct address_space *swapper_space = swap_address_space(entry);
	int err;

	*retpage = NULL;
	do {
		/*
		 * First check the swap cache.  Since this is normally
		 * called after lookup_swap_cache() failed, re-calling
		 * that would confuse statistics.
		 */
		found_page = find_get_page(swapper_space, entry.val);
		if (found_page)
			break;

		/*
		 * Get a new page to read into from swap.
		 */
		if (!new_page) {
			new_page = alloc_page(GFP_KERNEL);
			if (!new_page)
				break; /* Out of memory */
		}

		/*
		 * call radix_tree_preload() while we can wait.
		 */
		err = radix_tree_preload(GFP_KERNEL);
		if (err)
			break;

		/*
		 * Swap entry may have been freed since our caller observed it.
		 */
		err = swapcache_prepare(entry);
		if (err == -EEXIST) { /* seems racy */
			radix_tree_preload_end();
			continue;
		}
		if (err) { /* swp entry is obsolete ? */
			radix_tree_preload_end();
			break;
		}

		/* May fail (-ENOMEM) if radix-tree node allocation failed. */
		__set_page_locked(new_page);
		SetPageSwapBacked(new_page);
		err = __add_to_swap_cache(new_page, entry);
		if (likely(!err)) {
			radix_tree_preload_end();
			lru_cache_add_anon(new_page);
			*retpage = new_page;
			return ZSWAP_SWAPCACHE_NEW;
		}
		radix_tree_preload_end();
		ClearPageSwapBacked(new_page);
		__clear_page_locked(new_page);
		/*
		 * add_to_swap_cache() doesn't return -EEXIST, so we can safely
		 * clear SWAP_HAS_CACHE flag.
		 */
		swapcache_free(entry, NULL);
	} while (err != -ENOMEM);

	if (new_page)
		page_cache_release(new_page);
	if (!found_page)
		return ZSWAP_SWAPCACHE_FAIL;
	*retpage = found_page;
	return ZSWAP_SWAPCACHE_EXIST;
}
static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata)
{
	struct page *pg;
	struct inode *inode = mapping->host;
	struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
	uint32_t pageofs = index << PAGE_CACHE_SHIFT;
	int ret = 0;

	pg = __grab_cache_page(mapping, index);
	if (!pg)
		return -ENOMEM;
	*pagep = pg;

	D1(printk(KERN_DEBUG "jffs2_write_begin()\n"));

	if (pageofs > inode->i_size) {
		/* Make new hole frag from old EOF to new page */
		struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
		struct jffs2_raw_inode ri;
		struct jffs2_full_dnode *fn;
		uint32_t alloc_len;

		D1(printk(KERN_DEBUG "Writing new hole frag 0x%x-0x%x between current EOF and new page\n",
			  (unsigned int)inode->i_size, pageofs));

		ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len,
					  ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE);
		if (ret)
			goto out_page;

		down(&f->sem);
		memset(&ri, 0, sizeof(ri));

		ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
		ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
		ri.totlen = cpu_to_je32(sizeof(ri));
		ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));

		ri.ino = cpu_to_je32(f->inocache->ino);
		ri.version = cpu_to_je32(++f->highest_version);
		ri.mode = cpu_to_jemode(inode->i_mode);
		ri.uid = cpu_to_je16(inode->i_uid);
		ri.gid = cpu_to_je16(inode->i_gid);
		ri.isize = cpu_to_je32(max((uint32_t)inode->i_size, pageofs));
		ri.atime = ri.ctime = ri.mtime = cpu_to_je32(get_seconds());
		ri.offset = cpu_to_je32(inode->i_size);
		ri.dsize = cpu_to_je32(pageofs - inode->i_size);
		ri.csize = cpu_to_je32(0);
		ri.compr = JFFS2_COMPR_ZERO;
		ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
		ri.data_crc = cpu_to_je32(0);

		fn = jffs2_write_dnode(c, f, &ri, NULL, 0, ALLOC_NORMAL);

		if (IS_ERR(fn)) {
			ret = PTR_ERR(fn);
			jffs2_complete_reservation(c);
			up(&f->sem);
			goto out_page;
		}
		ret = jffs2_add_full_dnode_to_inode(c, f, fn);
		if (f->metadata) {
			jffs2_mark_node_obsolete(c, f->metadata->raw);
			jffs2_free_full_dnode(f->metadata);
			f->metadata = NULL;
		}
		if (ret) {
			D1(printk(KERN_DEBUG "Eep. add_full_dnode_to_inode() failed in write_begin, returned %d\n", ret));
			jffs2_mark_node_obsolete(c, fn->raw);
			jffs2_free_full_dnode(fn);
			jffs2_complete_reservation(c);
			up(&f->sem);
			goto out_page;
		}
		jffs2_complete_reservation(c);
		inode->i_size = pageofs;
		up(&f->sem);
	}

	/*
	 * Read in the page if it wasn't already present. Cannot optimize away
	 * the whole page write case until jffs2_write_end can handle the
	 * case of a short-copy.
	 */
	if (!PageUptodate(pg)) {
		down(&f->sem);
		ret = jffs2_do_readpage_nolock(inode, pg);
		up(&f->sem);
		if (ret)
			goto out_page;
	}
	D1(printk(KERN_DEBUG "end write_begin(). pg->flags %lx\n", pg->flags));
	return ret;

out_page:
	unlock_page(pg);
	page_cache_release(pg);
	return ret;
}
/**
 * generic_pipe_buf_release - put a reference to a &struct pipe_buffer
 * @pipe:	the pipe that the buffer belongs to
 * @buf:	the buffer to put a reference to
 *
 * Description:
 *	This function releases a reference to @buf.
 */
void generic_pipe_buf_release(struct pipe_inode_info *pipe,
			      struct pipe_buffer *buf)
{
	page_cache_release(buf->page);
}
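/*
 * Example (not from the original source): a minimal sketch of how a
 * pipe_buf_operations table of this era typically plugs in
 * generic_pipe_buf_release as its ->release hook.  The other generic
 * helpers named here belong to the same family; this particular ops
 * table is illustrative, not a real in-tree one.
 */
static const struct pipe_buf_operations example_pipe_buf_ops = {
	.can_merge = 0,
	.map = generic_pipe_buf_map,
	.unmap = generic_pipe_buf_unmap,
	.confirm = generic_pipe_buf_confirm,
	.release = generic_pipe_buf_release,
	.steal = generic_pipe_buf_steal,
	.get = generic_pipe_buf_get,
};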
static int jffs2_write_end(struct file *filp, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *pg, void *fsdata)
{
	/* Actually commit the write from the page cache page we're looking at.
	 * For now, we write the full page out each time. It sucks, but it's simple
	 */
	struct inode *inode = mapping->host;
	struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
	struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
	struct jffs2_raw_inode *ri;
	unsigned start = pos & (PAGE_CACHE_SIZE - 1);
	unsigned end = start + copied;
	unsigned aligned_start = start & ~3;
	int ret = 0;
	uint32_t writtenlen = 0;

	D1(printk(KERN_DEBUG "jffs2_write_end(): ino #%lu, page at 0x%lx, range %d-%d, flags %lx\n",
		  inode->i_ino, pg->index << PAGE_CACHE_SHIFT, start, end, pg->flags));

	/* We need to avoid deadlock with page_cache_read() in
	   jffs2_garbage_collect_pass(). So the page must be
	   up to date to prevent page_cache_read() from trying
	   to re-lock it. */
	BUG_ON(!PageUptodate(pg));

	if (end == PAGE_CACHE_SIZE) {
		/* When writing out the end of a page, write out the
		   _whole_ page. This helps to reduce the number of
		   nodes in files which have many short writes, like
		   syslog files. */
		aligned_start = 0;
	}

	ri = jffs2_alloc_raw_inode();

	if (!ri) {
		D1(printk(KERN_DEBUG "jffs2_write_end(): Allocation of raw inode failed\n"));
		unlock_page(pg);
		page_cache_release(pg);
		return -ENOMEM;
	}

	/* Set the fields that the generic jffs2_write_inode_range() code can't find */
	ri->ino = cpu_to_je32(inode->i_ino);
	ri->mode = cpu_to_jemode(inode->i_mode);
	ri->uid = cpu_to_je16(inode->i_uid);
	ri->gid = cpu_to_je16(inode->i_gid);
	ri->isize = cpu_to_je32((uint32_t)inode->i_size);
	ri->atime = ri->ctime = ri->mtime = cpu_to_je32(get_seconds());

	/* In 2.4, it was already kmapped by generic_file_write(). Doesn't
	   hurt to do it again. The alternative is ifdefs, which are ugly. */
	kmap(pg);

	ret = jffs2_write_inode_range(c, f, ri, page_address(pg) + aligned_start,
				      (pg->index << PAGE_CACHE_SHIFT) + aligned_start,
				      end - aligned_start, &writtenlen);

	kunmap(pg);

	if (ret) {
		/* There was an error writing. */
		SetPageError(pg);
	}

	/* Adjust writtenlen for the padding we did, so we don't confuse our caller */
	writtenlen -= min(writtenlen, (start - aligned_start));

	if (writtenlen) {
		if (inode->i_size < pos + writtenlen) {
			inode->i_size = pos + writtenlen;
			inode->i_blocks = (inode->i_size + 511) >> 9;

			inode->i_ctime = inode->i_mtime = ITIME(je32_to_cpu(ri->ctime));
		}
	}
/*
 * For address_spaces which do not use buffers nor write back.
 */
int __set_page_dirty_no_writeback(struct page *page)
{
	if (!PageDirty(page))
		SetPageDirty(page);
	return 0;
}

/*
 * For address_spaces which do not use buffers.  Just tag the page as dirty in
 * its radix tree.
 *
 * This is also used when a single buffer is being dirtied: we want to set the
 * page dirty in that case, but not all the buffers.  This is a "bottom-up"
 * dirtying, whereas __set_page_dirty_buffers() is a "top-down" dirtying.
 *
 * Most callers have locked the page, which pins the address_space in memory.
 * But zap_pte_range() does not lock the page, however in that case the
 * mapping is pinned by the vma's ->vm_file reference.
 *
 * We take care to handle the case where the page was truncated from the
 * mapping by re-checking page_mapping() inside tree_lock.
 */
int __set_page_dirty_nobuffers(struct page *page)
{
	if (!TestSetPageDirty(page)) {
		struct address_space *mapping = page_mapping(page);
		struct address_space *mapping2;

		if (!mapping)
			return 1;

		write_lock_irq(&mapping->tree_lock);
		mapping2 = page_mapping(page);
		if (mapping2) { /* Race with truncate? */
			BUG_ON(mapping2 != mapping);
			WARN_ON_ONCE(!PagePrivate(page) && !PageUptodate(page));
			if (mapping_cap_account_dirty(mapping)) {
				__inc_zone_page_state(page, NR_FILE_DIRTY);
				task_io_account_write(PAGE_CACHE_SIZE);
			}
			radix_tree_tag_set(&mapping->page_tree,
				page_index(page), PAGECACHE_TAG_DIRTY);
		}
		write_unlock_irq(&mapping->tree_lock);
		if (mapping->host) {
			/* !PageAnon && !swapper_space */
			__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
		}
		return 1;
	}
	return 0;
}
EXPORT_SYMBOL(__set_page_dirty_nobuffers);

/*
 * When a writepage implementation decides that it doesn't want to write this
 * page for some reason, it should redirty the locked page via
 * redirty_page_for_writepage() and it should then unlock the page and return 0
 */
int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page)
{
	wbc->pages_skipped++;
	return __set_page_dirty_nobuffers(page);
}
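/*
 * Example (not from the original source): a hypothetical ->writepage that
 * bails out under the protocol documented above: redirty the locked page
 * with redirty_page_for_writepage(), unlock it, and return 0.  The
 * bail-out condition is illustrative only.
 */
static int example_writepage(struct page *page, struct writeback_control *wbc)
{
	if (wbc->sync_mode == WB_SYNC_NONE /* && fs is busy, illustrative */) {
		redirty_page_for_writepage(wbc, page);
		unlock_page(page);
		return 0;
	}
	/* ... actual writeout would go here ... */
	unlock_page(page);
	return 0;
}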
static int gfs2_write_begin(struct file *file, struct address_space *mapping,
			    loff_t pos, unsigned len, unsigned flags,
			    struct page **pagep, void **fsdata)
{
	struct gfs2_inode *ip = GFS2_I(mapping->host);
	struct gfs2_sbd *sdp = GFS2_SB(mapping->host);
	struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
	unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
	int alloc_required;
	int error = 0;
	struct gfs2_alloc *al = NULL;
	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
	unsigned from = pos & (PAGE_CACHE_SIZE - 1);
	struct page *page;

	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh);
	error = gfs2_glock_nq(&ip->i_gh);
	if (unlikely(error))
		goto out_uninit;
	if (&ip->i_inode == sdp->sd_rindex) {
		error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE,
					   GL_NOCACHE, &m_ip->i_gh);
		if (unlikely(error)) {
			gfs2_glock_dq(&ip->i_gh);
			goto out_uninit;
		}
	}

	alloc_required = gfs2_write_alloc_required(ip, pos, len);

	if (alloc_required || gfs2_is_jdata(ip))
		gfs2_write_calc_reserv(ip, len, &data_blocks, &ind_blocks);

	if (alloc_required) {
		al = gfs2_alloc_get(ip);
		if (!al) {
			error = -ENOMEM;
			goto out_unlock;
		}

		error = gfs2_quota_lock_check(ip);
		if (error)
			goto out_alloc_put;

		al->al_requested = data_blocks + ind_blocks;
		error = gfs2_inplace_reserve(ip);
		if (error)
			goto out_qunlock;
	}

	rblocks = RES_DINODE + ind_blocks;
	if (gfs2_is_jdata(ip))
		rblocks += data_blocks ? data_blocks : 1;
	if (ind_blocks || data_blocks)
		rblocks += RES_STATFS + RES_QUOTA;
	if (&ip->i_inode == sdp->sd_rindex)
		rblocks += 2 * RES_STATFS;
	if (alloc_required)
		rblocks += gfs2_rg_blocks(al);

	error = gfs2_trans_begin(sdp, rblocks,
				 PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize);
	if (error)
		goto out_trans_fail;

	error = -ENOMEM;
	flags |= AOP_FLAG_NOFS;
	page = grab_cache_page_write_begin(mapping, index, flags);
	*pagep = page;
	if (unlikely(!page))
		goto out_endtrans;

	if (gfs2_is_stuffed(ip)) {
		error = 0;
		if (pos + len > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) {
			error = gfs2_unstuff_dinode(ip, page);
			if (error == 0)
				goto prepare_write;
		} else if (!PageUptodate(page)) {
			error = stuffed_readpage(ip, page);
		}
		goto out;
	}

prepare_write:
	error = __block_write_begin(page, from, len, gfs2_block_map);
out:
	if (error == 0)
		return 0;

	unlock_page(page);
	page_cache_release(page);

	gfs2_trans_end(sdp);
	if (pos + len > ip->i_inode.i_size)
		gfs2_trim_blocks(&ip->i_inode);
	goto out_trans_fail;

out_endtrans:
	gfs2_trans_end(sdp);
out_trans_fail:
	if (alloc_required) {
		gfs2_inplace_release(ip);
out_qunlock:
		gfs2_quota_unlock(ip);
out_alloc_put:
		gfs2_alloc_put(ip);
	}
out_unlock:
	if (&ip->i_inode == sdp->sd_rindex) {
		gfs2_glock_dq(&m_ip->i_gh);
		gfs2_holder_uninit(&m_ip->i_gh);
	}
	gfs2_glock_dq(&ip->i_gh);
out_uninit:
	gfs2_holder_uninit(&ip->i_gh);
	return error;
}
static int ext4_convert_inline_data_to_extent(struct address_space *mapping,
                                              struct inode *inode,
                                              unsigned flags)
{
        int ret, needed_blocks;
        handle_t *handle = NULL;
        int retries = 0, sem_held = 0;
        struct page *page = NULL;
        unsigned from, to;
        struct ext4_iloc iloc;

        if (!ext4_has_inline_data(inode)) {
                /*
                 * Clear the flag so that no new write
                 * will trap here again.
                 */
                ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
                return 0;
        }

        needed_blocks = ext4_writepage_trans_blocks(inode);

        ret = ext4_get_inode_loc(inode, &iloc);
        if (ret)
                return ret;

retry:
        handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, needed_blocks);
        if (IS_ERR(handle)) {
                ret = PTR_ERR(handle);
                handle = NULL;
                goto out;
        }

        /*
         * We cannot recurse into the filesystem as the transaction
         * is already started.
         */
        flags |= AOP_FLAG_NOFS;

        page = grab_cache_page_write_begin(mapping, 0, flags);
        if (!page) {
                ret = -ENOMEM;
                goto out;
        }

        down_write(&EXT4_I(inode)->xattr_sem);
        sem_held = 1;
        /* If someone has already done this for us, just exit. */
        if (!ext4_has_inline_data(inode)) {
                ret = 0;
                goto out;
        }

        from = 0;
        to = ext4_get_inline_size(inode);
        if (!PageUptodate(page)) {
                ret = ext4_read_inline_page(inode, page);
                if (ret < 0)
                        goto out;
        }

        ret = ext4_destroy_inline_data_nolock(handle, inode);
        if (ret)
                goto out;

        if (ext4_should_dioread_nolock(inode))
                ret = __block_write_begin(page, from, to, ext4_get_block_write);
        else
                ret = __block_write_begin(page, from, to, ext4_get_block);

        if (!ret && ext4_should_journal_data(inode)) {
                ret = ext4_walk_page_buffers(handle, page_buffers(page),
                                             from, to, NULL,
                                             do_journal_get_write_access);
        }

        if (ret) {
                unlock_page(page);
                page_cache_release(page);
                page = NULL;
                ext4_orphan_add(handle, inode);
                up_write(&EXT4_I(inode)->xattr_sem);
                sem_held = 0;
                ext4_journal_stop(handle);
                handle = NULL;
                ext4_truncate_failed_write(inode);
                /*
                 * If truncate failed early the inode might
                 * still be on the orphan list; we need to
                 * make sure the inode is removed from the
                 * orphan list in that case.
                 */
                if (inode->i_nlink)
                        ext4_orphan_del(NULL, inode);
        }

        if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
                goto retry;

        if (page)
                block_commit_write(page, from, to);
out:
        if (page) {
                unlock_page(page);
                page_cache_release(page);
        }
        if (sem_held)
                up_write(&EXT4_I(inode)->xattr_sem);
        if (handle)
                ext4_journal_stop(handle);
        brelse(iloc.bh);
        return ret;
}
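The retry label above is a standard ext4 idiom: if the work fails with -ENOSPC, a journal commit may release blocks pinned by the committing transaction, so the operation is retried a bounded number of times. Stripped down to just that pattern (journalled_op_sketch() and do_work() are hypothetical stand-ins for the conversion body, not kernel functions):

/*
 * Hedged sketch of the ENOSPC-retry idiom used above;
 * do_work() is a hypothetical payload.
 */
static int journalled_op_sketch(struct inode *inode, int needed_blocks)
{
        handle_t *handle;
        int ret, retries = 0;

retry:
        handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, needed_blocks);
        if (IS_ERR(handle))
                return PTR_ERR(handle);

        ret = do_work(handle, inode);   /* hypothetical payload */

        ext4_journal_stop(handle);

        /* a commit may free space, so -ENOSPC is worth a bounded retry */
        if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
                goto retry;
        return ret;
}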
static int shrink_cache(int nr_pages, zone_t * classzone, unsigned int gfp_mask, int * failed_swapout)
{
        struct list_head * entry;
        int max_scan = (classzone->nr_inactive_pages + classzone->nr_active_pages) / vm_cache_scan_ratio;
        int max_mapped = vm_mapped_ratio * nr_pages;

        while (max_scan && classzone->nr_inactive_pages && (entry = inactive_list.prev) != &inactive_list) {
                struct page * page;

                if (unlikely(current->need_resched)) {
                        spin_unlock(&pagemap_lru_lock);
                        __set_current_state(TASK_RUNNING);
                        schedule();
                        spin_lock(&pagemap_lru_lock);
                        continue;
                }

                page = list_entry(entry, struct page, lru);

                BUG_ON(!PageLRU(page));
                BUG_ON(PageActive(page));

                list_del(entry);
                list_add(entry, &inactive_list);

                /*
                 * Zero page counts can happen because we unlink the pages
                 * _after_ decrementing the usage count..
                 */
                if (unlikely(!page_count(page)))
                        continue;

                if (!memclass(page_zone(page), classzone))
                        continue;

                max_scan--;

                /* Racy check to avoid trylocking when not worthwhile */
                if (!page->buffers && (page_count(page) != 1 || !page->mapping))
                        goto page_mapped;

                /*
                 * The page is locked. IO in progress?
                 * Move it to the back of the list.
                 */
                if (unlikely(TryLockPage(page))) {
                        if (PageLaunder(page) && (gfp_mask & __GFP_FS)) {
                                page_cache_get(page);
                                spin_unlock(&pagemap_lru_lock);
                                wait_on_page(page);
                                page_cache_release(page);
                                spin_lock(&pagemap_lru_lock);
                        }
                        continue;
                }

                if (PageDirty(page) && is_page_cache_freeable(page) && page->mapping) {
                        /*
                         * It is not critical here to write it only if
                         * the page is unmapped because any direct writer
                         * like O_DIRECT would set the PG_dirty bitflag
                         * on the physical page after having successfully
                         * pinned it and after the I/O to the page is finished,
                         * so the direct writes to the page cannot get lost.
                         */
                        int (*writepage)(struct page *);

                        writepage = page->mapping->a_ops->writepage;
                        if ((gfp_mask & __GFP_FS) && writepage) {
                                ClearPageDirty(page);
                                SetPageLaunder(page);
                                page_cache_get(page);
                                spin_unlock(&pagemap_lru_lock);

                                writepage(page);
                                page_cache_release(page);

                                spin_lock(&pagemap_lru_lock);
                                continue;
                        }
                }

                /*
                 * If the page has buffers, try to free the buffer mappings
                 * associated with this page. If we succeed we try to free
                 * the page as well.
                 */
                if (page->buffers) {
                        spin_unlock(&pagemap_lru_lock);

                        /* avoid freeing a locked page */
                        page_cache_get(page);

                        if (try_to_release_page(page, gfp_mask)) {
                                if (!page->mapping) {
                                        /*
                                         * We must not allow an anon page
                                         * with no buffers to be visible on
                                         * the LRU, so we unlock the page after
                                         * taking the lru lock
                                         */
                                        spin_lock(&pagemap_lru_lock);
                                        UnlockPage(page);
                                        __lru_cache_del(page);

                                        /* effectively free the page here */
                                        page_cache_release(page);

                                        if (--nr_pages)
                                                continue;
                                        break;
                                } else {
                                        /*
                                         * The page is still in pagecache so undo the stuff
                                         * before the try_to_release_page since we've not
                                         * finished and we can now try the next step.
                                         */
                                        page_cache_release(page);

                                        spin_lock(&pagemap_lru_lock);
                                }
                        } else {
                                /* failed to drop the buffers so stop here */
                                UnlockPage(page);
                                page_cache_release(page);

                                spin_lock(&pagemap_lru_lock);
                                continue;
                        }
                }

                spin_lock(&pagecache_lock);

                /*
                 * This is the non-racy check for busy page.
                 * It is critical to check PageDirty _after_ we made sure
                 * the page is freeable so not in use by anybody.
                 * At this point we're guaranteed that page->buffers is NULL,
                 * nobody can refill page->buffers under us because we still
                 * hold the page lock.
                 */
                if (!page->mapping || page_count(page) > 1) {
                        spin_unlock(&pagecache_lock);
                        UnlockPage(page);
page_mapped:
                        if (--max_mapped < 0) {
                                spin_unlock(&pagemap_lru_lock);

                                nr_pages -= kmem_cache_reap(gfp_mask);
                                if (nr_pages <= 0)
                                        goto out;

                                shrink_dcache_memory(vm_vfs_scan_ratio, gfp_mask);
                                shrink_icache_memory(vm_vfs_scan_ratio, gfp_mask);
#ifdef CONFIG_QUOTA
                                shrink_dqcache_memory(vm_vfs_scan_ratio, gfp_mask);
#endif

                                if (!*failed_swapout)
                                        *failed_swapout = !swap_out(classzone);

                                max_mapped = nr_pages * vm_mapped_ratio;

                                spin_lock(&pagemap_lru_lock);
                                refill_inactive(nr_pages, classzone);
                        }
                        continue;
                }

                if (PageDirty(page)) {
                        spin_unlock(&pagecache_lock);
                        UnlockPage(page);
                        continue;
                }

                __lru_cache_del(page);

                /* point of no return */
                if (likely(!PageSwapCache(page))) {
                        __remove_inode_page(page);
                        spin_unlock(&pagecache_lock);
                } else {
                        swp_entry_t swap;
                        swap.val = page->index;
                        __delete_from_swap_cache(page);
                        spin_unlock(&pagecache_lock);
                        swap_free(swap);
                }

                UnlockPage(page);

                /* effectively free the page here */
                page_cache_release(page);

                if (--nr_pages)
                        continue;
                break;
        }
        spin_unlock(&pagemap_lru_lock);

out:
        return nr_pages;
}
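shrink_cache() has a locking contract that is easy to miss: it must be entered with pagemap_lru_lock held, and it returns with the lock dropped, handing back the number of pages still wanted. A hedged sketch of a caller, modelled on the 2.4-era shrink_caches() wrapper (reclaim_pass() is a hypothetical name; this is not the verbatim kernel code):

/*
 * Hedged sketch of the calling convention (assumption: the 2.4 VM
 * entry points named below).  shrink_cache() is entered with
 * pagemap_lru_lock held and drops it before returning.
 */
static int reclaim_pass(zone_t *classzone, unsigned int gfp_mask,
                        int nr_pages, int *failed_swapout)
{
        /* slab reclaim alone may already satisfy the request */
        nr_pages -= kmem_cache_reap(gfp_mask);
        if (nr_pages <= 0)
                return 0;

        spin_lock(&pagemap_lru_lock);
        refill_inactive(nr_pages, classzone);   /* move active -> inactive */

        /* returns pages still needed; drops pagemap_lru_lock */
        return shrink_cache(nr_pages, classzone, gfp_mask, failed_swapout);
}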