/* * release a page and clean up its private state if it's not busy * - return true if the page can now be released, false if not */ static int afs_releasepage(struct page *page, gfp_t gfp_flags) { struct afs_writeback *wb = (struct afs_writeback *) page_private(page); struct afs_vnode *vnode = AFS_FS_I(page->mapping->host); _enter("{{%x:%u}[%lu],%lx},%x", vnode->fid.vid, vnode->fid.vnode, page->index, page->flags, gfp_flags); /* deny if page is being written to the cache and the caller hasn't * elected to wait */ #ifdef CONFIG_AFS_FSCACHE if (PageFsCache(page)) { if (fscache_check_page_write(vnode->cache, page)) { if (!(gfp_flags & __GFP_WAIT)) { _leave(" = F [cache busy]"); return 0; } fscache_wait_on_page_write(vnode->cache, page); } fscache_uncache_page(vnode->cache, page); ClearPageFsCache(page); } #endif if (PagePrivate(page)) { if (wb) { set_page_private(page, 0); afs_put_writeback(wb); } ClearPagePrivate(page); } /* indicate that the page can be released */ _leave(" = T"); return 1; }
/* * This must be called only on pages that have * been verified to be in the swap cache. */ void __delete_from_swap_cache(struct page *page) { struct address_space *address_space; int i, nr = hpage_nr_pages(page); swp_entry_t entry; pgoff_t idx; VM_BUG_ON_PAGE(!PageLocked(page), page); VM_BUG_ON_PAGE(!PageSwapCache(page), page); VM_BUG_ON_PAGE(PageWriteback(page), page); entry.val = page_private(page); address_space = swap_address_space(entry); idx = swp_offset(entry); for (i = 0; i < nr; i++) { radix_tree_delete(&address_space->page_tree, idx + i); set_page_private(page + i, 0); } ClearPageSwapCache(page); address_space->nrpages -= nr; __mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, -nr); ADD_CACHE_INFO(del_total, nr); }
/* Add an MFN override for a particular page */ int m2p_add_override(unsigned long mfn, struct page *page, struct gnttab_map_grant_ref *kmap_op) { unsigned long flags; unsigned long pfn; unsigned long uninitialized_var(address); unsigned level; pte_t *ptep = NULL; int ret = 0; pfn = page_to_pfn(page); if (!PageHighMem(page)) { address = (unsigned long)__va(pfn << PAGE_SHIFT); ptep = lookup_address(address, &level); if (WARN(ptep == NULL || level != PG_LEVEL_4K, "m2p_add_override: pfn %lx not mapped", pfn)) return -EINVAL; } WARN_ON(PagePrivate(page)); SetPagePrivate(page); set_page_private(page, mfn); page->index = pfn_to_mfn(pfn); if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) return -ENOMEM; if (kmap_op != NULL) { if (!PageHighMem(page)) { struct multicall_space mcs = xen_mc_entry(sizeof(*kmap_op)); MULTI_grant_table_op(mcs.mc, GNTTABOP_map_grant_ref, kmap_op, 1); xen_mc_issue(PARAVIRT_LAZY_MMU); } /* let's use dev_bus_addr to record the old mfn instead */ kmap_op->dev_bus_addr = page->index; page->index = (unsigned long) kmap_op; } spin_lock_irqsave(&m2p_override_lock, flags); list_add(&page->lru, &m2p_overrides[mfn_hash(mfn)]); spin_unlock_irqrestore(&m2p_override_lock, flags); /* p2m(m2p(mfn)) == mfn: the mfn is already present somewhere in * this domain. Set the FOREIGN_FRAME_BIT in the p2m for the other * pfn so that the following mfn_to_pfn(mfn) calls will return the * pfn from the m2p_override (the backend pfn) instead. * We need to do this because the pages shared by the frontend * (xen-blkfront) can be already locked (lock_page, called by * do_read_cache_page); when the userspace backend tries to use them * with direct_IO, mfn_to_pfn returns the pfn of the frontend, so * do_blockdev_direct_IO is going to try to lock the same pages * again resulting in a deadlock. * As a side effect get_user_pages_fast might not be safe on the * frontend pages while they are being shared with the backend, * because mfn_to_pfn (that ends up being called by GUPF) will * return the backend pfn rather than the frontend pfn. */ ret = __get_user(pfn, &machine_to_phys_mapping[mfn]); if (ret == 0 && get_phys_to_machine(pfn) == mfn) set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)); return 0; }
/* Two pass sync: first using WB_SYNC_NONE, then WB_SYNC_ALL */ static int nfs_write_mapping(struct address_space *mapping, int how) { struct writeback_control wbc = { .bdi = mapping->backing_dev_info, .sync_mode = WB_SYNC_ALL, .nr_to_write = LONG_MAX, .range_start = 0, .range_end = LLONG_MAX, }; return __nfs_write_mapping(mapping, &wbc, how); } /* * flush the inode to disk. */ int nfs_wb_all(struct inode *inode) { return nfs_write_mapping(inode->i_mapping, 0); } int nfs_wb_nocommit(struct inode *inode) { return nfs_write_mapping(inode->i_mapping, FLUSH_NOCOMMIT); } int nfs_wb_page_cancel(struct inode *inode, struct page *page) { struct nfs_page *req; loff_t range_start = page_offset(page); loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1); struct writeback_control wbc = { .bdi = page->mapping->backing_dev_info, .sync_mode = WB_SYNC_ALL, .nr_to_write = LONG_MAX, .range_start = range_start, .range_end = range_end, }; int ret = 0; BUG_ON(!PageLocked(page)); for (;;) { req = nfs_page_find_request(page); if (req == NULL) goto out; if (test_bit(PG_CLEAN, &req->wb_flags)) { nfs_release_request(req); break; } if (nfs_lock_request_dontget(req)) { nfs_inode_remove_request(req); /* * In case nfs_inode_remove_request has marked the * page as being dirty */ cancel_dirty_page(page, PAGE_CACHE_SIZE); nfs_unlock_request(req); break; } ret = nfs_wait_on_request(req); if (ret < 0) goto out; } if (!PagePrivate(page)) return 0; ret = nfs_sync_mapping_wait(page->mapping, &wbc, FLUSH_INVALIDATE); out: return ret; } static int nfs_wb_page_priority(struct inode *inode, struct page *page, int how) { loff_t range_start = page_offset(page); loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1); struct writeback_control wbc = { .bdi = page->mapping->backing_dev_info, .sync_mode = WB_SYNC_ALL, .nr_to_write = LONG_MAX, .range_start = range_start, .range_end = range_end, }; int ret; do { if (clear_page_dirty_for_io(page)) { ret = nfs_writepage_locked(page, &wbc); if (ret < 0) goto out_error; } else if (!PagePrivate(page)) break; ret = nfs_sync_mapping_wait(page->mapping, &wbc, how); if (ret < 0) goto out_error; } while (PagePrivate(page)); return 0; out_error: __mark_inode_dirty(inode, I_DIRTY_PAGES); return ret; } /* * Write back all requests on one page - we do this before reading it. */ int nfs_wb_page(struct inode *inode, struct page* page) { return nfs_wb_page_priority(inode, page, FLUSH_STABLE); } #ifdef CONFIG_MIGRATION int nfs_migrate_page(struct address_space *mapping, struct page *newpage, struct page *page) { struct nfs_page *req; int ret; if (PageFsCache(page)) nfs_fscache_release_page(page, GFP_KERNEL); req = nfs_find_and_lock_request(page); ret = PTR_ERR(req); if (IS_ERR(req)) goto out; ret = migrate_page(mapping, newpage, page); if (!req) goto out; if (ret) goto out_unlock; page_cache_get(newpage); req->wb_page = newpage; SetPagePrivate(newpage); set_page_private(newpage, page_private(page)); ClearPagePrivate(page); set_page_private(page, 0); page_cache_release(page); out_unlock: nfs_clear_page_tag_locked(req); nfs_release_request(req); out: return ret; } #endif int __init nfs_init_writepagecache(void) { nfs_wdata_cachep = kmem_cache_create("nfs_write_data", sizeof(struct nfs_write_data), 0, SLAB_HWCACHE_ALIGN, NULL); if (nfs_wdata_cachep == NULL) return -ENOMEM; nfs_wdata_mempool = mempool_create_slab_pool(MIN_POOL_WRITE, nfs_wdata_cachep); if (nfs_wdata_mempool == NULL) return -ENOMEM; nfs_commit_mempool = mempool_create_slab_pool(MIN_POOL_COMMIT, nfs_wdata_cachep); if (nfs_commit_mempool == NULL) return -ENOMEM; /* * NFS congestion size, scale with available memory. * * 64MB: 8192k * 128MB: 11585k * 256MB: 16384k * 512MB: 23170k * 1GB: 32768k * 2GB: 46340k * 4GB: 65536k * 8GB: 92681k * 16GB: 131072k * * This allows larger machines to have larger/more transfers. * Limit the default to 256M */ nfs_congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10); if (nfs_congestion_kb > 256*1024) nfs_congestion_kb = 256*1024; return 0; } void nfs_destroy_writepagecache(void) { mempool_destroy(nfs_commit_mempool); mempool_destroy(nfs_wdata_mempool); kmem_cache_destroy(nfs_wdata_cachep); }
/* * prepare to perform part of a write to a page */ int afs_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) { struct afs_vnode *vnode = AFS_FS_I(file_inode(file)); struct page *page; struct key *key = afs_file_key(file); unsigned long priv; unsigned f, from = pos & (PAGE_SIZE - 1); unsigned t, to = from + len; pgoff_t index = pos >> PAGE_SHIFT; int ret; _enter("{%x:%u},{%lx},%u,%u", vnode->fid.vid, vnode->fid.vnode, index, from, to); /* We want to store information about how much of a page is altered in * page->private. */ BUILD_BUG_ON(PAGE_SIZE > 32768 && sizeof(page->private) < 8); page = grab_cache_page_write_begin(mapping, index, flags); if (!page) return -ENOMEM; if (!PageUptodate(page) && len != PAGE_SIZE) { ret = afs_fill_page(vnode, key, pos & PAGE_MASK, PAGE_SIZE, page); if (ret < 0) { unlock_page(page); put_page(page); _leave(" = %d [prep]", ret); return ret; } SetPageUptodate(page); } /* page won't leak in error case: it eventually gets cleaned off LRU */ *pagep = page; try_again: /* See if this page is already partially written in a way that we can * merge the new write with. */ t = f = 0; if (PagePrivate(page)) { priv = page_private(page); f = priv & AFS_PRIV_MAX; t = priv >> AFS_PRIV_SHIFT; ASSERTCMP(f, <=, t); } if (f != t) { if (PageWriteback(page)) { trace_afs_page_dirty(vnode, tracepoint_string("alrdy"), page->index, priv); goto flush_conflicting_write; } /* If the file is being filled locally, allow inter-write * spaces to be merged into writes. If it's not, only write * back what the user gives us. */ if (!test_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags) && (to < f || from > t)) goto flush_conflicting_write; if (from < f) f = from; if (to > t) t = to; } else { f = from; t = to; } priv = (unsigned long)t << AFS_PRIV_SHIFT; priv |= f; trace_afs_page_dirty(vnode, tracepoint_string("begin"), page->index, priv); SetPagePrivate(page); set_page_private(page, priv); _leave(" = 0"); return 0; /* The previous write and this write aren't adjacent or overlapping, so * flush the page out. */ flush_conflicting_write: _debug("flush conflict"); ret = write_one_page(page); if (ret < 0) { _leave(" = %d", ret); return ret; } ret = lock_page_killable(page); if (ret < 0) { _leave(" = %d", ret); return ret; } goto try_again; }