/*
 * Wait for a request to complete.
 *
 * Interruptible by fatal signals only.
 */
static int nfs_wait_on_requests_locked(struct inode *inode, pgoff_t idx_start, unsigned int npages)
{
	struct nfs_inode *nfsi = NFS_I(inode);
	struct nfs_page *req;
	pgoff_t idx_end, next;
	unsigned int res = 0;
	int error;

	if (npages == 0)
		idx_end = ~0;
	else
		idx_end = idx_start + npages - 1;

	next = idx_start;
	while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_LOCKED)) {
		if (req->wb_index > idx_end)
			break;

		next = req->wb_index + 1;
		BUG_ON(!NFS_WBACK_BUSY(req));

		kref_get(&req->wb_kref);
		spin_unlock(&inode->i_lock);
		error = nfs_wait_on_request(req);
		nfs_release_request(req);
		spin_lock(&inode->i_lock);
		if (error < 0)
			return error;
		res++;
	}
	return res;
}
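/*
 * Illustrative sketch (editor-added, not part of fs/nfs/write.c): the loop
 * above repeatedly takes a reference on a busy request, drops i_lock, sleeps
 * until the request completes, then re-takes the lock before looking for the
 * next tagged entry. The standalone pthreads program below (compile it
 * separately as a userspace program) mimics that "ref, unlock, wait, relock"
 * discipline with hypothetical names (struct req, req_wait(), io_complete());
 * it is an analogy, not kernel API.
 */
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

struct req {
	int refs;		/* protected by list_lock */
	int busy;		/* 1 while "I/O" is outstanding */
	pthread_mutex_t lock;	/* guards busy */
	pthread_cond_t done;	/* signalled when busy drops to 0 */
};

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

static void req_wait(struct req *r)
{
	pthread_mutex_lock(&r->lock);
	while (r->busy)
		pthread_cond_wait(&r->done, &r->lock);
	pthread_mutex_unlock(&r->lock);
}

static void *io_complete(void *arg)
{
	struct req *r = arg;

	usleep(100 * 1000);		/* pretend the write takes a while */
	pthread_mutex_lock(&r->lock);
	r->busy = 0;
	pthread_cond_broadcast(&r->done);
	pthread_mutex_unlock(&r->lock);
	return NULL;
}

int main(void)
{
	struct req r = { .refs = 1, .busy = 1,
			 .lock = PTHREAD_MUTEX_INITIALIZER,
			 .done = PTHREAD_COND_INITIALIZER };
	pthread_t t;

	pthread_create(&t, NULL, io_complete, &r);

	pthread_mutex_lock(&list_lock);	/* plays the role of i_lock */
	r.refs++;			/* kref_get() analogue */
	pthread_mutex_unlock(&list_lock);
	req_wait(&r);			/* nfs_wait_on_request() analogue */
	pthread_mutex_lock(&list_lock);
	r.refs--;			/* nfs_release_request() analogue */
	pthread_mutex_unlock(&list_lock);

	printf("request completed, refs=%d\n", r.refs);
	pthread_join(t, NULL);
	return 0;
}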
int nfs_flush_incompatible(struct file *file, struct page *page)
{
	struct nfs_open_context *ctx = nfs_file_open_context(file);
	struct nfs_page *req;
	int do_flush, status;
	/*
	 * Look for a request corresponding to this page. If there
	 * is one, and it belongs to another file, we flush it out
	 * before we try to copy anything into the page. Do this
	 * due to the lack of an ACCESS-type call in NFSv2.
	 * Also do the same if we find a request from an existing
	 * dropped page.
	 */
	do {
		req = nfs_page_find_request(page);
		if (req == NULL)
			return 0;
		do_flush = req->wb_page != page || req->wb_context != ctx;
		nfs_release_request(req);
		if (!do_flush)
			return 0;
		status = nfs_wb_page(page->mapping->host, page);
	} while (status == 0);
	return status;
}
static struct nfs_page *nfs_find_and_lock_request(struct page *page)
{
	struct inode *inode = page->mapping->host;
	struct nfs_page *req;
	int ret;

	spin_lock(&inode->i_lock);
	for (;;) {
		req = nfs_page_find_request_locked(page);
		if (req == NULL)
			break;
		if (nfs_set_page_tag_locked(req))
			break;
		/* Note: If we hold the page lock, as is the case in nfs_writepage,
		 *	 then the call to nfs_set_page_tag_locked() will always
		 *	 succeed provided that someone hasn't already marked the
		 *	 request as dirty (in which case we don't care).
		 */
		spin_unlock(&inode->i_lock);
		ret = nfs_wait_on_request(req);
		nfs_release_request(req);
		if (ret != 0)
			return ERR_PTR(ret);
		spin_lock(&inode->i_lock);
	}
	spin_unlock(&inode->i_lock);
	return req;
}
/*
 * Remove a write request from an inode
 */
static inline void nfs_inode_remove_request(struct nfs_page *req)
{
	struct inode *inode;

	spin_lock(&nfs_wreq_lock);
	if (list_empty(&req->wb_hash)) {
		spin_unlock(&nfs_wreq_lock);
		return;
	}
	if (!NFS_WBACK_BUSY(req))
		printk(KERN_ERR "NFS: unlocked request attempted unhashed!\n");
	inode = req->wb_inode;
	list_del(&req->wb_hash);
	INIT_LIST_HEAD(&req->wb_hash);
	inode->u.nfs_i.npages--;
	if ((inode->u.nfs_i.npages == 0) != list_empty(&inode->u.nfs_i.writeback))
		printk(KERN_ERR "NFS: desynchronized value of nfs_i.npages.\n");
	if (list_empty(&inode->u.nfs_i.writeback)) {
		spin_unlock(&nfs_wreq_lock);
		iput(inode);
	} else
		spin_unlock(&nfs_wreq_lock);
	nfs_clear_request(req);
	nfs_release_request(req);
}
/**
 * nfs_unlock_request - Unlock request and wake up sleepers.
 * @req: request to unlock
 */
void nfs_unlock_request(struct nfs_page *req)
{
	if (!NFS_WBACK_BUSY(req)) {
		printk(KERN_ERR "NFS: Invalid unlock attempted\n");
		BUG();
	}
	smp_mb__before_clear_bit();
	clear_bit(PG_BUSY, &req->wb_flags);
	smp_mb__after_clear_bit();
	wake_up_bit(&req->wb_flags, PG_BUSY);
	nfs_release_request(req);
}
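/*
 * Illustrative sketch (editor-added): nfs_unlock_request() clears PG_BUSY
 * between two full barriers and only then wakes waiters, so a sleeper that
 * re-checks the bit after being woken is guaranteed to see it clear. The
 * standalone C11 program below (compile separately as a userspace program)
 * mimics that ordering with an atomic flag and a polling waiter; the names
 * (busy, unlock_request) are hypothetical, and polling stands in for the
 * real wait_on_bit()/wake_up_bit() machinery.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_int busy = 1;

static void unlock_request(void)
{
	/* seq_cst store mirrors smp_mb__before/after_clear_bit() around clear_bit() */
	atomic_store(&busy, 0);
	/* the kernel would now call wake_up_bit(); the waiter below simply polls */
}

static void *waiter(void *arg)
{
	(void)arg;
	while (atomic_load(&busy))	/* wait_on_bit() analogue */
		;
	printf("PG_BUSY observed clear, proceeding\n");
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, waiter, NULL);
	unlock_request();
	pthread_join(t, NULL);
	return 0;
}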
/*
 * Find an associated nfs write request, and prepare to flush it out
 * May return an error if the user signalled nfs_wait_on_request().
 */
static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
				struct page *page)
{
	struct inode *inode = page->mapping->host;
	struct nfs_page *req;
	int ret;

	spin_lock(&inode->i_lock);
	for (;;) {
		req = nfs_page_find_request_locked(page);
		if (req == NULL) {
			spin_unlock(&inode->i_lock);
			return 0;
		}
		if (nfs_set_page_tag_locked(req))
			break;
		/* Note: If we hold the page lock, as is the case in nfs_writepage,
		 *	 then the call to nfs_set_page_tag_locked() will always
		 *	 succeed provided that someone hasn't already marked the
		 *	 request as dirty (in which case we don't care).
		 */
		spin_unlock(&inode->i_lock);
		ret = nfs_wait_on_request(req);
		nfs_release_request(req);
		if (ret != 0)
			return ret;
		spin_lock(&inode->i_lock);
	}
	if (test_bit(PG_CLEAN, &req->wb_flags)) {
		spin_unlock(&inode->i_lock);
		BUG();
	}
	if (nfs_set_page_writeback(page) != 0) {
		spin_unlock(&inode->i_lock);
		BUG();
	}
	spin_unlock(&inode->i_lock);
	if (!nfs_pageio_add_request(pgio, req)) {
		nfs_redirty_request(req);
		return pgio->pg_error;
	}
	return 0;
}
/*
 * Remove a write request from an inode
 */
static void nfs_inode_remove_request(struct nfs_page *req)
{
	struct inode *inode = req->wb_context->path.dentry->d_inode;
	struct nfs_inode *nfsi = NFS_I(inode);

	BUG_ON(!NFS_WBACK_BUSY(req));

	spin_lock(&inode->i_lock);
	set_page_private(req->wb_page, 0);
	ClearPagePrivate(req->wb_page);
	radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index);
	nfsi->npages--;
	if (!nfsi->npages) {
		spin_unlock(&inode->i_lock);
		iput(inode);
	} else
		spin_unlock(&inode->i_lock);
	nfs_clear_request(req);
	nfs_release_request(req);
}
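/*
 * Illustrative sketch (editor-added): removing the last request drops the
 * reference that the cached write requests collectively hold on the inode
 * (the iput() in the !nfsi->npages branch above). The standalone program
 * below (userspace, compile separately) models that "container stays pinned
 * while it has entries" rule with hypothetical owner_put()/add_entry()
 * helpers; it is not kernel code.
 */
#include <stdio.h>
#include <stdlib.h>

struct owner {
	int refs;
	int npages;
};

static void owner_put(struct owner *o)
{
	if (--o->refs == 0) {
		printf("last reference dropped, freeing owner\n");
		free(o);
	}
}

static void add_entry(struct owner *o)
{
	if (o->npages++ == 0)
		o->refs++;	/* first entry pins the owner, like igrab() */
}

static void remove_entry(struct owner *o)
{
	if (--o->npages == 0)
		owner_put(o);	/* last entry drops the pin, like iput() */
}

int main(void)
{
	struct owner *o = calloc(1, sizeof(*o));

	o->refs = 1;		/* caller's own reference */
	add_entry(o);
	add_entry(o);
	remove_entry(o);
	remove_entry(o);	/* npages hits zero: pin released */
	owner_put(o);		/* caller's reference: owner is freed here */
	return 0;
}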
/*
 * Try to update an existing write request, or create one if there is none.
 *
 * Note: Should always be called with the Page Lock held to prevent races
 * if we have to add a new request. Also assumes that the caller has
 * already called nfs_flush_incompatible() if necessary.
 */
static struct nfs_page *nfs_setup_write_request(struct nfs_open_context *ctx,
		struct page *page, unsigned int offset, unsigned int bytes)
{
	struct inode *inode = page->mapping->host;
	struct nfs_page *req;
	int error;

	req = nfs_try_to_update_request(inode, page, offset, bytes);
	if (req != NULL)
		goto out;
	req = nfs_create_request(ctx, inode, page, offset, bytes);
	if (IS_ERR(req))
		goto out;
	error = nfs_inode_add_request(inode, req);
	if (error != 0) {
		nfs_release_request(req);
		req = ERR_PTR(error);
	}
out:
	return req;
}
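/*
 * Illustrative sketch (editor-added): nfs_setup_write_request() reports
 * failure by encoding a negative errno in the returned pointer (ERR_PTR),
 * which callers unpack with IS_ERR()/PTR_ERR(). The standalone userspace
 * program below reimplements that convention with a hypothetical
 * setup_request() helper, to show how the error value travels through the
 * pointer; the MAX_ERRNO bound mirrors the kernel's 4095 convention.
 */
#include <errno.h>
#include <stdio.h>

#define MAX_ERRNO 4095

static inline void *err_ptr(long error) { return (void *)error; }
static inline long ptr_err(const void *ptr) { return (long)ptr; }
static inline int is_err(const void *ptr)
{
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

struct request { int offset, bytes; };

/* pretend allocation/registration failed with -ENOMEM */
static struct request *setup_request(int offset, int bytes)
{
	(void)offset; (void)bytes;
	return err_ptr(-ENOMEM);
}

int main(void)
{
	struct request *req = setup_request(0, 4096);

	if (is_err(req)) {
		printf("setup failed: %ld\n", ptr_err(req));
		return 1;
	}
	printf("got request %p\n", (void *)req);
	return 0;
}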
/*
 * Wait for a request to complete.
 *
 * Interruptible by signals only if mounted with intr flag.
 */
static int nfs_wait_on_requests(struct inode *inode, struct file *file,
		unsigned long idx_start, unsigned int npages)
{
	struct list_head *p, *head;
	unsigned long idx_end;
	unsigned int res = 0;
	int error;

	if (npages == 0)
		idx_end = ~0;
	else
		idx_end = idx_start + npages - 1;

	head = &inode->u.nfs_i.writeback;
restart:
	spin_lock(&nfs_wreq_lock);
	list_for_each_prev(p, head) {
		unsigned long pg_idx;
		struct nfs_page *req = nfs_inode_wb_entry(p);

		if (file && req->wb_file != file)
			continue;

		pg_idx = page_index(req->wb_page);
		if (pg_idx < idx_start)
			break;
		if (pg_idx > idx_end)
			continue;

		if (!NFS_WBACK_BUSY(req))
			continue;
		req->wb_count++;
		spin_unlock(&nfs_wreq_lock);
		error = nfs_wait_on_request(req);
		nfs_release_request(req);
		if (error < 0)
			return error;
		res++;
		goto restart;
	}
	spin_unlock(&nfs_wreq_lock);
	return res;
}
/*
 * nfs_page_group_destroy - sync the destruction of page groups
 * @req - request that no longer needs the page group
 *
 * releases the page group reference from each member once all
 * members have called this function.
 */
static void nfs_page_group_destroy(struct kref *kref)
{
	struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref);
	struct nfs_page *tmp, *next;

	/* subrequests must release the ref on the head request */
	if (req->wb_head != req)
		nfs_release_request(req->wb_head);

	if (!nfs_page_group_sync_on_bit(req, PG_TEARDOWN))
		return;

	tmp = req;
	do {
		next = tmp->wb_this_page;
		/* unlink and free */
		tmp->wb_this_page = tmp;
		tmp->wb_head = tmp;
		nfs_free_request(tmp);
		tmp = next;
	} while (tmp != req);
}
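/*
 * Illustrative sketch (editor-added): page-group members are chained in a
 * circular singly linked list through wb_this_page, so teardown saves the
 * next pointer, points the node back at itself, frees it, and stops when the
 * walk returns to the starting request. The standalone userspace program
 * below does the same walk over a hypothetical struct member node.
 */
#include <stdio.h>
#include <stdlib.h>

struct member {
	int index;
	struct member *next;	/* wb_this_page analogue, circular */
};

static void group_destroy(struct member *head)
{
	struct member *tmp = head, *next;

	do {
		next = tmp->next;
		tmp->next = tmp;	/* unlink: node now points at itself */
		printf("freeing member %d\n", tmp->index);
		free(tmp);
		tmp = next;
	} while (tmp != head);
}

int main(void)
{
	struct member *m[3];
	int i;

	for (i = 0; i < 3; i++) {
		m[i] = malloc(sizeof(*m[i]));
		m[i]->index = i;
	}
	/* build the circular chain 0 -> 1 -> 2 -> 0 */
	m[0]->next = m[1];
	m[1]->next = m[2];
	m[2]->next = m[0];

	group_destroy(m[0]);
	return 0;
}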
/**
 * nfs_unlock_and_release_request - Unlock request and release the nfs_page
 * @req: request to unlock and release
 */
void nfs_unlock_and_release_request(struct nfs_page *req)
{
	nfs_unlock_request(req);
	nfs_release_request(req);
}
/*
 * Search for an existing write request, and attempt to update
 * it to reflect a new dirty region on a given page.
 *
 * If the attempt fails, then the existing request is flushed out
 * to disk.
 */
static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
		struct page *page, unsigned int offset, unsigned int bytes)
{
	struct nfs_page *req;
	unsigned int rqend;
	unsigned int end;
	int error;

	if (!PagePrivate(page))
		return NULL;

	end = offset + bytes;
	spin_lock(&inode->i_lock);

	for (;;) {
		req = nfs_page_find_request_locked(page);
		if (req == NULL)
			goto out_unlock;

		rqend = req->wb_offset + req->wb_bytes;
		/*
		 * Tell the caller to flush out the request if
		 * the offsets are non-contiguous.
		 * Note: nfs_flush_incompatible() will already
		 * have flushed out requests having wrong owners.
		 */
		if (offset > rqend || end < req->wb_offset)
			goto out_flushme;

		if (nfs_set_page_tag_locked(req))
			break;

		/* The request is locked, so wait and then retry */
		spin_unlock(&inode->i_lock);
		error = nfs_wait_on_request(req);
		nfs_release_request(req);
		if (error != 0)
			goto out_err;
		spin_lock(&inode->i_lock);
	}

	if (nfs_clear_request_commit(req))
		radix_tree_tag_clear(&NFS_I(inode)->nfs_page_tree,
				req->wb_index, NFS_PAGE_TAG_COMMIT);

	/* Okay, the request matches. Update the region */
	if (offset < req->wb_offset) {
		req->wb_offset = offset;
		req->wb_pgbase = offset;
	}
	if (end > rqend)
		req->wb_bytes = end - req->wb_offset;
	else
		req->wb_bytes = rqend - req->wb_offset;
out_unlock:
	spin_unlock(&inode->i_lock);
	return req;
out_flushme:
	spin_unlock(&inode->i_lock);
	nfs_release_request(req);
	error = nfs_wb_page(inode, page);
out_err:
	return ERR_PTR(error);
}
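/*
 * Illustrative sketch (editor-added): the region update above keeps the
 * request's [wb_offset, wb_offset + wb_bytes) span contiguous with the new
 * dirty span and grows it to cover both. The standalone userspace program
 * below redoes that arithmetic on plain integers (try_to_update() is a
 * hypothetical name) so the contiguity test and the two growth cases are
 * easy to check by hand.
 */
#include <stdio.h>

struct range { unsigned int offset, bytes; };

/* returns 0 and widens r on success, -1 if the spans are non-contiguous */
static int try_to_update(struct range *r, unsigned int offset, unsigned int bytes)
{
	unsigned int rqend = r->offset + r->bytes;
	unsigned int end = offset + bytes;

	if (offset > rqend || end < r->offset)
		return -1;		/* caller must flush instead */

	if (offset < r->offset)
		r->offset = offset;
	r->bytes = (end > rqend ? end : rqend) - r->offset;
	return 0;
}

int main(void)
{
	struct range r = { .offset = 512, .bytes = 1024 };	/* [512, 1536) */

	if (try_to_update(&r, 1024, 2048) == 0)			/* [1024, 3072) */
		printf("merged: offset=%u bytes=%u\n", r.offset, r.bytes);
	if (try_to_update(&r, 8192, 512) != 0)			/* gap after 3072 */
		printf("non-contiguous write, would flush the request\n");
	return 0;
}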
/* Flush the whole mapping synchronously (WB_SYNC_ALL) */
static int nfs_write_mapping(struct address_space *mapping, int how)
{
	struct writeback_control wbc = {
		.bdi = mapping->backing_dev_info,
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = LONG_MAX,
		.range_start = 0,
		.range_end = LLONG_MAX,
	};

	return __nfs_write_mapping(mapping, &wbc, how);
}

/*
 * flush the inode to disk.
 */
int nfs_wb_all(struct inode *inode)
{
	return nfs_write_mapping(inode->i_mapping, 0);
}

int nfs_wb_nocommit(struct inode *inode)
{
	return nfs_write_mapping(inode->i_mapping, FLUSH_NOCOMMIT);
}

int nfs_wb_page_cancel(struct inode *inode, struct page *page)
{
	struct nfs_page *req;
	loff_t range_start = page_offset(page);
	loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
	struct writeback_control wbc = {
		.bdi = page->mapping->backing_dev_info,
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = LONG_MAX,
		.range_start = range_start,
		.range_end = range_end,
	};
	int ret = 0;

	BUG_ON(!PageLocked(page));
	for (;;) {
		req = nfs_page_find_request(page);
		if (req == NULL)
			goto out;
		if (test_bit(PG_CLEAN, &req->wb_flags)) {
			nfs_release_request(req);
			break;
		}
		if (nfs_lock_request_dontget(req)) {
			nfs_inode_remove_request(req);
			/*
			 * In case nfs_inode_remove_request has marked the
			 * page as being dirty
			 */
			cancel_dirty_page(page, PAGE_CACHE_SIZE);
			nfs_unlock_request(req);
			break;
		}
		ret = nfs_wait_on_request(req);
		if (ret < 0)
			goto out;
	}
	if (!PagePrivate(page))
		return 0;
	ret = nfs_sync_mapping_wait(page->mapping, &wbc, FLUSH_INVALIDATE);
out:
	return ret;
}

static int nfs_wb_page_priority(struct inode *inode, struct page *page,
				int how)
{
	loff_t range_start = page_offset(page);
	loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
	struct writeback_control wbc = {
		.bdi = page->mapping->backing_dev_info,
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = LONG_MAX,
		.range_start = range_start,
		.range_end = range_end,
	};
	int ret;

	do {
		if (clear_page_dirty_for_io(page)) {
			ret = nfs_writepage_locked(page, &wbc);
			if (ret < 0)
				goto out_error;
		} else if (!PagePrivate(page))
			break;
		ret = nfs_sync_mapping_wait(page->mapping, &wbc, how);
		if (ret < 0)
			goto out_error;
	} while (PagePrivate(page));
	return 0;
out_error:
	__mark_inode_dirty(inode, I_DIRTY_PAGES);
	return ret;
}

/*
 * Write back all requests on one page - we do this before reading it.
 */
int nfs_wb_page(struct inode *inode, struct page *page)
{
	return nfs_wb_page_priority(inode, page, FLUSH_STABLE);
}

#ifdef CONFIG_MIGRATION
int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
		struct page *page)
{
	struct nfs_page *req;
	int ret;

	if (PageFsCache(page))
		nfs_fscache_release_page(page, GFP_KERNEL);

	req = nfs_find_and_lock_request(page);
	ret = PTR_ERR(req);
	if (IS_ERR(req))
		goto out;

	ret = migrate_page(mapping, newpage, page);
	if (!req)
		goto out;
	if (ret)
		goto out_unlock;
	page_cache_get(newpage);
	req->wb_page = newpage;
	SetPagePrivate(newpage);
	set_page_private(newpage, page_private(page));
	ClearPagePrivate(page);
	set_page_private(page, 0);
	page_cache_release(page);
out_unlock:
	nfs_clear_page_tag_locked(req);
	nfs_release_request(req);
out:
	return ret;
}
#endif

int __init nfs_init_writepagecache(void)
{
	nfs_wdata_cachep = kmem_cache_create("nfs_write_data",
					     sizeof(struct nfs_write_data),
					     0, SLAB_HWCACHE_ALIGN,
					     NULL);
	if (nfs_wdata_cachep == NULL)
		return -ENOMEM;

	nfs_wdata_mempool = mempool_create_slab_pool(MIN_POOL_WRITE,
						     nfs_wdata_cachep);
	if (nfs_wdata_mempool == NULL)
		return -ENOMEM;

	nfs_commit_mempool = mempool_create_slab_pool(MIN_POOL_COMMIT,
						      nfs_wdata_cachep);
	if (nfs_commit_mempool == NULL)
		return -ENOMEM;

	/*
	 * NFS congestion size, scale with available memory.
	 *
	 *  64MB:    8192k
	 * 128MB:   11585k
	 * 256MB:   16384k
	 * 512MB:   23170k
	 *   1GB:   32768k
	 *   2GB:   46340k
	 *   4GB:   65536k
	 *   8GB:   92681k
	 *  16GB:  131072k
	 *
	 * This allows larger machines to have larger/more transfers.
	 * Limit the default to 256M
	 */
	nfs_congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10);
	if (nfs_congestion_kb > 256*1024)
		nfs_congestion_kb = 256*1024;

	return 0;
}

void nfs_destroy_writepagecache(void)
{
	mempool_destroy(nfs_commit_mempool);
	mempool_destroy(nfs_wdata_mempool);
	kmem_cache_destroy(nfs_wdata_cachep);
}
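/*
 * Illustrative sketch (editor-added): nfs_congestion_kb scales as
 * 16 * sqrt(totalram_pages), shifted into kilobytes, and is capped at 256MB.
 * The standalone userspace program below redoes that calculation for the
 * memory sizes listed in the table inside nfs_init_writepagecache(),
 * assuming 4KB pages (PAGE_SHIFT == 12). Its integer square root rounds
 * down, so a few entries come out a few kB below the table's figures, which
 * were derived from the real square root.
 */
#include <stdio.h>

#define PAGE_SHIFT 12	/* assumed 4KB pages */

static unsigned long int_sqrt(unsigned long x)
{
	unsigned long r = 0;

	while ((r + 1) * (r + 1) <= x)
		r++;
	return r;
}

int main(void)
{
	/* total RAM in megabytes, matching the rows of the comment's table */
	static const unsigned long mb[] = { 64, 128, 256, 512, 1024,
					    2048, 4096, 8192, 16384 };
	unsigned int i;

	for (i = 0; i < sizeof(mb) / sizeof(mb[0]); i++) {
		unsigned long pages = mb[i] << (20 - PAGE_SHIFT);
		unsigned long kb = (16 * int_sqrt(pages)) << (PAGE_SHIFT - 10);

		if (kb > 256 * 1024)
			kb = 256 * 1024;	/* the 256MB cap */
		printf("%6luMB RAM -> nfs_congestion_kb = %luk\n", mb[i], kb);
	}
	return 0;
}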