/* * If the page can not be invalidated, it is moved to the * inactive list to speed up its reclaim. It is moved to the * head of the list, rather than the tail, to give the flusher * threads some time to write it out, as this is much more * effective than the single-page writeout from reclaim. * * If the page isn't page_mapped and dirty/writeback, the page * could reclaim asap using PG_reclaim. * * 1. active, mapped page -> none * 2. active, dirty/writeback page -> inactive, head, PG_reclaim * 3. inactive, mapped page -> none * 4. inactive, dirty/writeback page -> inactive, head, PG_reclaim * 5. inactive, clean -> inactive, tail * 6. Others -> none * * In 4, why it moves inactive's head, the VM expects the page would * be write it out by flusher threads as this is much more effective * than the single-page writeout from reclaim. */ static void lru_deactivate_fn(struct page *page, void *arg) { int lru, file; bool active; struct zone *zone = page_zone(page); if (!PageLRU(page)) return; if (PageUnevictable(page)) return; /* Some processes are using the page */ if (page_mapped(page)) return; active = PageActive(page); file = page_is_file_cache(page); lru = page_lru_base_type(page); del_page_from_lru_list(zone, page, lru + active); ClearPageActive(page); ClearPageReferenced(page); add_page_to_lru_list(zone, page, lru); if (PageWriteback(page) || PageDirty(page)) { /* * PG_reclaim could be raced with end_page_writeback * It can make readahead confusing. But race window * is _really_ small and it's non-critical problem. */ SetPageReclaim(page); } else { /* * The page's writeback ends up during pagevec * We moves tha page into tail of inactive. */ list_move_tail(&page->lru, &zone->lru[lru].list); mem_cgroup_rotate_reclaimable_page(page); __count_vm_event(PGROTATED); } if (active) __count_vm_event(PGDEACTIVATE); update_page_reclaim_stat(zone, page, file, 0); }
static int __bdev_writeseg(struct super_block *sb, u64 ofs, pgoff_t index, size_t nr_pages) { struct logfs_super *super = logfs_super(sb); struct address_space *mapping = super->s_mapping_inode->i_mapping; struct bio *bio; struct page *page; unsigned int max_pages; int i; max_pages = min(nr_pages, (size_t) bio_get_nr_vecs(super->s_bdev)); bio = bio_alloc(GFP_NOFS, max_pages); BUG_ON(!bio); for (i = 0; i < nr_pages; i++) { if (i >= max_pages) { /* Block layer cannot split bios :( */ bio->bi_vcnt = i; bio->bi_idx = 0; bio->bi_size = i * PAGE_SIZE; bio->bi_bdev = super->s_bdev; bio->bi_sector = ofs >> 9; bio->bi_private = sb; bio->bi_end_io = writeseg_end_io; atomic_inc(&super->s_pending_writes); submit_bio(WRITE, bio); ofs += i * PAGE_SIZE; index += i; nr_pages -= i; i = 0; bio = bio_alloc(GFP_NOFS, max_pages); BUG_ON(!bio); } page = find_lock_page(mapping, index + i); BUG_ON(!page); bio->bi_io_vec[i].bv_page = page; bio->bi_io_vec[i].bv_len = PAGE_SIZE; bio->bi_io_vec[i].bv_offset = 0; BUG_ON(PageWriteback(page)); set_page_writeback(page); unlock_page(page); }
/* * This must be called only on pages that have * been verified to be in the swap cache. */ void __delete_from_swap_cache(struct page *page) { swp_entry_t entry; struct address_space *address_space; VM_BUG_ON_PAGE(!PageLocked(page), page); VM_BUG_ON_PAGE(!PageSwapCache(page), page); VM_BUG_ON_PAGE(PageWriteback(page), page); entry.val = page_private(page); address_space = swap_address_space(entry); radix_tree_delete(&address_space->page_tree, page_private(page)); set_page_private(page, 0); ClearPageSwapCache(page); address_space->nrpages--; __dec_zone_page_state(page, NR_FILE_PAGES); INC_CACHE_INFO(del_total); }
/** * Implements Linux VM address_space::invalidatepage() method. This method is * called when the page is truncate from a file, either as a result of * explicit truncate, or when inode is removed from memory (as a result of * final iput(), umount, or memory pressure induced icache shrinking). * * [0, offset] bytes of the page remain valid (this is for a case of not-page * aligned truncate). Lustre leaves partially truncated page in the cache, * relying on struct inode::i_size to limit further accesses. */ static void ll_invalidatepage(struct page *vmpage, #ifdef HAVE_INVALIDATE_RANGE unsigned int offset, unsigned int length #else unsigned long offset #endif ) { struct inode *inode; struct lu_env *env; struct cl_page *page; struct cl_object *obj; LASSERT(PageLocked(vmpage)); LASSERT(!PageWriteback(vmpage)); /* * It is safe to not check anything in invalidatepage/releasepage * below because they are run with page locked and all our io is * happening with locked page too */ #ifdef HAVE_INVALIDATE_RANGE if (offset == 0 && length == PAGE_SIZE) { #else if (offset == 0) { #endif /* See the comment in ll_releasepage() */ env = cl_env_percpu_get(); LASSERT(!IS_ERR(env)); inode = vmpage->mapping->host; obj = ll_i2info(inode)->lli_clob; if (obj != NULL) { page = cl_vmpage_page(vmpage, obj); if (page != NULL) { cl_page_delete(env, page); cl_page_put(env, page); } } else LASSERT(vmpage->private == 0); cl_env_percpu_put(env); } }
/* * write out a page to a file */ static int write_page(struct bitmap *bitmap, struct page *page, int wait) { int ret = -ENOMEM; if (bitmap->file == NULL) return write_sb_page(bitmap->mddev, bitmap->offset, page, wait); flush_dcache_page(page); /* make sure visible to anyone reading the file */ if (wait) lock_page(page); else { if (TestSetPageLocked(page)) return -EAGAIN; /* already locked */ if (PageWriteback(page)) { unlock_page(page); return -EAGAIN; } } ret = page->mapping->a_ops->prepare_write(bitmap->file, page, 0, PAGE_SIZE); if (!ret) ret = page->mapping->a_ops->commit_write(bitmap->file, page, 0, PAGE_SIZE); if (ret) { unlock_page(page); return ret; } set_page_dirty(page); /* force it to be written out */ if (!wait) { /* add to list to be waited for by daemon */ struct page_list *item = mempool_alloc(bitmap->write_pool, GFP_NOIO); item->page = page; get_page(page); spin_lock(&bitmap->write_lock); list_add(&item->list, &bitmap->complete_pages); spin_unlock(&bitmap->write_lock); md_wakeup_thread(bitmap->writeback_daemon); } return write_one_page(page, wait); }
/** * nilfs_copy_page -- copy the page with buffers * @dst: destination page * @src: source page * @copy_dirty: flag whether to copy dirty states on the page's buffer heads. * * This function is for both data pages and btnode pages. The dirty flag * should be treated by caller. The page must not be under i/o. * Both src and dst page must be locked */ static void nilfs_copy_page(struct page *dst, struct page *src, int copy_dirty) { struct buffer_head *dbh, *dbufs, *sbh, *sbufs; unsigned long mask = NILFS_BUFFER_INHERENT_BITS; BUG_ON(PageWriteback(dst)); sbh = sbufs = page_buffers(src); if (!page_has_buffers(dst)) create_empty_buffers(dst, sbh->b_size, 0); if (copy_dirty) mask |= BIT(BH_Dirty); dbh = dbufs = page_buffers(dst); do { lock_buffer(sbh); lock_buffer(dbh); dbh->b_state = sbh->b_state & mask; dbh->b_blocknr = sbh->b_blocknr; dbh->b_bdev = sbh->b_bdev; sbh = sbh->b_this_page; dbh = dbh->b_this_page; } while (dbh != dbufs); copy_highpage(dst, src); if (PageUptodate(src) && !PageUptodate(dst)) SetPageUptodate(dst); else if (!PageUptodate(src) && PageUptodate(dst)) ClearPageUptodate(dst); if (PageMappedToDisk(src) && !PageMappedToDisk(dst)) SetPageMappedToDisk(dst); else if (!PageMappedToDisk(src) && PageMappedToDisk(dst)) ClearPageMappedToDisk(dst); do { unlock_buffer(sbh); unlock_buffer(dbh); sbh = sbh->b_this_page; dbh = dbh->b_this_page; } while (dbh != dbufs); }
/*
 * Invalidate part or all of a page.
 *
 * The page must be locked.  The mapping's releasepage operation is invoked
 * only when the page carries private data, the whole page is being
 * invalidated (offset == 0) and the page is not under writeback; a partial
 * invalidation leaves the page untouched.
 */
static void afs_file_invalidatepage(struct page *page, unsigned long offset)
{
	_enter("{%lu},%lu", page->index, offset);

	BUG_ON(!PageLocked(page));

	if (PagePrivate(page) && offset == 0 && !PageWriteback(page))
		page->mapping->a_ops->releasepage(page, 0);

	_leave("");
} /* end afs_file_invalidatepage() */
/*
 * Per-page writeback callback: write one locked page via zfs_putpage().
 *
 * @pp:   page to write; must be locked and not under writeback
 * @wbc:  writeback control, passed through to zfs_putpage()
 * @data: the address_space the page belongs to
 *
 * Always returns 0; the result of zfs_putpage() is intentionally ignored.
 */
int zpl_putpage(struct page *pp, struct writeback_control *wbc, void *data)
{
	struct address_space *as = data;

	ASSERT(PageLocked(pp));
	ASSERT(!PageWriteback(pp));
	ASSERT(!(current->flags & PF_NOFS));

	/*
	 * Flag this call path so that memory allocations made below know it
	 * is unsafe to use KM_SLEEP (potential deadlock) and that
	 * KM_PUSHPAGE should be used instead.  The flag is dropped again as
	 * soon as the page has been handed off.
	 */
	current->flags |= PF_NOFS;
	(void) zfs_putpage(as->host, pp, wbc);
	current->flags &= ~PF_NOFS;

	return 0;
}
/** * Implements Linux VM address_space::invalidatepage() method. This method is * called when the page is truncate from a file, either as a result of * explicit truncate, or when inode is removed from memory (as a result of * final iput(), umount, or memory pressure induced icache shrinking). * * [0, offset] bytes of the page remain valid (this is for a case of not-page * aligned truncate). Lustre leaves partially truncated page in the cache, * relying on struct inode::i_size to limit further accesses. */ static int cl_invalidatepage(struct page *vmpage, unsigned long offset) { struct inode *inode; struct lu_env *env; struct cl_page *page; struct cl_object *obj; int result; int refcheck; LASSERT(PageLocked(vmpage)); LASSERT(!PageWriteback(vmpage)); /* * It is safe to not check anything in invalidatepage/releasepage * below because they are run with page locked and all our io is * happening with locked page too */ result = 0; if (offset == 0) { env = cl_env_get(&refcheck); if (!IS_ERR(env)) { inode = vmpage->mapping->host; obj = ll_i2info(inode)->lli_clob; if (obj != NULL) { page = cl_vmpage_page(vmpage, obj); if (page != NULL) { lu_ref_add(&page->cp_reference, "delete", vmpage); cl_page_delete(env, page); result = 1; lu_ref_del(&page->cp_reference, "delete", vmpage); cl_page_put(env, page); } } else LASSERT(vmpage->private == 0); cl_env_put(env, &refcheck); }
static int bc_io_show(struct seq_file *f, void *v) { struct list_head *lh; struct page_beancounter *pb; struct page *pg; lh = (struct list_head *)v; if (lh == &pb_io_list) { seq_printf(f, "Races: anon %lu missed %lu\n", anon_pages, not_released); seq_printf(f, "%-*s %-1s %-*s %-4s %*s %*s " "%-*s %-*s %-1s %-*s %-*s\n", PTR_SIZE, "pb", "", PTR_SIZE, "page", "flg", INT_SIZE, "cnt", INT_SIZE, "mcnt", PTR_SIZE, "pb_list", PTR_SIZE, "page_pb", "", PTR_SIZE, "mapping", INT_SIZE, "ub"); return 0; } pb = list_entry(lh, struct page_beancounter, io_list); pg = pb->page; seq_printf(f, "%p %c %p %c%c%c%c %*d %*d %p %p %c %p %d\n", pb, pb->io_debug ? 'e' : 'm', pg, PageDirty(pg) ? 'D' : 'd', PageAnon(pg) ? 'A' : 'a', PageWriteback(pg) ? 'W' : 'w', PageLocked(pg) ? 'L' : 'l', INT_SIZE, page_count(pg), INT_SIZE, page_mapcount(pg), pb->page_pb_list, page_pbc(pg), iopb_to_pb(page_pbc(pg)) == pb ? ' ' : '!', pg->mapping, pb->ub->ub_uid); return 0; }
/* * This must be called only on pages that have * been verified to be in the swap cache. */ void __delete_from_swap_cache(struct page *page) { struct address_space *address_space; int i, nr = hpage_nr_pages(page); swp_entry_t entry; pgoff_t idx; VM_BUG_ON_PAGE(!PageLocked(page), page); VM_BUG_ON_PAGE(!PageSwapCache(page), page); VM_BUG_ON_PAGE(PageWriteback(page), page); entry.val = page_private(page); address_space = swap_address_space(entry); idx = swp_offset(entry); for (i = 0; i < nr; i++) { radix_tree_delete(&address_space->page_tree, idx + i); set_page_private(page + i, 0); } ClearPageSwapCache(page); address_space->nrpages -= nr; __mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, -nr); ADD_CACHE_INFO(del_total, nr); }
/**
 * invalidate_mapping_pages - Invalidate all the unlocked pages of one inode
 * @mapping: the address_space which holds the pages to invalidate
 * @start: the offset 'from' which to invalidate
 * @end: the offset 'to' which to invalidate (inclusive)
 *
 * Best-effort, non-blocking invalidation: a page is skipped when it is
 * locked by someone else, dirty, under writeback, or mapped into
 * pagetables.  To remove every page of an inode use truncate_inode_pages()
 * instead.
 *
 * Returns the sum of the invalidate_complete_page() results.
 */
unsigned long invalidate_mapping_pages(struct address_space *mapping, pgoff_t start, pgoff_t end)
{
	struct pagevec pvec;
	pgoff_t next = start;
	unsigned long nr_invalidated = 0;
	int i;

	pagevec_init(&pvec, 0);

	for (;;) {
		if (next > end)
			break;
		if (!pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE))
			break;

		for (i = 0; i < pagevec_count(&pvec); i++) {
			struct page *page = pvec.pages[i];

			/* Never block on the page lock; just step past it. */
			if (TestSetPageLocked(page)) {
				next++;
				continue;
			}

			/* Resume the next lookup right after this page. */
			if (page->index > next)
				next = page->index;
			next++;

			if (!PageDirty(page) && !PageWriteback(page) &&
			    !page_mapped(page))
				nr_invalidated += invalidate_complete_page(mapping, page);

			unlock_page(page);
			if (next > end)
				break;
		}

		pagevec_release(&pvec);
		cond_resched();
	}

	return nr_invalidated;
}
/**
 * truncate_inode_pages - truncate *all* the pages from an offset
 * @mapping: mapping to truncate
 * @lstart: offset from which to truncate
 *
 * Truncate the page cache at a set offset, removing the pages that are beyond
 * that offset (and zeroing out partial pages).
 *
 * Truncate takes two passes - the first pass is nonblocking.  It will not
 * block on page locks and it will not block on writeback.  The second pass
 * will wait.  This is to prevent as much IO as possible in the affected region.
 * The first pass will remove most pages, so the search cost of the second pass
 * is low.  Between the two passes, the page containing @lstart (if @lstart is
 * not page aligned) is partially truncated.
 *
 * When looking at page->index outside the page lock we need to be careful to
 * copy it into a local to avoid races (it could change at any time).
 *
 * We pass down the cache-hot hint to the page freeing code.  Even if the
 * mapping is large, it is probably the case that the final pages are the most
 * recently touched, and freeing happens in ascending file offset order.
 *
 * Called under (and serialised by) inode->i_sem.
 */
void truncate_inode_pages(struct address_space *mapping, loff_t lstart)
{
	/* Index of the first page that lies entirely beyond lstart. */
	const pgoff_t start = (lstart + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT;
	/* Offset of lstart within its page; 0 when lstart is page aligned. */
	const unsigned partial = lstart & (PAGE_CACHE_SIZE - 1);
	struct pagevec pvec;
	pgoff_t next;
	int i;

	/* Nothing cached at all: nothing to do. */
	if (mapping->nrpages == 0)
		return;

	/*
	 * Pass 1 (non-blocking): truncate every page we can lock right away
	 * and that is not under writeback; skip the rest.
	 */
	pagevec_init(&pvec, 0);
	next = start;
	while (pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
		for (i = 0; i < pagevec_count(&pvec); i++) {
			struct page *page = pvec.pages[i];
			/* Snapshot: the page is not locked yet. */
			pgoff_t page_index = page->index;

			/* Advance the lookup cursor past this page. */
			if (page_index > next)
				next = page_index;
			next++;
			if (TestSetPageLocked(page))
				continue;
			if (PageWriteback(page)) {
				unlock_page(page);
				continue;
			}
			truncate_complete_page(mapping, page);
			unlock_page(page);
		}
		pagevec_release(&pvec);
		cond_resched();
	}

	/*
	 * The page straddling lstart (index start - 1) is kept; only its
	 * tail is truncated, after any writeback on it has finished.
	 */
	if (partial) {
		struct page *page = find_lock_page(mapping, start - 1);
		if (page) {
			wait_on_page_writeback(page);
			truncate_partial_page(page, partial);
			unlock_page(page);
			/* Drop the reference obtained from find_lock_page(). */
			page_cache_release(page);
		}
	}

	/*
	 * Pass 2 (blocking): wait for page locks and writeback.  Whenever a
	 * lookup comes back empty, restart from 'start'; finish only when a
	 * lookup that began at 'start' finds nothing.
	 */
	next = start;
	for ( ; ; ) {
		cond_resched();
		if (!pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
			if (next == start)
				break;
			next = start;
			continue;
		}
		for (i = 0; i < pagevec_count(&pvec); i++) {
			struct page *page = pvec.pages[i];

			lock_page(page);
			wait_on_page_writeback(page);
			/* The page is locked here, so page->index is read directly. */
			if (page->index > next)
				next = page->index;
			next++;
			truncate_complete_page(mapping, page);
			unlock_page(page);
		}
		pagevec_release(&pvec);
	}
}
/**
 * Prepare a page-cache page for a buffered write.
 *
 * Looks up the cl context attached to \a file, obtains and locks the page
 * at \a pos, binds it to a cl_page owned by the current cl_io and, for a
 * partial write to a page that is not up to date, brings the page contents
 * in first.
 *
 * \param file    file being written
 * \param mapping address space of the file
 * \param pos     byte position of the write
 * \param len     number of bytes to be written into the page
 * \param flags   passed to grab_cache_page_write_begin()
 * \param pagep   [out] the locked page, set on success only
 * \param fsdata  [out] the ll_cl_context, set on success only
 *
 * \retval 0       success
 * \retval -EIO    no cl context is attached to \a file
 * \retval -ENOMEM the page could not be obtained
 * \retval other   negative error from vvp_io_write_commit(),
 *                 cl_page_find() or ll_prepare_partial_page()
 *
 * On failure the page (if any) is unlocked and released, the cl_page
 * reference is dropped, and the error is recorded in io->ci_result when an
 * io is available.
 */
static int ll_write_begin(struct file *file, struct address_space *mapping,
			  loff_t pos, unsigned len, unsigned flags,
			  struct page **pagep, void **fsdata)
{
	struct ll_cl_context *lcc;
	const struct lu_env *env = NULL;
	struct cl_io *io;
	struct cl_page *page = NULL;
	struct cl_object *clob = ll_i2info(mapping->host)->lli_clob;
	pgoff_t index = pos >> PAGE_SHIFT;
	struct page *vmpage = NULL;
	/* Byte range [from, to) of the write within the page. */
	unsigned from = pos & (PAGE_SIZE - 1);
	unsigned to = from + len;
	int result = 0;
	ENTRY;

	CDEBUG(D_VFSTRACE, "Writing %lu of %d to %d bytes\n", index, from, len);

	lcc = ll_cl_find(file);
	if (lcc == NULL) {
		/* The 'out' path tests io, so it must be defined here. */
		io = NULL;
		GOTO(out, result = -EIO);
	}

	env = lcc->lcc_env;
	io = lcc->lcc_io;

	/* To avoid deadlock, try to lock page first. */
	vmpage = grab_cache_page_nowait(mapping, index);
	if (unlikely(vmpage == NULL || PageDirty(vmpage) || PageWriteback(vmpage))) {
		struct vvp_io *vio = vvp_env_io(env);
		struct cl_page_list *plist = &vio->u.write.vui_queue;

		/* if the page is already in dirty cache, we have to commit
		 * the pages right now; otherwise, it may cause deadlock
		 * because it holds page lock of a dirty page and request for
		 * more grants. It's okay for the dirty page to be the first
		 * one in commit page list, though.
		 */
		if (vmpage != NULL && plist->pl_nr > 0) {
			unlock_page(vmpage);
			put_page(vmpage);
			vmpage = NULL;
		}

		/* commit pages and then wait for page lock */
		result = vvp_io_write_commit(env, io);
		if (result < 0)
			GOTO(out, result);

		/* Either never obtained or dropped above: take it blocking. */
		if (vmpage == NULL) {
			vmpage = grab_cache_page_write_begin(mapping, index, flags);
			if (vmpage == NULL)
				GOTO(out, result = -ENOMEM);
		}
	}

	page = cl_page_find(env, clob, vmpage->index, vmpage, CPT_CACHEABLE);
	if (IS_ERR(page))
		GOTO(out, result = PTR_ERR(page));

	/* Remember the page in the context and tag the reference with the io. */
	lcc->lcc_page = page;
	lu_ref_add(&page->cp_reference, "cl_io", io);

	cl_page_assume(env, io, page);
	if (!PageUptodate(vmpage)) {
		/*
		 * We're completely overwriting an existing page,
		 * so _don't_ set it up to date until commit_write
		 */
		if (from == 0 && to == PAGE_SIZE) {
			CL_PAGE_HEADER(D_PAGE, env, page, "full page write\n");
			POISON_PAGE(vmpage, 0x11);
		} else {
			/* TODO: can be optimized at OSC layer to check if it
			 * is a lockless IO. In that case, it's not necessary
			 * to read the data.
			 */
			result = ll_prepare_partial_page(env, io, page);
			if (result == 0)
				SetPageUptodate(vmpage);
		}
	}
	/* Undo cl_page_assume() if the partial-page preparation failed. */
	if (result < 0)
		cl_page_unassume(env, io, page);
	EXIT;
out:
	if (result < 0) {
		if (vmpage != NULL) {
			unlock_page(vmpage);
			put_page(vmpage);
		}
		/* page is NULL or an ERR_PTR when cl_page_find() was not reached or failed. */
		if (!IS_ERR_OR_NULL(page)) {
			lu_ref_del(&page->cp_reference, "cl_io", io);
			cl_page_put(env, page);
		}
		if (io)
			io->ci_result = result;
	} else {
		*pagep = vmpage;
		*fsdata = lcc;
	}
	RETURN(result);
}
/**
 * write_cache_pages - walk the list of dirty pages of the given address space and write all of them.
 * @mapping: address space structure to write
 * @wbc: subtract the number of written pages from *@wbc->nr_to_write
 * @writepage: function called for each page
 * @data: data passed to writepage function
 *
 * If a page is already under I/O, write_cache_pages() skips it, even
 * if it's dirty.  This is desirable behaviour for memory-cleaning writeback,
 * but it is INCORRECT for data-integrity system calls such as fsync().  fsync()
 * and msync() need to guarantee that all the data which was dirty at the time
 * the call was made get new I/O started against them.  Therefore, whenever
 * wbc->sync_mode is anything other than WB_SYNC_NONE, existing writeback on a
 * page is waited for before the page is examined.
 *
 * Returns 0 immediately if @wbc is nonblocking and the backing device is
 * congested; otherwise returns the result of the last @writepage call, with
 * AOP_WRITEPAGE_ACTIVATE mapped to 0.
 */
int write_cache_pages(struct address_space *mapping,
		      struct writeback_control *wbc, writepage_t writepage,
		      void *data)
{
	struct backing_dev_info *bdi = mapping->backing_dev_info;
	int ret = 0;
	int done = 0;
	struct pagevec pvec;
	int nr_pages;
	pgoff_t index;
	pgoff_t end;		/* Inclusive */
	int scanned = 0;	/* cleared only for a cyclic walk that has found no page yet */
	int range_whole = 0;
	/* Local budget; copied back to wbc at the end unless suppressed. */
	long nr_to_write = wbc->nr_to_write;

	if (wbc->nonblocking && bdi_write_congested(bdi)) {
		wbc->encountered_congestion = 1;
		return 0;
	}

	pagevec_init(&pvec, 0);
	if (wbc->range_cyclic) {
		index = mapping->writeback_index; /* Start from prev offset */
		end = -1;	/* wraps to the largest pgoff_t: no upper bound */
	} else {
		index = wbc->range_start >> PAGE_CACHE_SHIFT;
		end = wbc->range_end >> PAGE_CACHE_SHIFT;
		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
			range_whole = 1;
		/* An explicit range is walked once; never wrap around. */
		scanned = 1;
	}
retry:
	/* Look up at most PAGEVEC_SIZE dirty-tagged pages, never past 'end'. */
	while (!done && (index <= end) &&
	       (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
			      PAGECACHE_TAG_DIRTY,
			      min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
		unsigned i;

		scanned = 1;
		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];

			/*
			 * At this point we hold neither mapping->tree_lock nor
			 * lock on the page itself: the page may be truncated or
			 * invalidated (changing page->mapping to NULL), or even
			 * swizzled back from swapper_space to tmpfs file
			 * mapping
			 */
			lock_page(page);

			/* No longer belongs to this mapping: skip it. */
			if (unlikely(page->mapping != mapping)) {
				unlock_page(page);
				continue;
			}

			/* Beyond the requested range: stop after this pagevec. */
			if (!wbc->range_cyclic && page->index > end) {
				done = 1;
				unlock_page(page);
				continue;
			}

			if (wbc->sync_mode != WB_SYNC_NONE)
				wait_on_page_writeback(page);

			/* Skip pages still under I/O or no longer dirty. */
			if (PageWriteback(page) ||
			    !clear_page_dirty_for_io(page)) {
				unlock_page(page);
				continue;
			}

			ret = (*writepage)(page, wbc, data);

			/* ACTIVATE is not an error; the page is unlocked here in that case. */
			if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) {
				unlock_page(page);
				ret = 0;
			}
			/* Stop on error or once the write budget is used up. */
			if (ret || (--nr_to_write <= 0))
				done = 1;
			if (wbc->nonblocking && bdi_write_congested(bdi)) {
				wbc->encountered_congestion = 1;
				done = 1;
			}
		}
		pagevec_release(&pvec);
		cond_resched();
	}
	if (!scanned && !done) {
		/*
		 * We hit the last page and there is more work to be done: wrap
		 * back to the start of the file
		 */
		scanned = 1;
		index = 0;
		goto retry;
	}
	if (!wbc->no_nrwrite_index_update) {
		/* Remember where to resume the next cyclic/whole-file walk. */
		if (wbc->range_cyclic || (range_whole && nr_to_write > 0))
			mapping->writeback_index = index;
		wbc->nr_to_write = nr_to_write;
	}

	return ret;
}
int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) { struct f2fs_io_info fio = { .sbi = F2FS_I_SB(dn->inode), .type = DATA, .op = REQ_OP_WRITE, .op_flags = REQ_SYNC | REQ_PRIO, .page = page, .encrypted_page = NULL, }; int dirty, err; if (!f2fs_exist_data(dn->inode)) goto clear_out; err = f2fs_reserve_block(dn, 0); if (err) return err; f2fs_bug_on(F2FS_P_SB(page), PageWriteback(page)); read_inline_data(page, dn->inode_page); set_page_dirty(page); /* clear dirty state */ dirty = clear_page_dirty_for_io(page); /* write data page to try to make data consistent */ set_page_writeback(page); fio.old_blkaddr = dn->data_blkaddr; set_inode_flag(dn->inode, FI_HOT_DATA); write_data_page(dn, &fio); f2fs_wait_on_page_writeback(page, DATA, true); if (dirty) { inode_dec_dirty_pages(dn->inode); remove_dirty_inode(dn->inode); } /* this converted inline_data should be recovered. */ set_inode_flag(dn->inode, FI_APPEND_WRITE); /* clear inline data and flag after data writeback */ truncate_inline_inode(dn->inode, dn->inode_page, 0); clear_inline_node(dn->inode_page); clear_out: stat_dec_inline_inode(dn->inode); clear_inode_flag(dn->inode, FI_INLINE_DATA); f2fs_put_dnode(dn); return 0; } int f2fs_convert_inline_inode(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct dnode_of_data dn; struct page *ipage, *page; int err = 0; if (!f2fs_has_inline_data(inode)) return 0; page = f2fs_grab_cache_page(inode->i_mapping, 0, false); if (!page) return -ENOMEM; f2fs_lock_op(sbi); ipage = get_node_page(sbi, inode->i_ino); if (IS_ERR(ipage)) { err = PTR_ERR(ipage); goto out; } set_new_dnode(&dn, inode, ipage, ipage, 0); if (f2fs_has_inline_data(inode)) err = f2fs_convert_inline_page(&dn, page); f2fs_put_dnode(&dn); out: f2fs_unlock_op(sbi); f2fs_put_page(page, 1); f2fs_balance_fs(sbi, dn.node_changed); return err; } int f2fs_write_inline_data(struct inode *inode, struct page *page) { void *src_addr, *dst_addr; struct dnode_of_data dn; int err; 
set_new_dnode(&dn, inode, NULL, NULL, 0); err = get_dnode_of_data(&dn, 0, LOOKUP_NODE); if (err) return err; if (!f2fs_has_inline_data(inode)) { f2fs_put_dnode(&dn); return -EAGAIN; } f2fs_bug_on(F2FS_I_SB(inode), page->index); f2fs_wait_on_page_writeback(dn.inode_page, NODE, true); src_addr = kmap_atomic(page); dst_addr = inline_data_addr(dn.inode_page); memcpy(dst_addr, src_addr, MAX_INLINE_DATA); kunmap_atomic(src_addr); set_page_dirty(dn.inode_page); set_inode_flag(inode, FI_APPEND_WRITE); set_inode_flag(inode, FI_DATA_EXIST); clear_inline_node(dn.inode_page); f2fs_put_dnode(&dn); return 0; }
/*
 * Per-page writeback callback that tries to add the page to the multi-page
 * BIO carried in @data (a struct mpage_data).
 *
 * @page: locked page to write
 * @wbc:  writeback control for this writeback pass
 * @data: struct mpage_data: the BIO under construction, the last block
 *        placed in it, the get_block callback and the use_writepage flag
 *
 * The page is added to the BIO only if all the blocks to be written are
 * contiguous on disk and, when the page has buffers, every mapped buffer is
 * dirty and uptodate with no mapped buffer following an unmapped one.  Any
 * other layout jumps to the "confused" path, which submits the pending BIO
 * and either calls the mapping's ->writepage (when mpd->use_writepage is
 * set) or returns -EAGAIN.
 *
 * Returns 0 when the page was queued or written, -EAGAIN as described
 * above, or the result of ->writepage.  mpd->bio is updated on every exit.
 */
static int __mpage_writepage(struct page *page, struct writeback_control *wbc, void *data)
{
	struct mpage_data *mpd = data;
	struct bio *bio = mpd->bio;
	struct address_space *mapping = page->mapping;
	struct inode *inode = page->mapping->host;
	const unsigned blkbits = inode->i_blkbits;
	unsigned long end_index;
	const unsigned blocks_per_page = PAGE_CACHE_SIZE >> blkbits;
	sector_t last_block;
	sector_t block_in_file;
	/* Disk block number of each block of the page that will be written. */
	sector_t blocks[MAX_BUF_PER_PAGE];
	unsigned page_block;
	/* Index of the first unmapped block; blocks_per_page means "none". */
	unsigned first_unmapped = blocks_per_page;
	struct block_device *bdev = NULL;
	int boundary = 0;
	sector_t boundary_block = 0;
	struct block_device *boundary_bdev = NULL;
	int length;
	struct buffer_head map_bh;
	loff_t i_size = i_size_read(inode);
	int ret = 0;
	int wr = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE);

	if (page_has_buffers(page)) {
		struct buffer_head *head = page_buffers(page);
		struct buffer_head *bh = head;

		/* If they're all mapped and dirty, do it */
		page_block = 0;
		do {
			BUG_ON(buffer_locked(bh));
			if (!buffer_mapped(bh)) {
				/*
				 * unmapped dirty buffers are created by
				 * __set_page_dirty_buffers -> mmapped data
				 */
				if (buffer_dirty(bh))
					goto confused;
				if (first_unmapped == blocks_per_page)
					first_unmapped = page_block;
				continue;
			}

			if (first_unmapped != blocks_per_page)
				goto confused;	/* hole -> non-hole */

			if (!buffer_dirty(bh) || !buffer_uptodate(bh))
				goto confused;
			/* Every block must directly follow the previous one on disk. */
			if (page_block) {
				if (bh->b_blocknr != blocks[page_block-1] + 1)
					goto confused;
			}
			blocks[page_block++] = bh->b_blocknr;
			boundary = buffer_boundary(bh);
			if (boundary) {
				boundary_block = bh->b_blocknr;
				boundary_bdev = bh->b_bdev;
			}
			bdev = bh->b_bdev;
		} while ((bh = bh->b_this_page) != head);

		if (first_unmapped)
			goto page_is_mapped;

		/*
		 * Page has buffers, but they are all unmapped. The page was
		 * created by pagein or read over a hole which was handled by
		 * block_read_full_page().  If this address_space is also
		 * using mpage_readpages then this can rarely happen.
		 */
		goto confused;
	}

	/*
	 * The page has no buffers: map it to disk
	 */
	BUG_ON(!PageUptodate(page));
	block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits);
	/* Block that holds the last byte of the file. */
	last_block = (i_size - 1) >> blkbits;
	map_bh.b_page = page;
	for (page_block = 0; page_block < blocks_per_page; ) {

		map_bh.b_state = 0;
		map_bh.b_size = 1 << blkbits;
		if (mpd->get_block(inode, block_in_file, &map_bh, 1))
			goto confused;
		if (buffer_new(&map_bh))
			unmap_underlying_metadata(map_bh.b_bdev,
						map_bh.b_blocknr);
		if (buffer_boundary(&map_bh)) {
			boundary_block = map_bh.b_blocknr;
			boundary_bdev = map_bh.b_bdev;
		}
		/* Same on-disk contiguity requirement as in the buffer case. */
		if (page_block) {
			if (map_bh.b_blocknr != blocks[page_block-1] + 1)
				goto confused;
		}
		blocks[page_block++] = map_bh.b_blocknr;
		boundary = buffer_boundary(&map_bh);
		bdev = map_bh.b_bdev;
		/* Do not map blocks beyond the end of the file. */
		if (block_in_file == last_block)
			break;
		block_in_file++;
	}
	BUG_ON(page_block == 0);

	first_unmapped = page_block;

page_is_mapped:
	end_index = i_size >> PAGE_CACHE_SHIFT;
	if (page->index >= end_index) {
		/*
		 * The page straddles i_size.  It must be zeroed out on each
		 * and every writepage invocation because it may be mmapped.
		 * "A file is mapped in multiples of the page size.  For a file
		 * that is not a multiple of the page size, the remaining memory
		 * is zeroed when mapped, and writes to that region are not
		 * written out to the file."
		 */
		unsigned offset = i_size & (PAGE_CACHE_SIZE - 1);

		/* Page lies entirely beyond i_size: take the confused path. */
		if (page->index > end_index || !offset)
			goto confused;
		zero_user_segment(page, offset, PAGE_CACHE_SIZE);
	}

	/*
	 * This page will go to BIO.  Do we need to send this BIO off first?
	 */
	if (bio && mpd->last_block_in_bio != blocks[0] - 1)
		bio = mpage_bio_submit(wr, bio);

alloc_new:
	if (bio == NULL) {
		/* Fully mapped page: try bdev_write_page() before building a BIO. */
		if (first_unmapped == blocks_per_page) {
			if (!bdev_write_page(bdev, blocks[0] << (blkbits - 9),
								page, wbc)) {
				clean_buffers(page, first_unmapped);
				goto out;
			}
		}
		bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9),
				BIO_MAX_PAGES, GFP_NOFS|__GFP_HIGH);
		if (bio == NULL)
			goto confused;

		wbc_init_bio(wbc, bio);
	}

	/*
	 * Must try to add the page before marking the buffer clean or
	 * the confused fail path above (OOM) will be very confused when
	 * it finds all bh marked clean (i.e. it will not write anything)
	 */
	wbc_account_io(wbc, page, PAGE_SIZE);
	/* Only the mapped leading part of the page is written. */
	length = first_unmapped << blkbits;
	if (bio_add_page(bio, page, length, 0) < length) {
		/* BIO is full: submit it and retry with a new one. */
		bio = mpage_bio_submit(wr, bio);
		goto alloc_new;
	}

	clean_buffers(page, first_unmapped);

	BUG_ON(PageWriteback(page));
	set_page_writeback(page);
	unlock_page(page);
	if (boundary || (first_unmapped != blocks_per_page)) {
		/* Boundary block or partially mapped page: submit the BIO now. */
		bio = mpage_bio_submit(wr, bio);
		if (boundary_block) {
			write_boundary_block(boundary_bdev,
					boundary_block, 1 << blkbits);
		}
	} else {
		/* Keep the BIO open; remember where it ends for the next page. */
		mpd->last_block_in_bio = blocks[blocks_per_page - 1];
	}
	goto out;

confused:
	if (bio)
		bio = mpage_bio_submit(wr, bio);

	if (mpd->use_writepage) {
		ret = mapping->a_ops->writepage(page, wbc);
	} else {
		ret = -EAGAIN;
		goto out;
	}
	/*
	 * The caller has a ref on the inode, so *mapping is stable
	 */
	mapping_set_error(mapping, ret);
out:
	mpd->bio = bio;
	return ret;
}
int ext4_bio_write_page(struct ext4_io_submit *io, struct page *page, int len, struct writeback_control *wbc) { struct inode *inode = page->mapping->host; unsigned block_start, block_end, blocksize; struct ext4_io_page *io_page; struct buffer_head *bh, *head; int ret = 0; blocksize = 1 << inode->i_blkbits; BUG_ON(!PageLocked(page)); BUG_ON(PageWriteback(page)); io_page = kmem_cache_alloc(io_page_cachep, GFP_NOFS); if (!io_page) { set_page_dirty(page); unlock_page(page); return -ENOMEM; } io_page->p_page = page; atomic_set(&io_page->p_count, 1); get_page(page); set_page_writeback(page); ClearPageError(page); for (bh = head = page_buffers(page), block_start = 0; bh != head || !block_start; block_start = block_end, bh = bh->b_this_page) { block_end = block_start + blocksize; if (block_start >= len) { clear_buffer_dirty(bh); set_buffer_uptodate(bh); continue; } clear_buffer_dirty(bh); ret = io_submit_add_bh(io, io_page, inode, wbc, bh); if (ret) { /* * We only get here on ENOMEM. Not much else * we can do but mark the page as dirty, and * better luck next time. */ set_page_dirty(page); break; } } unlock_page(page); /* * If the page was truncated before we could do the writeback, * or we had a memory allocation error while trying to write * the first buffer head, we won't have submitted any pages for * I/O. In that case we need to make sure we've cleared the * PageWriteback bit from the page to prevent the system from * wedging later on. */ put_io_page(io_page); return ret; }
int ext4_bio_write_page(struct ext4_io_submit *io, struct page *page, int len, struct writeback_control *wbc) { struct inode *inode = page->mapping->host; unsigned block_start, block_end, blocksize; struct ext4_io_page *io_page; struct buffer_head *bh, *head; int ret = 0; blocksize = 1 << inode->i_blkbits; BUG_ON(!PageLocked(page)); BUG_ON(PageWriteback(page)); io_page = kmem_cache_alloc(io_page_cachep, GFP_NOFS); if (!io_page) { set_page_dirty(page); unlock_page(page); return -ENOMEM; } io_page->p_page = page; atomic_set(&io_page->p_count, 1); get_page(page); set_page_writeback(page); ClearPageError(page); /* * Comments copied from block_write_full_page_endio: * * The page straddles i_size. It must be zeroed out on each and every * writepage invocation because it may be mmapped. "A file is mapped * in multiples of the page size. For a file that is not a multiple of * the page size, the remaining memory is zeroed when mapped, and * writes to that region are not written out to the file." */ if (len < PAGE_CACHE_SIZE) zero_user_segment(page, len, PAGE_CACHE_SIZE); for (bh = head = page_buffers(page), block_start = 0; bh != head || !block_start; block_start = block_end, bh = bh->b_this_page) { block_end = block_start + blocksize; if (block_start >= len) { clear_buffer_dirty(bh); set_buffer_uptodate(bh); continue; } clear_buffer_dirty(bh); ret = io_submit_add_bh(io, io_page, inode, wbc, bh); if (ret) { /* * We only get here on ENOMEM. Not much else * we can do but mark the page as dirty, and * better luck next time. */ set_page_dirty(page); break; } } unlock_page(page); /* * If the page was truncated before we could do the writeback, * or we had a memory allocation error while trying to write * the first buffer head, we won't have submitted any pages for * I/O. In that case we need to make sure we've cleared the * PageWriteback bit from the page to prevent the system from * wedging later on. */ put_io_page(io_page); return ret; }
/**
 * write_cache_pages - write back the dirty pages of an address space
 * @mapping: address space whose dirty pages are walked
 * @wbc: writeback control; @wbc->nr_to_write is decremented per written page
 * @writepage: callback invoked on every page selected for writeback
 * @data: opaque cookie passed to @writepage
 *
 * A page that is already under I/O is skipped in WB_SYNC_NONE mode, even if
 * it is dirty.  That is fine for memory-cleaning writeback but would be wrong
 * for data-integrity callers such as fsync() and msync(), which must start
 * new I/O against everything that was dirty when they were called; for any
 * other sync mode the existing writeback is therefore waited for first.
 *
 * Return: 0, or the first error reported by @writepage.  Returns 0 without
 * writing anything when @wbc->nonblocking is set and the backing device is
 * congested.
 */
int write_cache_pages(struct address_space *mapping,
		      struct writeback_control *wbc, writepage_t writepage,
		      void *data)
{
	struct backing_dev_info *bdi = mapping->backing_dev_info;
	struct pagevec pvec;
	pgoff_t uninitialized_var(writeback_index);
	pgoff_t index;
	pgoff_t end;		/* Inclusive */
	pgoff_t done_index;
	int nr_pages;
	int wrapped;
	int stop = 0;
	int range_whole = 0;
	int ret = 0;

	if (wbc->nonblocking && bdi_write_congested(bdi)) {
		wbc->encountered_congestion = 1;
		return 0;
	}

	pagevec_init(&pvec, 0);
	if (wbc->range_cyclic) {
		writeback_index = mapping->writeback_index; /* prev offset */
		index = writeback_index;
		wrapped = (index == 0);
		end = -1;
	} else {
		index = wbc->range_start >> PAGE_CACHE_SHIFT;
		end = wbc->range_end >> PAGE_CACHE_SHIFT;
		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
			range_whole = 1;
		wrapped = 1;	/* ignore range_cyclic tests */
	}

retry:
	done_index = index;
	while (!stop && index <= end) {
		int i;

		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
			      PAGECACHE_TAG_DIRTY,
			      min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
		if (!nr_pages)
			break;

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];

			/*
			 * The page may have been truncated or invalidated
			 * (page->mapping set to NULL) or even moved between
			 * swapper_space and a tmpfs mapping by now, but
			 * page->index is stable because we hold a reference.
			 */
			if (page->index > end) {
				/*
				 * Cannot be the first range_cyclic pass,
				 * where end == -1.
				 */
				stop = 1;
				break;
			}

			done_index = page->index + 1;

			lock_page(page);

			/*
			 * Truncated or invalidated pages may be skipped even
			 * for data integrity: the page vanished concurrently,
			 * so nothing can be expected of it even if a new
			 * dirty page now sits at the same offset.
			 */
			if (unlikely(page->mapping != mapping))
				goto skip_locked;

			/* Somebody else already wrote it for us. */
			if (!PageDirty(page))
				goto skip_locked;

			if (PageWriteback(page)) {
				if (wbc->sync_mode == WB_SYNC_NONE)
					goto skip_locked;
				wait_on_page_writeback(page);
			}

			BUG_ON(PageWriteback(page));
			if (!clear_page_dirty_for_io(page))
				goto skip_locked;

			ret = (*writepage)(page, wbc, data);
			if (unlikely(ret)) {
				if (ret != AOP_WRITEPAGE_ACTIVATE) {
					/*
					 * done_index already points past this
					 * page, so a media error does not
					 * stall background writeout of the
					 * whole file.  This affects
					 * range_cyclic semantics (it may not
					 * suit data-integrity writeout).
					 */
					stop = 1;
					break;
				}
				unlock_page(page);
				ret = 0;
			}

			/*
			 * Stop once the budget is used up, but only outside
			 * integrity sync: there we must keep going because
			 * pages may be dirtied concurrently and the old dirty
			 * pages might not all have been written yet.
			 */
			if (wbc->nr_to_write > 0 &&
			    --wbc->nr_to_write == 0 &&
			    wbc->sync_mode == WB_SYNC_NONE) {
				stop = 1;
				break;
			}

			if (wbc->nonblocking && bdi_write_congested(bdi)) {
				wbc->encountered_congestion = 1;
				stop = 1;
				break;
			}
			continue;

skip_locked:
			unlock_page(page);
		}
		pagevec_release(&pvec);
		cond_resched();
	}

	if (!wrapped && !stop) {
		/*
		 * range_cyclic: the end of the file was reached with work
		 * left over, so wrap around to the start of the file.
		 */
		wrapped = 1;
		index = 0;
		end = writeback_index - 1;
		goto retry;
	}

	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
		mapping->writeback_index = done_index;

	return ret;
}
/*
 * remap_preparepage - get a locked page ready for remapping
 * @page:     page to prepare; must be locked (enforced by BUG_ON)
 * @fastmode: non-zero disables the retry/sleep loop when releasing buffers
 *
 * Waits for writeback, and if the page has private data (PagePrivate) tries
 * to write it out when dirty and then release it via try_to_release_page(),
 * retrying up to 10 times with a 10 ms sleep unless @fastmode is set.
 *
 * Return values visible in this excerpt: -REMAPPREP_WB when the page is under
 * writeback without private data or the writepage call returned
 * WRITEPAGE_ACTIVATE, -REMAPPREP_BUFFER when the buffers could not be
 * released, and 0 when the private data disappeared after the write.
 *
 * NOTE(review): the definition appears truncated in this file; the closing
 * part of the function (after the PagePrivate block) is not present.
 */
static int remap_preparepage(struct page *page, int fastmode)
{
	struct address_space *mapping;
	/* Number of release retries; zero in fast mode means "do not wait". */
	int waitcnt = fastmode ? 0 : 10;

	BUG_ON(!PageLocked(page));

	mapping = page_mapping(page);

	/* Under writeback with neither private data nor swap cache: give up. */
	if (PageWriteback(page) && !PagePrivate(page) && !PageSwapCache(page)) {
		printk("remap: mapping %p page %p\n", page->mapping, page);
		return -REMAPPREP_WB;
	}

	if (PageWriteback(page))
		wait_on_page_writeback(page);

	if (PagePrivate(page)) {
#ifdef DEBUG_MSG
		printk("rmap: process page with buffers...\n");
#endif
		/* XXX copied from shrink_list() */
		if (PageDirty(page) && is_page_cache_freeable(page) &&
		    mapping != NULL && mapping->a_ops->writepage != NULL) {
			spin_lock_irq(&mapping->tree_lock);
			if (clear_page_dirty_for_io(page)) {
				int res;
				struct writeback_control wbc = {
					.sync_mode = WB_SYNC_NONE,
					.nr_to_write = SWAP_CLUSTER_MAX,
					.nonblocking = 1,
					.for_reclaim = 1,
				};

				spin_unlock_irq(&mapping->tree_lock);

				SetPageReclaim(page);
				res = mapping->a_ops->writepage(page, &wbc);

				if (res < 0)
					/* error handling is not implemented */
					BUG();
				if (res == WRITEPAGE_ACTIVATE) {
					ClearPageReclaim(page);
					return -REMAPPREP_WB;
				}
				if (!PageWriteback(page)) {
					/* synchronous write or broken a_ops? */
					ClearPageReclaim(page);
				}
				/*
				 * Re-take the page lock after the writepage
				 * call (presumably writepage unlocked it --
				 * TODO confirm).
				 */
				lock_page(page);
				if (!PagePrivate(page))
					return 0;
			} else
				spin_unlock_irq(&mapping->tree_lock);
		}

		/* Keep trying to drop the private data until retries run out. */
		while (1) {
			if (try_to_release_page(page, GFP_KERNEL))
				break;
			if (!waitcnt)
				return -REMAPPREP_BUFFER;
			msleep(10);
			waitcnt--;
			/* Last retry used up: dump the buffer state. */
			if (!waitcnt)
				print_buffer(page);
		}
	}
/*
 * gfs2_write_jdata_pagevec - write one pagevec of jdata pages in a transaction
 * @mapping:    mapping the pages are expected to belong to
 * @wbc:        writeback control; nr_to_write is decremented per written page
 * @pvec:       pagevec holding the candidate pages
 * @nr_pages:   number of valid entries in @pvec
 * @end:        last page index (inclusive) to write
 * @done_index: updated with the index at which writeback should resume
 *
 * Returns a negative value if gfs2_trans_begin() fails.  Otherwise the loop
 * sets the result to 1 when it stops early (page past @end, write error, or
 * budget exhausted in WB_SYNC_NONE mode).
 */
static int gfs2_write_jdata_pagevec(struct address_space *mapping,
				    struct writeback_control *wbc,
				    struct pagevec *pvec,
				    int nr_pages, pgoff_t end,
				    pgoff_t *done_index)
{
	struct inode *inode = mapping->host;
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	unsigned nrblocks = nr_pages * (PAGE_CACHE_SIZE/inode->i_sb->s_blocksize);
	int ret;
	int i;

	ret = gfs2_trans_begin(sdp, nrblocks, nrblocks);
	if (ret < 0)
		return ret;

	for (i = 0; i < nr_pages; i++) {
		struct page *page = pvec->pages[i];

		/*
		 * The page may have been truncated or invalidated
		 * (page->mapping set to NULL), or moved between
		 * swapper_space and a tmpfs mapping, but page->index stays
		 * put because we hold a reference on the page.
		 */
		if (page->index > end) {
			/*
			 * Cannot happen on the first range_cyclic pass,
			 * where end == -1.
			 */
			ret = 1;
			break;
		}

		*done_index = page->index;

		lock_page(page);

		if (unlikely(page->mapping != mapping))
			goto skip_locked;

		/* Somebody else already wrote it for us. */
		if (!PageDirty(page))
			goto skip_locked;

		if (PageWriteback(page)) {
			if (wbc->sync_mode == WB_SYNC_NONE)
				goto skip_locked;
			wait_on_page_writeback(page);
		}

		BUG_ON(PageWriteback(page));
		if (!clear_page_dirty_for_io(page))
			goto skip_locked;

		trace_wbc_writepage(wbc, mapping->backing_dev_info);

		ret = __gfs2_jdata_writepage(page, wbc);
		if (unlikely(ret)) {
			if (ret != AOP_WRITEPAGE_ACTIVATE) {
				/*
				 * Point done_index past this page so that a
				 * media error does not stall background
				 * writeout of the whole file.  This affects
				 * range_cyclic semantics (it may not suit
				 * data-integrity writeout).
				 */
				*done_index = page->index + 1;
				ret = 1;
				break;
			}
			unlock_page(page);
			ret = 0;
		}

		/*
		 * Only non-integrity writeback stops when the budget runs
		 * out; integrity sync keeps going until every page tagged
		 * before this loop has been written.
		 */
		if (--wbc->nr_to_write <= 0 &&
		    wbc->sync_mode == WB_SYNC_NONE) {
			ret = 1;
			break;
		}
		continue;

skip_locked:
		unlock_page(page);
	}
	gfs2_trans_end(sdp);
	return ret;
}
/*
 * __set_page_owner - record who allocated a page
 * @page:     page that was allocated
 * @order:    allocation order, stored in the page's page_ext
 * @gfp_mask: allocation flags, stored in the page's page_ext
 *
 * Saves the current stack trace into the page_ext of @page (skipping the
 * three innermost frames) and sets PAGE_EXT_OWNER to mark the record valid.
 */
void __set_page_owner(struct page *page, unsigned int order, gfp_t gfp_mask)
{
	struct page_ext *page_ext = lookup_page_ext(page);
	struct stack_trace trace = {
		.nr_entries = 0,
		.max_entries = ARRAY_SIZE(page_ext->trace_entries),
		.entries = &page_ext->trace_entries[0],
		.skip = 3,
	};

	save_stack_trace(&trace);

	page_ext->order = order;
	page_ext->gfp_mask = gfp_mask;
	page_ext->nr_entries = trace.nr_entries;

	__set_bit(PAGE_EXT_OWNER, &page_ext->flags);
}

/*
 * print_page_owner - format one page-owner record into a user buffer
 * @buf:      user buffer receiving the text
 * @count:    size of @buf as requested by the reader
 * @pfn:      page frame number of @page
 * @page:     page being described
 * @page_ext: owner information recorded for @page
 *
 * Returns the number of bytes copied, -ENOMEM if the bounce buffer cannot be
 * allocated or the record does not fit, or -EFAULT if the copy to user space
 * fails.
 */
static ssize_t
print_page_owner(char __user *buf, size_t count, unsigned long pfn,
		struct page *page, struct page_ext *page_ext)
{
	int ret;
	int pageblock_mt, page_mt;
	char *kbuf;
	struct stack_trace trace = {
		.nr_entries = page_ext->nr_entries,
		.entries = &page_ext->trace_entries[0],
	};

	/*
	 * @count comes straight from the read() caller.  Bound it so that an
	 * oversized request cannot turn into an arbitrarily large kmalloc();
	 * a single record is far smaller than a page.
	 */
	count = min(count, (size_t)PAGE_SIZE);
	kbuf = kmalloc(count, GFP_KERNEL);
	if (!kbuf)
		return -ENOMEM;

	ret = snprintf(kbuf, count,
			"Page allocated via order %u, mask 0x%x\n",
			page_ext->order, page_ext->gfp_mask);

	if (ret >= count)
		goto err;

	/* Print information relevant to grouping pages by mobility */
	pageblock_mt = get_pfnblock_migratetype(page, pfn);
	page_mt  = gfpflags_to_migratetype(page_ext->gfp_mask);
	ret += snprintf(kbuf + ret, count - ret,
			"PFN %lu Block %lu type %d %s Flags %s%s%s%s%s%s%s%s%s%s%s%s\n",
			pfn,
			pfn >> pageblock_order,
			pageblock_mt,
			pageblock_mt != page_mt ? "Fallback" : " ",
			PageLocked(page)	? "K" : " ",
			PageError(page)		? "E" : " ",
			PageReferenced(page)	? "R" : " ",
			PageUptodate(page)	? "U" : " ",
			PageDirty(page)		? "D" : " ",
			PageLRU(page)		? "L" : " ",
			PageActive(page)	? "A" : " ",
			PageSlab(page)		? "S" : " ",
			PageWriteback(page)	? "W" : " ",
			PageCompound(page)	? "C" : " ",
			PageSwapCache(page)	? "B" : " ",
			PageMappedToDisk(page)	? "M" : " ");

	if (ret >= count)
		goto err;

	ret += snprint_stack_trace(kbuf + ret, count - ret, &trace, 0);
	if (ret >= count)
		goto err;

	ret += snprintf(kbuf + ret, count - ret, "\n");
	if (ret >= count)
		goto err;

	if (copy_to_user(buf, kbuf, ret))
		ret = -EFAULT;

	kfree(kbuf);
	return ret;

err:
	/* The record was truncated: the buffer is too small for it. */
	kfree(kbuf);
	return -ENOMEM;
}

/*
 * read_page_owner - read() handler returning one page-owner record per call
 * @file:  file being read (unused here)
 * @buf:   user buffer for the record
 * @count: size of @buf
 * @ppos:  file offset, interpreted as a PFN offset from min_low_pfn
 *
 * Scans forward from the PFN encoded in *@ppos for the next page that has
 * owner information, advances *@ppos past it and prints the record.
 *
 * Returns the result of print_page_owner(), 0 when no further page is found,
 * or -EINVAL when page owner tracking is not initialised.
 */
static ssize_t
read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos)
{
	unsigned long pfn;
	struct page *page;
	struct page_ext *page_ext;

	if (!page_owner_inited)
		return -EINVAL;

	page = NULL;
	pfn = min_low_pfn + *ppos;

	/* Find a valid PFN or the start of a MAX_ORDER_NR_PAGES area */
	while (!pfn_valid(pfn) && (pfn & (MAX_ORDER_NR_PAGES - 1)) != 0)
		pfn++;

	drain_all_pages(NULL);

	/* Find an allocated page */
	for (; pfn < max_pfn; pfn++) {
		/*
		 * If the new page is in a new MAX_ORDER_NR_PAGES area,
		 * validate the area as existing, skip it if not
		 */
		if ((pfn & (MAX_ORDER_NR_PAGES - 1)) == 0 && !pfn_valid(pfn)) {
			pfn += MAX_ORDER_NR_PAGES - 1;
			continue;
		}

		/* Check for holes within a MAX_ORDER area */
		if (!pfn_valid_within(pfn))
			continue;

		page = pfn_to_page(pfn);
		if (PageBuddy(page)) {
			unsigned long freepage_order = page_order_unsafe(page);

			/* Skip the rest of a free buddy block in one step. */
			if (freepage_order < MAX_ORDER)
				pfn += (1UL << freepage_order) - 1;
			continue;
		}

		page_ext = lookup_page_ext(page);

		/*
		 * Some pages could be missed by concurrent allocation or free,
		 * because we don't hold the zone lock.
		 */
		if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags))
			continue;

		/* Record the next PFN to read in the file offset */
		*ppos = (pfn - min_low_pfn) + 1;

		return print_page_owner(buf, count, pfn, page, page_ext);
	}

	return 0;
}
/**
 * truncate_inode_pages_range - truncate range of pages specified by start & end byte offsets
 * @mapping: mapping to truncate
 * @lstart: offset from which to truncate
 * @lend: offset to which to truncate (inclusive)
 *
 * Truncate the page cache, removing the pages that are between
 * specified offsets (and zeroing out partial pages
 * if lstart or lend + 1 is not page aligned).
 *
 * Truncate takes two passes - the first pass is nonblocking.  It will not
 * block on page locks and it will not block on writeback.  The second pass
 * will wait.  This is to prevent as much IO as possible in the affected region.
 * The first pass will remove most pages, so the search cost of the second pass
 * is low.
 *
 * We pass down the cache-hot hint to the page freeing code.  Even if the
 * mapping is large, it is probably the case that the final pages are the most
 * recently touched, and freeing happens in ascending file offset order.
 *
 * Note that since ->invalidatepage() accepts range to invalidate
 * truncate_inode_pages_range is able to handle cases where lend + 1 is not
 * page aligned properly.
 */
void truncate_inode_pages_range(struct address_space *mapping,
				loff_t lstart, loff_t lend)
{
	pgoff_t		start;		/* inclusive */
	pgoff_t		end;		/* exclusive */
	unsigned int	partial_start;	/* inclusive */
	unsigned int	partial_end;	/* exclusive */
	struct pagevec	pvec;
	pgoff_t		indices[PAGEVEC_SIZE];
	pgoff_t		index;
	int		i;

	cleancache_invalidate_inode(mapping);
	/* Nothing cached at all: no pages and no exceptional entries. */
	if (mapping->nrpages == 0 && mapping->nrexceptional == 0)
		return;

	/* Offsets within partial pages */
	partial_start = lstart & (PAGE_CACHE_SIZE - 1);
	partial_end = (lend + 1) & (PAGE_CACHE_SIZE - 1);

	/*
	 * 'start' and 'end' always covers the range of pages to be fully
	 * truncated. Partial pages are covered with 'partial_start' at the
	 * start of the range and 'partial_end' at the end of the range.
	 * Note that 'end' is exclusive while 'lend' is inclusive.
	 */
	start = (lstart + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
	if (lend == -1)
		/*
		 * lend == -1 indicates end-of-file so we have to set 'end'
		 * to the highest possible pgoff_t and since the type is
		 * unsigned we're using -1.
		 */
		end = -1;
	else
		end = (lend + 1) >> PAGE_CACHE_SHIFT;

	/*
	 * First pass: non-blocking.  Pages that cannot be locked immediately
	 * or that are under writeback are left for the second pass.
	 */
	pagevec_init(&pvec, 0);
	index = start;
	while (index < end && pagevec_lookup_entries(&pvec, mapping, index,
			min(end - index, (pgoff_t)PAGEVEC_SIZE),
			indices)) {
		for (i = 0; i < pagevec_count(&pvec); i++) {
			struct page *page = pvec.pages[i];

			/* We rely upon deletion not changing page->index */
			index = indices[i];
			if (index >= end)
				break;

			if (radix_tree_exceptional_entry(page)) {
				clear_exceptional_entry(mapping, index, page);
				continue;
			}

			if (!trylock_page(page))
				continue;
			WARN_ON(page->index != index);
			if (PageWriteback(page)) {
				unlock_page(page);
				continue;
			}
			truncate_inode_page(mapping, page);
			unlock_page(page);
		}
		pagevec_remove_exceptionals(&pvec);
		pagevec_release(&pvec);
		cond_resched();
		/* Resume the lookup just past the last index examined. */
		index++;
	}

	/* Zero the tail of the partial page at the start of the range. */
	if (partial_start) {
		struct page *page = find_lock_page(mapping, start - 1);
		if (page) {
			unsigned int top = PAGE_CACHE_SIZE;
			if (start > end) {
				/* Truncation within a single page */
				top = partial_end;
				partial_end = 0;
			}
			wait_on_page_writeback(page);
			zero_user_segment(page, partial_start, top);
			cleancache_invalidate_page(mapping, page);
			if (page_has_private(page))
				do_invalidatepage(page, partial_start,
						  top - partial_start);
			unlock_page(page);
			page_cache_release(page);
		}
	}
	/* Zero the head of the partial page at the end of the range. */
	if (partial_end) {
		struct page *page = find_lock_page(mapping, end);
		if (page) {
			wait_on_page_writeback(page);
			zero_user_segment(page, 0, partial_end);
			cleancache_invalidate_page(mapping, page);
			if (page_has_private(page))
				do_invalidatepage(page, 0,
						  partial_end);
			unlock_page(page);
			page_cache_release(page);
		}
	}
	/*
	 * If the truncation happened within a single page no pages
	 * will be released, just zeroed, so we can bail out now.
	 */
	if (start >= end)
		return;

	/*
	 * Second pass: blocking.  Lock every remaining page, wait for its
	 * writeback and truncate it, restarting from 'start' until a lookup
	 * from 'start' finds nothing left inside the range.
	 */
	index = start;
	for ( ; ; ) {
		cond_resched();
		if (!pagevec_lookup_entries(&pvec, mapping, index,
			min(end - index, (pgoff_t)PAGEVEC_SIZE), indices)) {
			/* If all gone from start onwards, we're done */
			if (index == start)
				break;
			/* Otherwise restart to make sure all gone */
			index = start;
			continue;
		}
		if (index == start && indices[0] >= end) {
			/* All gone out of hole to be punched, we're done */
			pagevec_remove_exceptionals(&pvec);
			pagevec_release(&pvec);
			break;
		}
		for (i = 0; i < pagevec_count(&pvec); i++) {
			struct page *page = pvec.pages[i];

			/* We rely upon deletion not changing page->index */
			index = indices[i];
			if (index >= end) {
				/*
				 * Restart punch to make sure all gone; the
				 * index++ after this loop brings the value
				 * back to 'start'.
				 */
				index = start - 1;
				break;
			}

			if (radix_tree_exceptional_entry(page)) {
				clear_exceptional_entry(mapping, index, page);
				continue;
			}

			lock_page(page);
			WARN_ON(page->index != index);
			wait_on_page_writeback(page);
			truncate_inode_page(mapping, page);
			unlock_page(page);
		}
		pagevec_remove_exceptionals(&pvec);
		pagevec_release(&pvec);
		index++;
	}
	cleancache_invalidate_inode(mapping);
}
/* * Inspired by write_cache_pages from /mm/page-writeback.c */ static int ecryptfs_writepages(struct address_space *mapping, struct writeback_control *wbc) { int ret = 0; int done = 0; struct pagevec pvec; int nr_pages; pgoff_t uninitialized_var(writeback_index); pgoff_t index; pgoff_t end; /* Inclusive */ pgoff_t done_index; int cycled; int range_whole = 0; int tag; struct page **pgs; int pgidx; /* printk("[g-ecryptfs] Info: call writepages\n"); */ pgs = kmalloc(sizeof(struct page*)*PAGEVEC_SIZE, GFP_KERNEL); if (!pgs) { printk("[g-ecryptfs] Error: pgs alloc failed!\n"); return -EFAULT; } pagevec_init(&pvec, 0); if (wbc->range_cyclic) { writeback_index = mapping->writeback_index; /* prev offset */ index = writeback_index; if (index == 0) cycled = 1; else cycled = 0; end = -1; } else { index = wbc->range_start >> PAGE_CACHE_SHIFT; end = wbc->range_end >> PAGE_CACHE_SHIFT; if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) range_whole = 1; cycled = 1; /* ignore range_cyclic tests */ } if (wbc->sync_mode == WB_SYNC_ALL) tag = PAGECACHE_TAG_TOWRITE; else tag = PAGECACHE_TAG_DIRTY; retry: if (wbc->sync_mode == WB_SYNC_ALL) tag_pages_for_writeback(mapping, index, end); done_index = index; while (!done && (index <= end)) { int i; struct page *page; nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag, min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); if (nr_pages == 0) break; pgidx = 0; for (i = 0; i < nr_pages; i++) { page = pvec.pages[i]; /* * At this point, the page may be truncated or * invalidated (changing page->mapping to NULL), or * even swizzled back from swapper_space to tmpfs file * mapping. However, page->index will not change * because we have a reference on the page. */ if (page->index > end) { /* * can't be range_cyclic (1st pass) because * end == -1 in that case. */ done = 1; break; } done_index = page->index + 1; lock_page(page); /* * Page truncated or invalidated. 
We can freely skip it * then, even for data integrity operations: the page * has disappeared concurrently, so there could be no * real expectation of this data interity operation * even if there is now a new, dirty page at the same * pagecache address. */ if (unlikely(page->mapping != mapping)) { continue_unlock: unlock_page(page); continue; } if (!PageDirty(page)) { /* someone wrote it for us */ goto continue_unlock; } if (PageWriteback(page)) { if (wbc->sync_mode != WB_SYNC_NONE) wait_on_page_writeback(page); else goto continue_unlock; } BUG_ON(PageWriteback(page)); if (!clear_page_dirty_for_io(page)) goto continue_unlock; pgs[pgidx++] = page; } /*trace_wbc_writepage(wbc, mapping->backing_dev_info);*/ ret = ecryptfs_encrypt_pages(pgs, pgidx); //printk("[g-ecryptfs] Info: enc %d pages in writepages\n", pgidx); mapping_set_error(mapping, ret); for (i = 0; i < nr_pages; i++) { page = pvec.pages[i]; if (unlikely(ret)) { if (ret == AOP_WRITEPAGE_ACTIVATE) { if (PageLocked(page)) unlock_page(page); ret = 0; } else { /* * done_index is set past this page, * so media errors will not choke * background writeout for the entire * file. This has consequences for * range_cyclic semantics (ie. it may * not be suitable for data integrity * writeout). */ done = 1; break; } } /* * We stop writing back only if we are not doing * integrity sync. In case of integrity sync we have to * keep going until we have written all the pages * we tagged for writeback prior to entering this loop. */ if (--wbc->nr_to_write <= 0 && wbc->sync_mode == WB_SYNC_NONE) { done = 1; break; } } pagevec_release(&pvec); cond_resched(); } if (!cycled && !done) { /* * range_cyclic: * We hit the last page and there is more work to be done: wrap * back to the start of the file */ cycled = 1; index = 0; end = writeback_index - 1; goto retry; } if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) mapping->writeback_index = done_index; kfree(pgs); return ret; }
/**
 * write_cache_pages - walk the dirty pages of an address space and write them
 * @mapping: address space whose pages are written
 * @wbc: writeback control (range, sync mode, nr_to_write budget)
 * @writepage: callback invoked for every page selected for writeback
 * @data: opaque cookie passed to @writepage
 *
 * Return: 0, or the first error reported by @writepage.
 */
int write_cache_pages(struct address_space *mapping,
		      struct writeback_control *wbc, writepage_t writepage,
		      void *data)
{
	struct pagevec pvec;
	pgoff_t uninitialized_var(writeback_index);
	pgoff_t index;		/* index of the next page to flush */
	pgoff_t end;		/* Inclusive; last page to flush, -1 when flushing cyclically */
	pgoff_t done_index;
	int nr_pages;
	/*
	 * Used when a cyclic flush has to wrap around and run as two
	 * segments: 1 means the first segment has already been flushed.
	 */
	int wrapped;
	int stop = 0;
	int range_whole = 0;
	int tag;
	int ret = 0;

	pagevec_init(&pvec, 0);
	if (wbc->range_cyclic) {
		/* Cyclic flush: resume where the previous run stopped. */
		writeback_index = mapping->writeback_index; /* prev offset */
		index = writeback_index;
		wrapped = (index == 0);
		end = -1;
	} else {
		index = wbc->range_start >> PAGE_CACHE_SHIFT;
		end = wbc->range_end >> PAGE_CACHE_SHIFT;
		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
			range_whole = 1;
		wrapped = 1;	/* ignore range_cyclic tests */
	}

	tag = (wbc->sync_mode == WB_SYNC_ALL) ? PAGECACHE_TAG_TOWRITE
					      : PAGECACHE_TAG_DIRTY;

retry:
	if (wbc->sync_mode == WB_SYNC_ALL)
		tag_pages_for_writeback(mapping, index, end);
	done_index = index;
	while (!stop && index <= end) {
		int i;

		/*
		 * Look up pages carrying the selected tag in the address
		 * space and collect them in the pagevec.
		 */
		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
			      min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
		if (!nr_pages)
			break;

		/* Process the pages that were found. */
		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];

			/*
			 * The page may have been truncated or invalidated
			 * (page->mapping set to NULL), or moved between
			 * swapper_space and a tmpfs mapping, but page->index
			 * is stable because we hold a reference on the page.
			 */
			if (page->index > end) {
				/*
				 * Cannot be the first range_cyclic pass,
				 * where end == -1.
				 */
				stop = 1;
				break;
			}

			done_index = page->index + 1;

			lock_page(page);

			/*
			 * Somebody else may have changed the page while we
			 * were waiting for the lock, so re-check its state.
			 *
			 * A truncated or invalidated page can be skipped even
			 * for data integrity: it vanished concurrently, so
			 * nothing can be expected of it even if a new dirty
			 * page now sits at the same pagecache offset.
			 */
			if (unlikely(page->mapping != mapping))
				goto skip_locked;

			/* Already written back by somebody else. */
			if (!PageDirty(page))
				goto skip_locked;

			/*
			 * Under writeback: what to do depends on sync_mode.
			 * Anything but WB_SYNC_NONE waits for the running
			 * writeback to finish before carrying on.
			 */
			if (PageWriteback(page)) {
				if (wbc->sync_mode == WB_SYNC_NONE)
					goto skip_locked;
				wait_on_page_writeback(page);
			}

			BUG_ON(PageWriteback(page));
			if (!clear_page_dirty_for_io(page))
				goto skip_locked;

			trace_wbc_writepage(wbc, mapping->backing_dev_info);
			/* Start writing back the dirty page. */
			ret = (*writepage)(page, wbc, data);
			if (unlikely(ret)) {
				if (ret != AOP_WRITEPAGE_ACTIVATE) {
					/*
					 * done_index already points past this
					 * page, so a media error does not
					 * stall background writeout of the
					 * whole file.  This affects
					 * range_cyclic semantics (it may not
					 * suit data-integrity writeout).
					 */
					stop = 1;
					break;
				}
				unlock_page(page);
				ret = 0;
			}

			/*
			 * Charge the page against the budget.  Only
			 * non-integrity writeback stops when the budget runs
			 * out; integrity sync continues until every page
			 * tagged before this loop has been written.
			 */
			if (--wbc->nr_to_write <= 0 &&
			    wbc->sync_mode == WB_SYNC_NONE) {
				stop = 1;
				break;
			}
			continue;

skip_locked:
			unlock_page(page);
		}
		pagevec_release(&pvec);
		cond_resched();
	}

	if (!wrapped && !stop) {
		/*
		 * range_cyclic: the last page was reached with work left
		 * over, so wrap around to the start of the file.
		 */
		wrapped = 1;
		index = 0;
		end = writeback_index - 1;
		goto retry;
	}

	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
		mapping->writeback_index = done_index;

	return ret;
}
/* * when btree blocks are allocated, they have some corresponding bits set for * them in one of two extent_io trees. This is used to make sure all of * those extents are on disk for transaction or log commit */ int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, struct extent_io_tree *dirty_pages) { int ret; int err = 0; int werr = 0; struct page *page; struct inode *btree_inode = root->fs_info->btree_inode; u64 start = 0; u64 end; unsigned long index; while (1) { ret = find_first_extent_bit(dirty_pages, start, &start, &end, EXTENT_DIRTY); if (ret) break; while (start <= end) { cond_resched(); index = start >> PAGE_CACHE_SHIFT; start = (u64)(index + 1) << PAGE_CACHE_SHIFT; page = find_get_page(btree_inode->i_mapping, index); if (!page) continue; btree_lock_page_hook(page); if (!page->mapping) { unlock_page(page); page_cache_release(page); continue; } if (PageWriteback(page)) { if (PageDirty(page)) wait_on_page_writeback(page); else { unlock_page(page); page_cache_release(page); continue; } } err = write_one_page(page, 0); if (err) werr = err; page_cache_release(page); } } while (1) { ret = find_first_extent_bit(dirty_pages, 0, &start, &end, EXTENT_DIRTY); if (ret) break; clear_extent_dirty(dirty_pages, start, end, GFP_NOFS); while (start <= end) { index = start >> PAGE_CACHE_SHIFT; start = (u64)(index + 1) << PAGE_CACHE_SHIFT; page = find_get_page(btree_inode->i_mapping, index); if (!page) continue; if (PageDirty(page)) { btree_lock_page_hook(page); wait_on_page_writeback(page); err = write_one_page(page, 0); if (err) werr = err; } wait_on_page_writeback(page); page_cache_release(page); cond_resched(); } } if (err) werr = err; return werr; }
/*
 * f2fs_write_data_page - writepage handler for f2fs data pages
 * @page: page to write
 * @wbc:  writeback control for this request
 *
 * Returns 0 after the page has been handled and unlocked (the "out" path),
 * or AOP_WRITEPAGE_ACTIVATE after redirtying the page (the "redirty_out"
 * path, which does not unlock the page here).
 */
static int f2fs_write_data_page(struct page *page,
					struct writeback_control *wbc)
{
	struct inode *inode = page->mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	loff_t i_size = i_size_read(inode);
	/* Index of the page that contains i_size. */
	const pgoff_t end_index = ((unsigned long long) i_size)
							>> PAGE_CACHE_SHIFT;
	unsigned offset = 0;
	bool need_balance_fs = false;
	int err = 0;
	struct f2fs_io_info fio = {
		.sbi = sbi,
		.type = DATA,
		.rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE,
		.page = page,
		.encrypted_page = NULL,
	};

	trace_f2fs_writepage(page, DATA);

	/* Pages entirely below i_size need no tail handling. */
	if (page->index < end_index)
		goto write;

	/*
	 * If the offset is out-of-range of file size,
	 * this page does not have to be written to disk.
	 */
	offset = i_size & (PAGE_CACHE_SIZE - 1);
	if ((page->index >= end_index + 1) || !offset)
		goto out;

	/* The page straddles i_size: clear the part beyond it. */
	zero_user_segment(page, offset, PAGE_CACHE_SIZE);
write:
	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
		goto redirty_out;
	if (f2fs_is_drop_cache(inode))
		goto out;
	if (f2fs_is_volatile_file(inode) && !wbc->for_reclaim &&
			available_free_memory(sbi, BASE_CHECK))
		goto redirty_out;

	/* Dentry blocks are controlled by checkpoint */
	if (S_ISDIR(inode->i_mode)) {
		if (unlikely(f2fs_cp_error(sbi)))
			goto redirty_out;
		err = do_write_data_page(&fio);
		goto done;
	}

	/* we should bypass data pages to proceed the kworker jobs */
	if (unlikely(f2fs_cp_error(sbi))) {
		SetPageError(page);
		goto out;
	}

	if (!wbc->for_reclaim)
		need_balance_fs = true;
	else if (has_not_enough_free_secs(sbi, 0))
		goto redirty_out;

	/*
	 * -EAGAIN is the "not written yet" marker: it is kept when the inode
	 * has no inline data, or when the inline write returns it, and in
	 * both cases the regular data-page write runs.
	 */
	err = -EAGAIN;
	f2fs_lock_op(sbi);
	if (f2fs_has_inline_data(inode))
		err = f2fs_write_inline_data(inode, page);
	if (err == -EAGAIN)
		err = do_write_data_page(&fio);
	f2fs_unlock_op(sbi);
done:
	/* Any error other than -ENOENT redirties the page for a retry. */
	if (err && err != -ENOENT)
		goto redirty_out;

	clear_cold_data(page);
out:
	inode_dec_dirty_pages(inode);
	if (err)
		ClearPageUptodate(page);
	unlock_page(page);
	if (need_balance_fs)
		f2fs_balance_fs(sbi);
	if (wbc->for_reclaim)
		f2fs_submit_merged_bio(sbi, DATA, WRITE);
	return 0;

redirty_out:
	redirty_page_for_writepage(wbc, page);
	return AOP_WRITEPAGE_ACTIVATE;
}

/*
 * writepage_t adapter: @data carries the address_space.  Calls the mapping's
 * ->writepage and records its return value on the mapping through
 * mapping_set_error().
 */
static int __f2fs_writepage(struct page *page, struct writeback_control *wbc,
			void *data)
{
	struct address_space *mapping = data;
	int ret = mapping->a_ops->writepage(page, wbc);
	mapping_set_error(mapping, ret);
	return ret;
}

/*
 * This function was copied from write_cache_pages from mm/page-writeback.c.
 * The major change is making write step of cold data page separately from
 * warm/hot data page.
 *
 * The scan runs twice, once with step == 0 and once with step == 1; in each
 * step the pages for which is_cold_data() equals the step number are skipped
 * (assuming is_cold_data() returns 0 or 1 -- TODO confirm).
 *
 * Returns 0, or the error reported by @writepage.
 */
static int f2fs_write_cache_pages(struct address_space *mapping,
			struct writeback_control *wbc, writepage_t writepage,
			void *data)
{
	int ret = 0;
	int done = 0;
	struct pagevec pvec;
	int nr_pages;
	pgoff_t uninitialized_var(writeback_index);
	pgoff_t index;
	pgoff_t end;		/* Inclusive */
	pgoff_t done_index;
	int cycled;
	int range_whole = 0;
	int tag;
	int step = 0;

	pagevec_init(&pvec, 0);
next:
	/* (Re)compute the scan range; executed once per step. */
	if (wbc->range_cyclic) {
		writeback_index = mapping->writeback_index; /* prev offset */
		index = writeback_index;
		if (index == 0)
			cycled = 1;
		else
			cycled = 0;
		end = -1;
	} else {
		index = wbc->range_start >> PAGE_CACHE_SHIFT;
		end = wbc->range_end >> PAGE_CACHE_SHIFT;
		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
			range_whole = 1;
		cycled = 1; /* ignore range_cyclic tests */
	}
	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
		tag = PAGECACHE_TAG_TOWRITE;
	else
		tag = PAGECACHE_TAG_DIRTY;
retry:
	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
		tag_pages_for_writeback(mapping, index, end);
	done_index = index;
	while (!done && (index <= end)) {
		int i;

		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
			      min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1);
		if (nr_pages == 0)
			break;

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];

			if (page->index > end) {
				done = 1;
				break;
			}

			done_index = page->index;

			lock_page(page);

			/* Page no longer belongs to this mapping: skip it. */
			if (unlikely(page->mapping != mapping)) {
continue_unlock:
				unlock_page(page);
				continue;
			}

			if (!PageDirty(page)) {
				/* someone wrote it for us */
				goto continue_unlock;
			}

			/* Not this step's turn for the page's temperature. */
			if (step == is_cold_data(page))
				goto continue_unlock;

			if (PageWriteback(page)) {
				if (wbc->sync_mode != WB_SYNC_NONE)
					f2fs_wait_on_page_writeback(page, DATA);
				else
					goto continue_unlock;
			}

			BUG_ON(PageWriteback(page));
			if (!clear_page_dirty_for_io(page))
				goto continue_unlock;

			ret = (*writepage)(page, wbc, data);
			if (unlikely(ret)) {
				if (ret == AOP_WRITEPAGE_ACTIVATE) {
					/* Page was redirtied; unlock and move on. */
					unlock_page(page);
					ret = 0;
				} else {
					/* Resume after the failing page next time. */
					done_index = page->index + 1;
					done = 1;
					break;
				}
			}

			/* Budget exhausted: only stops non-integrity writeback. */
			if (--wbc->nr_to_write <= 0 &&
			    wbc->sync_mode == WB_SYNC_NONE) {
				done = 1;
				break;
			}
		}
		pagevec_release(&pvec);
		cond_resched();
	}

	/* Run the second step over the same range. */
	if (step < 1) {
		step++;
		goto next;
	}

	/* range_cyclic: wrap around to the start of the file once. */
	if (!cycled && !done) {
		cycled = 1;
		index = 0;
		end = writeback_index - 1;
		goto retry;
	}
	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
		mapping->writeback_index = done_index;

	return ret;
}
/*
 * gfs2_write_jdata_pagevec - write the pages of one pagevec in a transaction
 * @mapping: address space the pages are expected to belong to
 * @wbc: writeback control (nr_to_write is decremented, congestion is flagged)
 * @pvec: pagevec holding the candidate pages
 * @nr_pages: number of entries of @pvec to process
 * @end: last page index of the range; only enforced when !wbc->range_cyclic
 *
 * A single transaction sized for @nr_pages pages worth of blocks wraps the
 * whole loop.  Each page is locked, validated, and either skipped (and
 * unlocked here) or passed to __gfs2_jdata_writepage().
 *
 * Returns a negative value if gfs2_trans_begin() fails.  Otherwise returns
 * the status left by the last page that updated it: 1 after a page past
 * @end, a failed write, nr_to_write running out, or write congestion in
 * nonblocking mode; 0 after a write that hit none of those.
 */
static int gfs2_write_jdata_pagevec(struct address_space *mapping,
				    struct writeback_control *wbc,
				    struct pagevec *pvec,
				    int nr_pages, pgoff_t end)
{
	struct inode *inode = mapping->host;
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	loff_t isize = i_size_read(inode);
	pgoff_t last_index = isize >> PAGE_CACHE_SHIFT;
	unsigned tail_bytes = isize & (PAGE_CACHE_SIZE-1);
	unsigned blocks = nr_pages * (PAGE_CACHE_SIZE/inode->i_sb->s_blocksize);
	struct backing_dev_info *bdi = mapping->backing_dev_info;
	int status;
	int n;

	status = gfs2_trans_begin(sdp, blocks, blocks);
	if (status < 0)
		return status;

	for (n = 0; n < nr_pages; n++) {
		struct page *pg = pvec->pages[n];

		lock_page(pg);

		/* The page was moved to another mapping while unlocked. */
		if (unlikely(pg->mapping != mapping))
			goto unlock_next;

		/* Beyond the requested range (ignored for cyclic writeback). */
		if (!wbc->range_cyclic && pg->index > end) {
			status = 1;
			goto unlock_next;
		}

		if (wbc->sync_mode != WB_SYNC_NONE)
			wait_on_page_writeback(pg);

		/* Still under writeback, or not dirty any more: nothing to do. */
		if (PageWriteback(pg))
			goto unlock_next;
		if (!clear_page_dirty_for_io(pg))
			goto unlock_next;

		/* Page lies entirely past i_size (truncate in progress). */
		if (pg->index > last_index ||
		    (pg->index == last_index && !tail_bytes)) {
			pg->mapping->a_ops->invalidatepage(pg, 0);
			goto unlock_next;
		}

		status = __gfs2_jdata_writepage(pg, wbc);

		/* Any write error collapses to 1; the budget is only charged on success. */
		if (status)
			status = 1;
		else if (--(wbc->nr_to_write) <= 0)
			status = 1;

		if (wbc->nonblocking && bdi_write_congested(bdi)) {
			wbc->encountered_congestion = 1;
			status = 1;
		}
		continue;

unlock_next:
		unlock_page(pg);
	}

	gfs2_trans_end(sdp);
	return status;
}
int ext4_bio_write_page(struct ext4_io_submit *io, struct page *page, int len, struct writeback_control *wbc) { struct inode *inode = page->mapping->host; unsigned block_start, block_end, blocksize; struct ext4_io_page *io_page; struct buffer_head *bh, *head; int ret = 0; blocksize = 1 << inode->i_blkbits; BUG_ON(!PageLocked(page)); BUG_ON(PageWriteback(page)); io_page = kmem_cache_alloc(io_page_cachep, GFP_NOFS); if (!io_page) { set_page_dirty(page); unlock_page(page); return -ENOMEM; } io_page->p_page = page; atomic_set(&io_page->p_count, 1); get_page(page); set_page_writeback(page); ClearPageError(page); for (bh = head = page_buffers(page), block_start = 0; bh != head || !block_start; block_start = block_end, bh = bh->b_this_page) { block_end = block_start + blocksize; if (block_start >= len) { /* * Comments copied from block_write_full_page_endio: * * The page straddles i_size. It must be zeroed out on * each and every writepage invocation because it may * be mmapped. "A file is mapped in multiples of the * page size. For a file that is not a multiple of * the page size, the remaining memory is zeroed when * mapped, and writes to that region are not written * out to the file." */ zero_user_segment(page, block_start, block_end); clear_buffer_dirty(bh); set_buffer_uptodate(bh); continue; } clear_buffer_dirty(bh); ret = io_submit_add_bh(io, io_page, inode, wbc, bh); if (ret) { set_page_dirty(page); break; } } unlock_page(page); put_io_page(io_page); return ret; }