static void __tux3_test_set_page_writeback(struct page *page, int old_writeback)
{
	struct address_space *mapping = page->mapping;

	if (mapping) {
		struct backing_dev_info *bdi = mapping->backing_dev_info;
		unsigned long flags;

		spin_lock_irqsave(&mapping->tree_lock, flags);
		if (!old_writeback) {
			/* If PageForked(), don't touch tag */
			if (!PageForked(page))
				radix_tree_tag_set(&mapping->page_tree,
						   page_index(page),
						   PAGECACHE_TAG_WRITEBACK);
			if (bdi_cap_account_writeback(bdi))
				__inc_bdi_stat(bdi, BDI_WRITEBACK);
		}
		/* If PageForked(), don't touch tag */
		if (!PageDirty(page) && !PageForked(page))
			radix_tree_tag_clear(&mapping->page_tree,
					     page_index(page),
					     PAGECACHE_TAG_DIRTY);
		radix_tree_tag_clear(&mapping->page_tree,
				     page_index(page),
				     PAGECACHE_TAG_TOWRITE);
		spin_unlock_irqrestore(&mapping->tree_lock, flags);
	}
	if (!old_writeback) {
		account_page_writeback(page);
		tux3_accout_set_writeback(page);
	}
}
static void
__xfs_inode_clear_eofblocks_tag(
	xfs_inode_t	*ip,
	void		(*clear_tp)(struct xfs_mount *mp, xfs_agnumber_t agno,
				    int error, unsigned long caller_ip),
	int		tag)
{
	struct xfs_mount *mp = ip->i_mount;
	struct xfs_perag *pag;

	spin_lock(&ip->i_flags_lock);
	ip->i_flags &= ~XFS_IEOFBLOCKS;
	spin_unlock(&ip->i_flags_lock);

	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
	spin_lock(&pag->pag_ici_lock);

	radix_tree_tag_clear(&pag->pag_ici_root,
			     XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino), tag);
	if (!radix_tree_tagged(&pag->pag_ici_root, tag)) {
		/* clear the eofblocks tag from the perag radix tree */
		spin_lock(&ip->i_mount->m_perag_lock);
		radix_tree_tag_clear(&ip->i_mount->m_perag_tree,
				     XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
				     tag);
		spin_unlock(&ip->i_mount->m_perag_lock);
		clear_tp(ip->i_mount, pag->pag_agno, -1, _RET_IP_);
	}

	spin_unlock(&pag->pag_ici_lock);
	xfs_perag_put(pag);
}
int test_set_page_writeback(struct page *page)
{
	struct address_space *mapping = page_mapping(page);
	int ret;

	if (mapping) {
		struct backing_dev_info *bdi = mapping->backing_dev_info;
		unsigned long flags;

		spin_lock_irqsave(&mapping->tree_lock, flags);
		ret = TestSetPageWriteback(page);
		if (!ret) {
			radix_tree_tag_set(&mapping->page_tree,
					   page_index(page),
					   PAGECACHE_TAG_WRITEBACK);
			if (bdi_cap_account_writeback(bdi))
				__inc_bdi_stat(bdi, BDI_WRITEBACK);
		}
		if (!PageDirty(page))
			radix_tree_tag_clear(&mapping->page_tree,
					     page_index(page),
					     PAGECACHE_TAG_DIRTY);
		radix_tree_tag_clear(&mapping->page_tree,
				     page_index(page),
				     PAGECACHE_TAG_TOWRITE);
		spin_unlock_irqrestore(&mapping->tree_lock, flags);
	} else {
		ret = TestSetPageWriteback(page);
	}
	if (!ret)
		account_page_writeback(page);
	return ret;
}
void
xfs_inode_clear_eofblocks_tag(
	xfs_inode_t	*ip)
{
	struct xfs_mount *mp = ip->i_mount;
	struct xfs_perag *pag;

	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
	spin_lock(&pag->pag_ici_lock);
	trace_xfs_inode_clear_eofblocks_tag(ip);

	radix_tree_tag_clear(&pag->pag_ici_root,
			     XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
			     XFS_ICI_EOFBLOCKS_TAG);
	if (!radix_tree_tagged(&pag->pag_ici_root, XFS_ICI_EOFBLOCKS_TAG)) {
		/* clear the eofblocks tag from the perag radix tree */
		spin_lock(&ip->i_mount->m_perag_lock);
		radix_tree_tag_clear(&ip->i_mount->m_perag_tree,
				     XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
				     XFS_ICI_EOFBLOCKS_TAG);
		spin_unlock(&ip->i_mount->m_perag_lock);
		trace_xfs_perag_clear_eofblocks(ip->i_mount, pag->pag_agno,
						-1, _RET_IP_);
	}

	spin_unlock(&pag->pag_ici_lock);
	xfs_perag_put(pag);
}
int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root)
{
	struct btrfs_block_group_cache *cache[8];
	int ret;
	int err = 0;
	int werr = 0;
	struct radix_tree_root *radix = &root->fs_info->block_group_radix;
	int i;
	struct btrfs_path path;

	btrfs_init_path(&path);
	while (1) {
		ret = radix_tree_gang_lookup_tag(radix, (void **)cache, 0,
						 ARRAY_SIZE(cache),
						 BTRFS_BLOCK_GROUP_DIRTY);
		if (!ret)
			break;
		for (i = 0; i < ret; i++) {
			radix_tree_tag_clear(radix, cache[i]->key.objectid +
					     cache[i]->key.offset - 1,
					     BTRFS_BLOCK_GROUP_DIRTY);
			err = write_one_cache_group(trans, root,
						    &path, cache[i]);
			if (err)
				werr = err;
		}
	}
	return werr;
}
/**
 * __hwspin_lock_request() - tag an hwspinlock as used and power it up
 *
 * This is an internal function that prepares an hwspinlock instance
 * before it is given to the user. The function assumes that
 * hwspinlock_tree_lock is taken.
 *
 * Returns 0 or positive to indicate success, and a negative value to
 * indicate an error (with the appropriate error code)
 */
static int __hwspin_lock_request(struct hwspinlock *hwlock)
{
	struct device *dev = hwlock->bank->dev;
	struct hwspinlock *tmp;
	int ret;

	/* prevent underlying implementation from being removed */
	if (!try_module_get(dev->driver->owner)) {
		dev_err(dev, "%s: can't get owner\n", __func__);
		return -EINVAL;
	}

	/* notify PM core that power is now needed */
	ret = pm_runtime_get_sync(dev);
	if (ret < 0) {
		dev_err(dev, "%s: can't power on device\n", __func__);
		pm_runtime_put_noidle(dev);
		module_put(dev->driver->owner);
		return ret;
	}

	/* mark hwspinlock as used, should not fail */
	tmp = radix_tree_tag_clear(&hwspinlock_tree, hwlock_to_id(hwlock),
				   HWSPINLOCK_UNUSED);

	/* self-sanity check that should never fail */
	WARN_ON(tmp != hwlock);

	return ret;
}
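The helper above consumes the HWSPINLOCK_UNUSED tag to mark a lock as taken; the matching release path re-sets that tag once the lock is handed back. Below is a rough sketch of what that inverse step might look like, reconstructed for illustration rather than quoted from the driver (the function name __hwspin_lock_release is made up here, and the PM/module refcount handling is abbreviated):

static int __hwspin_lock_release(struct hwspinlock *hwlock)
{
	struct device *dev = hwlock->bank->dev;
	struct hwspinlock *tmp;
	int ret;

	/* notify the PM core that power is no longer needed */
	ret = pm_runtime_put(dev);
	if (ret < 0)
		return ret;

	/* mark this hwspinlock as available again (hypothetical inverse of
	 * the request path above) */
	tmp = radix_tree_tag_set(&hwspinlock_tree, hwlock_to_id(hwlock),
				 HWSPINLOCK_UNUSED);

	/* self-sanity check that should never fail */
	WARN_ON(tmp != hwlock);

	/* drop the module reference taken in the request path */
	module_put(dev->driver->owner);

	return ret;
}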
int test_set_page_writeback(struct page *page)
{
	struct address_space *mapping = page_mapping(page);
	int ret;

	if (mapping) {
		unsigned long flags;

		write_lock_irqsave(&mapping->tree_lock, flags);
		ret = TestSetPageWriteback(page);
		if (!ret)
			radix_tree_tag_set(&mapping->page_tree,
					   page_index(page),
					   PAGECACHE_TAG_WRITEBACK);
		if (!PageDirty(page))
			radix_tree_tag_clear(&mapping->page_tree,
					     page_index(page),
					     PAGECACHE_TAG_DIRTY);
		write_unlock_irqrestore(&mapping->tree_lock, flags);
	} else {
		ret = TestSetPageWriteback(page);
	}
	if (!ret)
		inc_zone_page_state(page, NR_WRITEBACK);
	return ret;
}
int test_clear_page_writeback(struct page *page)
{
	struct address_space *mapping = page_mapping(page);
	int ret;

	if (mapping) {
		struct backing_dev_info *bdi = mapping->backing_dev_info;
		unsigned long flags;

		spin_lock_irqsave(&mapping->tree_lock, flags);
		ret = TestClearPageWriteback(page);
		if (ret) {
			radix_tree_tag_clear(&mapping->page_tree,
					     page_index(page),
					     PAGECACHE_TAG_WRITEBACK);
			if (bdi_cap_account_writeback(bdi)) {
				__dec_bdi_stat(bdi, BDI_WRITEBACK);
				__bdi_writeout_inc(bdi);
			}
		}
		spin_unlock_irqrestore(&mapping->tree_lock, flags);
	} else {
		ret = TestClearPageWriteback(page);
	}
	if (ret)
		dec_zone_page_state(page, NR_WRITEBACK);
	return ret;
}
/*
 * find all the blocks marked as pending in the radix tree and remove
 * them from the extent map
 */
static int del_pending_extents(struct btrfs_trans_handle *trans,
			       struct btrfs_root *extent_root)
{
	int ret;
	struct btrfs_buffer *gang[4];
	int i;

	while (1) {
		ret = radix_tree_gang_lookup_tag(
					&extent_root->fs_info->cache_radix,
					(void **)gang, 0, ARRAY_SIZE(gang),
					CTREE_EXTENT_PENDING_DEL);
		if (!ret)
			break;
		for (i = 0; i < ret; i++) {
			ret = __free_extent(trans, extent_root,
					    gang[i]->blocknr, 1, 1);
			radix_tree_tag_clear(&extent_root->fs_info->cache_radix,
					     gang[i]->blocknr,
					     CTREE_EXTENT_PENDING_DEL);
			btrfs_block_release(extent_root, gang[i]);
		}
	}
	return 0;
}
STATIC void
xfs_inode_clear_reclaim_tag(
	struct xfs_perag	*pag,
	xfs_ino_t		ino)
{
	radix_tree_tag_clear(&pag->pag_ici_root,
			     XFS_INO_TO_AGINO(pag->pag_mount, ino),
			     XFS_ICI_RECLAIM_TAG);
	xfs_perag_clear_reclaim_tag(pag);
}
void
__xfs_inode_clear_reclaim_tag(
	xfs_mount_t	*mp,
	xfs_perag_t	*pag,
	xfs_inode_t	*ip)
{
	radix_tree_tag_clear(&pag->pag_ici_root,
			     XFS_INO_TO_AGINO(mp, ip->i_ino),
			     XFS_ICI_RECLAIM_TAG);
	__xfs_inode_clear_reclaim(pag, ip);
}
/**
 * nfs_clear_page_writeback - Unlock request and wake up sleepers
 */
void nfs_clear_page_writeback(struct nfs_page *req)
{
	struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode);

	if (req->wb_page != NULL) {
		spin_lock(&nfsi->req_lock);
		radix_tree_tag_clear(&nfsi->nfs_page_tree,
				     req->wb_index, NFS_PAGE_TAG_WRITEBACK);
		spin_unlock(&nfsi->req_lock);
	}
	nfs_unlock_request(req);
}
/**
 * nfs_clear_page_tag_locked - Clear request tag and wake up sleepers
 */
void nfs_clear_page_tag_locked(struct nfs_page *req)
{
	struct inode *inode = req->wb_context->path.dentry->d_inode;
	struct nfs_inode *nfsi = NFS_I(inode);

	if (req->wb_page != NULL) {
		spin_lock(&inode->i_lock);
		radix_tree_tag_clear(&nfsi->nfs_page_tree,
				     req->wb_index, NFS_PAGE_TAG_LOCKED);
		spin_unlock(&inode->i_lock);
	}
	nfs_unlock_request(req);
}
/**
 * nfs_clear_page_tag_locked - Clear request tag and wake up sleepers
 */
void nfs_clear_page_tag_locked(struct nfs_page *req)
{
	if (test_bit(PG_MAPPED, &req->wb_flags)) {
		struct inode *inode = req->wb_context->dentry->d_inode;
		struct nfs_inode *nfsi = NFS_I(inode);

		spin_lock(&inode->i_lock);
		radix_tree_tag_clear(&nfsi->nfs_page_tree,
				     req->wb_index, NFS_PAGE_TAG_LOCKED);
		nfs_unlock_request(req);
		spin_unlock(&inode->i_lock);
	} else
		nfs_unlock_request(req);
}
void regression2_test(void)
{
	int i;
	struct page *p;
	int max_slots = RADIX_TREE_MAP_SIZE;
	unsigned long int start, end;
	struct page *pages[1];

	printf("running regression test 2 (should take milliseconds)\n");
	/* 0. */
	for (i = 0; i <= max_slots - 1; i++) {
		p = page_alloc();
		radix_tree_insert(&mt_tree, i, p);
	}
	radix_tree_tag_set(&mt_tree, max_slots - 1, PAGECACHE_TAG_DIRTY);

	/* 1. */
	start = 0;
	end = max_slots - 2;
	radix_tree_range_tag_if_tagged(&mt_tree, &start, end, 1,
				       PAGECACHE_TAG_DIRTY,
				       PAGECACHE_TAG_TOWRITE);

	/* 2. */
	p = page_alloc();
	radix_tree_insert(&mt_tree, max_slots, p);

	/* 3. */
	radix_tree_tag_clear(&mt_tree, max_slots - 1, PAGECACHE_TAG_DIRTY);

	/* 4. */
	for (i = max_slots - 1; i >= 0; i--)
		radix_tree_delete(&mt_tree, i);

	/* 5. */
	/*
	 * NOTE: start should not be 0 because radix_tree_gang_lookup_tag_slot
	 * can return.
	 */
	start = 1;
	end = max_slots - 2;
	radix_tree_gang_lookup_tag_slot(&mt_tree, (void ***)pages, start, end,
					PAGECACHE_TAG_TOWRITE);

	/* Remove all the remaining nodes */
	radix_tree_delete(&mt_tree, max_slots);

	printf("regression test 2, done\n");
}
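For orientation, here is a minimal sketch of the bare tag calls the regression test above builds on, written in the same userspace test-harness style. It is not taken from any kernel tree: the tree name demo_tree, the dummy item, the index and the raw tag numbers are all invented for illustration; only the radix-tree calls themselves are the stock API exercised throughout this page.

static RADIX_TREE(demo_tree, GFP_KERNEL);

static void demo_tag_roundtrip(void)
{
	static int dummy_item;	/* any word-aligned pointer works as an entry */

	/* an index must hold an entry before it can carry a tag */
	assert(radix_tree_insert(&demo_tree, 42, &dummy_item) == 0);

	/* set tag 0 on index 42; the tagged item is returned */
	assert(radix_tree_tag_set(&demo_tree, 42, 0) == &dummy_item);
	assert(radix_tree_tag_get(&demo_tree, 42, 0));
	assert(radix_tree_tagged(&demo_tree, 0));

	/* clearing the tag leaves the entry itself in place */
	assert(radix_tree_tag_clear(&demo_tree, 42, 0) == &dummy_item);
	assert(!radix_tree_tag_get(&demo_tree, 42, 0));
	assert(!radix_tree_tagged(&demo_tree, 0));

	/* the entry still has to be deleted separately */
	assert(radix_tree_delete(&demo_tree, 42) == &dummy_item);
}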
static void
xfs_perag_clear_reclaim_tag(
	struct xfs_perag	*pag)
{
	struct xfs_mount	*mp = pag->pag_mount;

	ASSERT(spin_is_locked(&pag->pag_ici_lock));
	if (--pag->pag_ici_reclaimable)
		return;

	/* clear the reclaim tag from the perag radix tree */
	spin_lock(&mp->m_perag_lock);
	radix_tree_tag_clear(&mp->m_perag_tree, pag->pag_agno,
			     XFS_ICI_RECLAIM_TAG);
	spin_unlock(&mp->m_perag_lock);
	trace_xfs_perag_clear_reclaim(mp, pag->pag_agno, -1, _RET_IP_);
}
STATIC void
__xfs_inode_clear_reclaim(
	xfs_perag_t	*pag,
	xfs_inode_t	*ip)
{
	pag->pag_ici_reclaimable--;
	if (!pag->pag_ici_reclaimable) {
		spin_lock(&ip->i_mount->m_perag_lock);
		radix_tree_tag_clear(&ip->i_mount->m_perag_tree,
				     XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
				     XFS_ICI_RECLAIM_TAG);
		spin_unlock(&ip->i_mount->m_perag_lock);
		trace_xfs_perag_clear_reclaim(ip->i_mount, pag->pag_agno,
					      -1, _RET_IP_);
	}
}
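Taken together, the XFS helpers above (the eofblocks and reclaim variants) all follow the same two-level shape: clear the per-object tag under the lock that guards the per-group tree, and if no tagged objects remain in that group, also clear the group's summary tag in a higher-level tree so scanners can skip the group entirely. The following is a rough sketch of that pattern only; every structure and name here (demo_fs, demo_group, DEMO_PENDING_TAG, demo_clear_pending_tag) is invented for illustration and does not correspond to any real filesystem.

#include <linux/radix-tree.h>
#include <linux/spinlock.h>

#define DEMO_PENDING_TAG	0	/* hypothetical tag number */

struct demo_fs {
	spinlock_t		group_lock;	/* guards group_tree tags */
	struct radix_tree_root	group_tree;	/* one slot per group */
};

struct demo_group {
	spinlock_t		lock;		/* guards obj_tree and its tags */
	struct radix_tree_root	obj_tree;	/* per-group objects, tagged */
	unsigned long		index;		/* this group's slot in group_tree */
	struct demo_fs		*fs;
};

static void demo_clear_pending_tag(struct demo_group *grp, unsigned long objno)
{
	struct demo_fs *fs = grp->fs;

	spin_lock(&grp->lock);
	radix_tree_tag_clear(&grp->obj_tree, objno, DEMO_PENDING_TAG);

	/*
	 * If that was the last tagged object in this group, drop the
	 * group-level summary tag as well, mirroring how the XFS helpers
	 * clear the perag tag once no tagged inodes remain.
	 */
	if (!radix_tree_tagged(&grp->obj_tree, DEMO_PENDING_TAG)) {
		spin_lock(&fs->group_lock);
		radix_tree_tag_clear(&fs->group_tree, grp->index,
				     DEMO_PENDING_TAG);
		spin_unlock(&fs->group_lock);
	}
	spin_unlock(&grp->lock);
}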
/**
 * nfs_scan_list - Scan a list for matching requests
 * @nfsi: NFS inode
 * @dst: Destination list
 * @idx_start: lower bound of page->index to scan
 * @npages: idx_start + npages sets the upper bound to scan.
 * @tag: tag to scan for
 *
 * Moves elements from one of the inode request lists.
 * If the number of requests is set to 0, the entire address_space
 * starting at index idx_start, is scanned.
 * The requests are *not* checked to ensure that they form a contiguous set.
 * You must be holding the inode's i_lock when calling this function
 */
int nfs_scan_list(struct nfs_inode *nfsi,
		  struct list_head *dst, pgoff_t idx_start,
		  unsigned int npages, int tag)
{
	struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES];
	struct nfs_page *req;
	pgoff_t idx_end;
	int found, i;
	int res;
	struct list_head *list;

	res = 0;
	if (npages == 0)
		idx_end = ~0;
	else
		idx_end = idx_start + npages - 1;

	for (;;) {
		found = radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree,
				(void **)&pgvec[0], idx_start,
				NFS_SCAN_MAXENTRIES, tag);
		if (found <= 0)
			break;
		for (i = 0; i < found; i++) {
			req = pgvec[i];
			if (req->wb_index > idx_end)
				goto out;
			idx_start = req->wb_index + 1;
			if (nfs_set_page_tag_locked(req)) {
				kref_get(&req->wb_kref);
				radix_tree_tag_clear(&nfsi->nfs_page_tree,
						     req->wb_index, tag);
				list = pnfs_choose_commit_list(req, dst);
				nfs_list_add_request(req, list);
				res++;
				if (res == INT_MAX)
					goto out;
			}
		}
		/* for latency reduction */
		cond_resched_lock(&nfsi->vfs_inode.i_lock);
	}
out:
	return res;
}
/*
 * NILFS2 needs clear_page_dirty() in the following two cases:
 *
 * 1) For B-tree node pages and data pages of the dat/gcdat, NILFS2 clears
 *    page dirty flags when it copies back pages from the shadow cache
 *    (gcdat->{i_mapping,i_btnode_cache}) to its original cache
 *    (dat->{i_mapping,i_btnode_cache}).
 *
 * 2) Some B-tree operations like insertion or deletion may dispose buffers
 *    in dirty state, and this needs to cancel the dirty state of their pages.
 */
int __nilfs_clear_page_dirty(struct page *page)
{
	struct address_space *mapping = page->mapping;

	if (mapping) {
		spin_lock_irq(&mapping->tree_lock);
		if (test_bit(PG_dirty, &page->flags)) {
			radix_tree_tag_clear(&mapping->page_tree,
					     page_index(page),
					     PAGECACHE_TAG_DIRTY);
			spin_unlock_irq(&mapping->tree_lock);
			return clear_page_dirty_for_io(page);
		}
		spin_unlock_irq(&mapping->tree_lock);
		return 0;
	}
	return TestClearPageDirty(page);
}
int test_clear_page_writeback(struct page *page)
{
	struct address_space *mapping = page_mapping(page);
	int ret;

	if (mapping) {
		unsigned long flags;

		write_lock_irqsave(&mapping->tree_lock, flags);
		ret = TestClearPageWriteback(page);
		if (ret)
			radix_tree_tag_clear(&mapping->page_tree,
					     page_index(page),
					     PAGECACHE_TAG_WRITEBACK);
		write_unlock_irqrestore(&mapping->tree_lock, flags);
	} else {
		ret = TestClearPageWriteback(page);
	}
	return ret;
}
static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
				    struct btrfs_root *root)
{
	struct btrfs_root *gang[8];
	struct btrfs_fs_info *fs_info = root->fs_info;
	int i;
	int ret;
	int err = 0;

	while (1) {
		ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix,
						 (void **)gang, 0,
						 ARRAY_SIZE(gang),
						 BTRFS_ROOT_TRANS_TAG);
		if (ret == 0)
			break;
		for (i = 0; i < ret; i++) {
			root = gang[i];
			radix_tree_tag_clear(&fs_info->fs_roots_radix,
					(unsigned long)root->root_key.objectid,
					BTRFS_ROOT_TRANS_TAG);

			btrfs_free_log(trans, root);
			btrfs_update_reloc_root(trans, root);
			btrfs_orphan_commit_root(trans, root);

			if (root->commit_root != root->node) {
				switch_commit_root(root);
				btrfs_set_root_node(&root->root_item,
						    root->node);
			}

			err = btrfs_update_root(trans, fs_info->tree_root,
						&root->root_key,
						&root->root_item);
			if (err)
				break;
		}
	}
	return err;
}
	}

	for_each_index(i, base, order) {
		assert(!radix_tree_tag_get(&tree, i, 0));
		assert(!radix_tree_tag_get(&tree, i, 1));
	}

	assert(radix_tree_tag_set(&tree, index, 0));

	for_each_index(i, base, order) {
		assert(radix_tree_tag_get(&tree, i, 0));
		assert(!radix_tree_tag_get(&tree, i, 1));
	}

	assert(tag_tagged_items(&tree, NULL, 0, ~0UL, 10, 0, 1) == 1);
	assert(radix_tree_tag_clear(&tree, index, 0));

	for_each_index(i, base, order) {
		assert(!radix_tree_tag_get(&tree, i, 0));
		assert(radix_tree_tag_get(&tree, i, 1));
	}

	assert(radix_tree_tag_clear(&tree, index, 1));

	assert(!radix_tree_tagged(&tree, 0));
	assert(!radix_tree_tagged(&tree, 1));

	item_kill_tree(&tree);
}

static void __multiorder_tag_test2(unsigned order, unsigned long index2)
static void page_cache_tree_delete(struct address_space *mapping,
				   struct page *page, void *shadow)
{
	struct radix_tree_node *node;
	unsigned long index;
	unsigned int offset;
	unsigned int tag;
	void **slot;

	VM_BUG_ON(!PageLocked(page));

	__radix_tree_lookup(&mapping->page_tree, page->index, &node, &slot);

	if (shadow) {
		mapping->nrshadows++;
		/*
		 * Make sure the nrshadows update is committed before
		 * the nrpages update so that final truncate racing
		 * with reclaim does not see both counters 0 at the
		 * same time and miss a shadow entry.
		 */
		smp_wmb();
	}
	mapping->nrpages--;

	if (!node) {
		/* Clear direct pointer tags in root node */
		mapping->page_tree.gfp_mask &= __GFP_BITS_MASK;
		radix_tree_replace_slot(slot, shadow);
		return;
	}

	/* Clear tree tags for the removed page */
	index = page->index;
	offset = index & RADIX_TREE_MAP_MASK;
	for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
		if (test_bit(offset, node->tags[tag]))
			radix_tree_tag_clear(&mapping->page_tree, index, tag);
	}

	/* Delete page, swap shadow entry */
	radix_tree_replace_slot(slot, shadow);
	workingset_node_pages_dec(node);
	if (shadow)
		workingset_node_shadows_inc(node);
	else if (__radix_tree_delete_node(&mapping->page_tree, node))
		return;

	/*
	 * Track node that only contains shadow entries.
	 *
	 * Avoid acquiring the list_lru lock if already tracked. The
	 * list_empty() test is safe as node->private_list is
	 * protected by mapping->tree_lock.
	 */
	if (!workingset_node_pages(node) && list_empty(&node->private_list)) {
		node->private_data = mapping;
		list_lru_add(&workingset_shadow_nodes, &node->private_list);
	}
}
static int copy_user_bh(struct page *to, struct inode *inode,
		struct buffer_head *bh, unsigned long vaddr)
{
	struct blk_dax_ctl dax = {
		.sector = to_sector(bh, inode),
		.size = bh->b_size,
	};
	struct block_device *bdev = bh->b_bdev;
	void *vto;

	if (dax_map_atomic(bdev, &dax) < 0)
		return PTR_ERR(dax.addr);
	vto = kmap_atomic(to);
	copy_user_page(vto, (void __force *)dax.addr, vaddr, to);
	kunmap_atomic(vto);
	dax_unmap_atomic(bdev, &dax);
	return 0;
}

#define NO_SECTOR -1
#define DAX_PMD_INDEX(page_index) (page_index & (PMD_MASK >> PAGE_SHIFT))

static int dax_radix_entry(struct address_space *mapping, pgoff_t index,
		sector_t sector, bool pmd_entry, bool dirty)
{
	struct radix_tree_root *page_tree = &mapping->page_tree;
	pgoff_t pmd_index = DAX_PMD_INDEX(index);
	int type, error = 0;
	void *entry;

	WARN_ON_ONCE(pmd_entry && !dirty);
	if (dirty)
		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);

	spin_lock_irq(&mapping->tree_lock);

	entry = radix_tree_lookup(page_tree, pmd_index);
	if (entry && RADIX_DAX_TYPE(entry) == RADIX_DAX_PMD) {
		index = pmd_index;
		goto dirty;
	}

	entry = radix_tree_lookup(page_tree, index);
	if (entry) {
		type = RADIX_DAX_TYPE(entry);
		if (WARN_ON_ONCE(type != RADIX_DAX_PTE &&
				 type != RADIX_DAX_PMD)) {
			error = -EIO;
			goto unlock;
		}

		if (!pmd_entry || type == RADIX_DAX_PMD)
			goto dirty;

		/*
		 * We only insert dirty PMD entries into the radix tree. This
		 * means we don't need to worry about removing a dirty PTE
		 * entry and inserting a clean PMD entry, thus reducing the
		 * range we would flush with a follow-up fsync/msync call.
		 */
		radix_tree_delete(&mapping->page_tree, index);
		mapping->nrexceptional--;
	}

	if (sector == NO_SECTOR) {
		/*
		 * This can happen during correct operation if our pfn_mkwrite
		 * fault raced against a hole punch operation. If this
		 * happens the pte that was hole punched will have been
		 * unmapped and the radix tree entry will have been removed by
		 * the time we are called, but the call will still happen. We
		 * will return all the way up to wp_pfn_shared(), where the
		 * pte_same() check will fail, eventually causing page fault
		 * to be retried by the CPU.
		 */
		goto unlock;
	}

	error = radix_tree_insert(page_tree, index,
			RADIX_DAX_ENTRY(sector, pmd_entry));
	if (error)
		goto unlock;

	mapping->nrexceptional++;
 dirty:
	if (dirty)
		radix_tree_tag_set(page_tree, index, PAGECACHE_TAG_DIRTY);
 unlock:
	spin_unlock_irq(&mapping->tree_lock);
	return error;
}

static int dax_writeback_one(struct block_device *bdev,
		struct address_space *mapping, pgoff_t index, void *entry)
{
	struct radix_tree_root *page_tree = &mapping->page_tree;
	int type = RADIX_DAX_TYPE(entry);
	struct radix_tree_node *node;
	struct blk_dax_ctl dax;
	void **slot;
	int ret = 0;

	spin_lock_irq(&mapping->tree_lock);
	/*
	 * Regular page slots are stabilized by the page lock even
	 * without the tree itself locked. These unlocked entries
	 * need verification under the tree lock.
	 */
	if (!__radix_tree_lookup(page_tree, index, &node, &slot))
		goto unlock;
	if (*slot != entry)
		goto unlock;

	/* another fsync thread may have already written back this entry */
	if (!radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE))
		goto unlock;

	if (WARN_ON_ONCE(type != RADIX_DAX_PTE && type != RADIX_DAX_PMD)) {
		ret = -EIO;
		goto unlock;
	}

	dax.sector = RADIX_DAX_SECTOR(entry);
	dax.size = (type == RADIX_DAX_PMD ? PMD_SIZE : PAGE_SIZE);
	spin_unlock_irq(&mapping->tree_lock);

	/*
	 * We cannot hold tree_lock while calling dax_map_atomic() because it
	 * eventually calls cond_resched().
	 */
	ret = dax_map_atomic(bdev, &dax);
	if (ret < 0)
		return ret;

	if (WARN_ON_ONCE(ret < dax.size)) {
		ret = -EIO;
		goto unmap;
	}

	wb_cache_pmem(dax.addr, dax.size);

	spin_lock_irq(&mapping->tree_lock);
	radix_tree_tag_clear(page_tree, index, PAGECACHE_TAG_TOWRITE);
	spin_unlock_irq(&mapping->tree_lock);
 unmap:
	dax_unmap_atomic(bdev, &dax);
	return ret;

 unlock:
	spin_unlock_irq(&mapping->tree_lock);
	return ret;
}

/*
 * Flush the mapping to the persistent domain within the byte range of [start,
 * end]. This is required by data integrity operations to ensure file data is
 * on persistent storage prior to completion of the operation.
 */
int dax_writeback_mapping_range(struct address_space *mapping,
		struct block_device *bdev, struct writeback_control *wbc)
{
	struct inode *inode = mapping->host;
	pgoff_t start_index, end_index, pmd_index;
	pgoff_t indices[PAGEVEC_SIZE];
	struct pagevec pvec;
	bool done = false;
	int i, ret = 0;
	void *entry;

	if (WARN_ON_ONCE(inode->i_blkbits != PAGE_SHIFT))
		return -EIO;

	if (!mapping->nrexceptional || wbc->sync_mode != WB_SYNC_ALL)
		return 0;

	start_index = wbc->range_start >> PAGE_SHIFT;
	end_index = wbc->range_end >> PAGE_SHIFT;
	pmd_index = DAX_PMD_INDEX(start_index);

	rcu_read_lock();
	entry = radix_tree_lookup(&mapping->page_tree, pmd_index);
	rcu_read_unlock();

	/* see if the start of our range is covered by a PMD entry */
	if (entry && RADIX_DAX_TYPE(entry) == RADIX_DAX_PMD)
		start_index = pmd_index;

	tag_pages_for_writeback(mapping, start_index, end_index);

	pagevec_init(&pvec, 0);
	while (!done) {
		pvec.nr = find_get_entries_tag(mapping, start_index,
				PAGECACHE_TAG_TOWRITE, PAGEVEC_SIZE,
				pvec.pages, indices);

		if (pvec.nr == 0)
			break;

		for (i = 0; i < pvec.nr; i++) {
			if (indices[i] > end_index) {
				done = true;
				break;
			}

			ret = dax_writeback_one(bdev, mapping, indices[i],
					pvec.pages[i]);
			if (ret < 0)
				return ret;
		}
	}
	wmb_pmem();
	return 0;
}
EXPORT_SYMBOL_GPL(dax_writeback_mapping_range);

static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
			struct vm_area_struct *vma, struct vm_fault *vmf)
{
	unsigned long vaddr = (unsigned long)vmf->virtual_address;
	struct address_space *mapping = inode->i_mapping;
	struct block_device *bdev = bh->b_bdev;
	struct blk_dax_ctl dax = {
		.sector = to_sector(bh, inode),
		.size = bh->b_size,
	};
	pgoff_t size;
	int error;

	i_mmap_lock_read(mapping);

	/*
	 * Check truncate didn't happen while we were allocating a block.
	 * If it did, this block may or may not be still allocated to the
	 * file. We can't tell the filesystem to free it because we can't
	 * take i_mutex here. In the worst case, the file still has blocks
	 * allocated past the end of the file.
	 */
	size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
	if (unlikely(vmf->pgoff >= size)) {
		error = -EIO;
		goto out;
	}

	if (dax_map_atomic(bdev, &dax) < 0) {
		error = PTR_ERR(dax.addr);
		goto out;
	}

	if (buffer_unwritten(bh) || buffer_new(bh)) {
		clear_pmem(dax.addr, PAGE_SIZE);
		wmb_pmem();
	}
	dax_unmap_atomic(bdev, &dax);

	error = dax_radix_entry(mapping, vmf->pgoff, dax.sector, false,
			vmf->flags & FAULT_FLAG_WRITE);
	if (error)
		goto out;

	error = vm_insert_mixed(vma, vaddr, dax.pfn);

 out:
	i_mmap_unlock_read(mapping);

	return error;
}

/**
 * __dax_fault - handle a page fault on a DAX file
 * @vma: The virtual memory area where the fault occurred
 * @vmf: The description of the fault
 * @get_block: The filesystem method used to translate file offsets to blocks
 * @complete_unwritten: The filesystem method used to convert unwritten blocks
 *	to written so the data written to them is exposed.
 *	This is required by write faults for filesystems that will return
 *	unwritten extent mappings from @get_block, but it is optional for
 *	reads as dax_insert_mapping() will always zero unwritten blocks. If
 *	the fs does not support unwritten extents, it should pass NULL.
 *
 * When a page fault occurs, filesystems may call this helper in their
 * fault handler for DAX files. __dax_fault() assumes the caller has done all
 * the necessary locking for the page fault to proceed successfully.
 */
int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
			get_block_t get_block, dax_iodone_t complete_unwritten)
{
	struct file *file = vma->vm_file;
	struct address_space *mapping = file->f_mapping;
	struct inode *inode = mapping->host;
	struct page *page;
	struct buffer_head bh;
	unsigned long vaddr = (unsigned long)vmf->virtual_address;
	unsigned blkbits = inode->i_blkbits;
	sector_t block;
	pgoff_t size;
	int error;
	int major = 0;

	size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
	if (vmf->pgoff >= size)
		return VM_FAULT_SIGBUS;

	memset(&bh, 0, sizeof(bh));
	block = (sector_t)vmf->pgoff << (PAGE_SHIFT - blkbits);
	bh.b_bdev = inode->i_sb->s_bdev;
	bh.b_size = PAGE_SIZE;

 repeat:
	page = find_get_page(mapping, vmf->pgoff);
	if (page) {
		if (!lock_page_or_retry(page, vma->vm_mm, vmf->flags)) {
			put_page(page);
			return VM_FAULT_RETRY;
		}
		if (unlikely(page->mapping != mapping)) {
			unlock_page(page);
			put_page(page);
			goto repeat;
		}
		size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
		if (unlikely(vmf->pgoff >= size)) {
			/*
			 * We have a struct page covering a hole in the file
			 * from a read fault and we've raced with a truncate
			 */
			error = -EIO;
			goto unlock_page;
		}
	}

	error = get_block(inode, block, &bh, 0);
	if (!error && (bh.b_size < PAGE_SIZE))
		error = -EIO;		/* fs corruption? */
	if (error)
		goto unlock_page;

	if (!buffer_mapped(&bh) && !buffer_unwritten(&bh) && !vmf->cow_page) {
		if (vmf->flags & FAULT_FLAG_WRITE) {
			error = get_block(inode, block, &bh, 1);
			count_vm_event(PGMAJFAULT);
			mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
			major = VM_FAULT_MAJOR;
			if (!error && (bh.b_size < PAGE_SIZE))
				error = -EIO;
			if (error)
				goto unlock_page;
		} else {
			return dax_load_hole(mapping, page, vmf);
		}
	}

	if (vmf->cow_page) {
		struct page *new_page = vmf->cow_page;

		if (buffer_written(&bh))
			error = copy_user_bh(new_page, inode, &bh, vaddr);
		else
			clear_user_highpage(new_page, vaddr);
		if (error)
			goto unlock_page;
		vmf->page = page;
		if (!page) {
			i_mmap_lock_read(mapping);
			/* Check we didn't race with truncate */
			size = (i_size_read(inode) + PAGE_SIZE - 1) >>
								PAGE_SHIFT;
			if (vmf->pgoff >= size) {
				i_mmap_unlock_read(mapping);
				error = -EIO;
				goto out;
			}
		}
		return VM_FAULT_LOCKED;
	}

	/* Check we didn't race with a read fault installing a new page */
	if (!page && major)
		page = find_lock_page(mapping, vmf->pgoff);

	if (page) {
		unmap_mapping_range(mapping, vmf->pgoff << PAGE_SHIFT,
							PAGE_SIZE, 0);
		delete_from_page_cache(page);
		unlock_page(page);
		put_page(page);
		page = NULL;
	}

	/*
	 * If we successfully insert the new mapping over an unwritten extent,
	 * we need to ensure we convert the unwritten extent. If there is an
	 * error inserting the mapping, the filesystem needs to leave it as
	 * unwritten to prevent exposure of the stale underlying data to
	 * userspace, but we still need to call the completion function so
	 * the private resources on the mapping buffer can be released. We
	 * indicate what the callback should do via the uptodate variable, same
	 * as for normal BH based IO completions.
	 */
	error = dax_insert_mapping(inode, &bh, vma, vmf);
	if (buffer_unwritten(&bh)) {
		if (complete_unwritten)
			complete_unwritten(&bh, !error);
		else
			WARN_ON_ONCE(!(vmf->flags & FAULT_FLAG_WRITE));
	}

 out:
	if (error == -ENOMEM)
		return VM_FAULT_OOM | major;
	/* -EBUSY is fine, somebody else faulted on the same PTE */
	if ((error < 0) && (error != -EBUSY))
		return VM_FAULT_SIGBUS | major;
	return VM_FAULT_NOPAGE | major;

 unlock_page:
	if (page) {
		unlock_page(page);
		put_page(page);
	}
	goto out;
}
/*
 * at transaction commit time we need to schedule the old roots for
 * deletion via btrfs_drop_snapshot. This runs through all the
 * reference counted roots that were modified in the current
 * transaction and puts them into the drop list
 */
static noinline int add_dirty_roots(struct btrfs_trans_handle *trans,
				    struct radix_tree_root *radix,
				    struct list_head *list)
{
	struct btrfs_dirty_root *dirty;
	struct btrfs_root *gang[8];
	struct btrfs_root *root;
	int i;
	int ret;
	int err = 0;
	u32 refs;

	while (1) {
		ret = radix_tree_gang_lookup_tag(radix, (void **)gang, 0,
						 ARRAY_SIZE(gang),
						 BTRFS_ROOT_TRANS_TAG);
		if (ret == 0)
			break;
		for (i = 0; i < ret; i++) {
			root = gang[i];
			radix_tree_tag_clear(radix,
				     (unsigned long)root->root_key.objectid,
				     BTRFS_ROOT_TRANS_TAG);

			BUG_ON(!root->ref_tree);
			dirty = root->dirty_root;

			btrfs_free_log(trans, root);
			btrfs_free_reloc_root(trans, root);

			if (root->commit_root == root->node) {
				WARN_ON(root->node->start !=
					btrfs_root_bytenr(&root->root_item));

				free_extent_buffer(root->commit_root);
				root->commit_root = NULL;
				root->dirty_root = NULL;

				spin_lock(&root->list_lock);
				list_del_init(&dirty->root->dead_list);
				spin_unlock(&root->list_lock);

				kfree(dirty->root);
				kfree(dirty);

				/* make sure to update the root on disk
				 * so we get any updates to the block used
				 * counts
				 */
				err = btrfs_update_root(trans,
						root->fs_info->tree_root,
						&root->root_key,
						&root->root_item);
				continue;
			}

			memset(&root->root_item.drop_progress, 0,
			       sizeof(struct btrfs_disk_key));
			root->root_item.drop_level = 0;
			root->commit_root = NULL;
			root->dirty_root = NULL;
			root->root_key.offset = root->fs_info->generation;
			btrfs_set_root_bytenr(&root->root_item,
					      root->node->start);
			btrfs_set_root_level(&root->root_item,
					     btrfs_header_level(root->node));
			btrfs_set_root_generation(&root->root_item,
						  root->root_key.offset);

			err = btrfs_insert_root(trans, root->fs_info->tree_root,
						&root->root_key,
						&root->root_item);
			if (err)
				break;

			refs = btrfs_root_refs(&dirty->root->root_item);
			btrfs_set_root_refs(&dirty->root->root_item, refs - 1);
			err = btrfs_update_root(trans, root->fs_info->tree_root,
						&dirty->root->root_key,
						&dirty->root->root_item);

			BUG_ON(err);
			if (refs == 1) {
				list_add(&dirty->list, list);
			} else {
				WARN_ON(1);
				free_extent_buffer(dirty->root->node);
				kfree(dirty->root);
				kfree(dirty);
			}
		}
	}
	return err;
}
/*
 * Search for an existing write request, and attempt to update
 * it to reflect a new dirty region on a given page.
 *
 * If the attempt fails, then the existing request is flushed out
 * to disk.
 */
static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
		struct page *page,
		unsigned int offset,
		unsigned int bytes)
{
	struct nfs_page *req;
	unsigned int rqend;
	unsigned int end;
	int error;

	if (!PagePrivate(page))
		return NULL;

	end = offset + bytes;
	spin_lock(&inode->i_lock);

	for (;;) {
		req = nfs_page_find_request_locked(page);
		if (req == NULL)
			goto out_unlock;

		rqend = req->wb_offset + req->wb_bytes;
		/*
		 * Tell the caller to flush out the request if
		 * the offsets are non-contiguous.
		 * Note: nfs_flush_incompatible() will already
		 * have flushed out requests having wrong owners.
		 */
		if (offset > rqend || end < req->wb_offset)
			goto out_flushme;

		if (nfs_set_page_tag_locked(req))
			break;

		/* The request is locked, so wait and then retry */
		spin_unlock(&inode->i_lock);
		error = nfs_wait_on_request(req);
		nfs_release_request(req);
		if (error != 0)
			goto out_err;
		spin_lock(&inode->i_lock);
	}

	if (nfs_clear_request_commit(req))
		radix_tree_tag_clear(&NFS_I(inode)->nfs_page_tree,
				     req->wb_index, NFS_PAGE_TAG_COMMIT);

	/* Okay, the request matches. Update the region */
	if (offset < req->wb_offset) {
		req->wb_offset = offset;
		req->wb_pgbase = offset;
	}
	if (end > rqend)
		req->wb_bytes = end - req->wb_offset;
	else
		req->wb_bytes = rqend - req->wb_offset;
out_unlock:
	spin_unlock(&inode->i_lock);
	return req;
out_flushme:
	spin_unlock(&inode->i_lock);
	nfs_release_request(req);
	error = nfs_wb_page(inode, page);
out_err:
	return ERR_PTR(error);
}