Example #1
/*
 * Wait for a request to complete.
 *
 * Interruptible by fatal signals only.
 */
static int nfs_wait_on_requests_locked(struct inode *inode, pgoff_t idx_start, unsigned int npages)
{
	struct nfs_inode *nfsi = NFS_I(inode);
	struct nfs_page *req;
	pgoff_t idx_end, next;
	unsigned int		res = 0;
	int			error;

	if (npages == 0)
		idx_end = ~0;
	else
		idx_end = idx_start + npages - 1;

	next = idx_start;
	while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_LOCKED)) {
		if (req->wb_index > idx_end)
			break;

		next = req->wb_index + 1;
		BUG_ON(!NFS_WBACK_BUSY(req));

		kref_get(&req->wb_kref);
		spin_unlock(&inode->i_lock);
		error = nfs_wait_on_request(req);
		nfs_release_request(req);
		spin_lock(&inode->i_lock);
		if (error < 0)
			return error;
		res++;
	}
	return res;
}
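Example #1 looks up a single tagged entry at a time because it has to drop and retake i_lock around each wait. Most of the later examples instead pull a batch of tagged entries per call and restart the lookup just past the last index returned. Below is a minimal, self-contained sketch of that batched pattern; struct my_item, process_item() and LOOKUP_BATCH are placeholders for illustration, not part of any example in this collection.

#include <linux/radix-tree.h>

#define LOOKUP_BATCH	16

/* Placeholder entry type: real callers store their own objects in the tree. */
struct my_item {
	unsigned long index;		/* index the item was inserted at */
};

static void process_item(struct my_item *item)
{
	/* hypothetical per-entry work */
}

/* Visit every entry carrying @tag, one batch at a time. */
static void walk_tagged(struct radix_tree_root *root, unsigned int tag)
{
	struct my_item *batch[LOOKUP_BATCH];
	unsigned long index = 0;
	unsigned int nr, i;

	while ((nr = radix_tree_gang_lookup_tag(root, (void **)batch, index,
						LOOKUP_BATCH, tag))) {
		for (i = 0; i < nr; i++)
			process_item(batch[i]);
		/* restart just past the last entry returned (cf. Example #6) */
		index = batch[nr - 1]->index + 1;
	}
}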
Example #2
static void single_check(void)
{
	struct item *items[BATCH];
	RADIX_TREE(tree, GFP_KERNEL);
	int ret;

	item_insert(&tree, 0);
	item_tag_set(&tree, 0, 0);
	ret = radix_tree_gang_lookup_tag(&tree, (void **)items, 0, BATCH, 0);
	assert(ret == 1);
	ret = radix_tree_gang_lookup_tag(&tree, (void **)items, 1, BATCH, 0);
	assert(ret == 0);
	verify_tag_consistency(&tree, 0);
	verify_tag_consistency(&tree, 1);
	item_kill_tree(&tree);
}
Example #3
/**
 * hwspin_lock_request() - request an hwspinlock
 *
 * This function should be called by users of the hwspinlock device,
 * in order to dynamically assign them an unused hwspinlock.
 * Usually the user of this lock will then have to communicate the lock's id
 * to the remote core before it can be used for synchronization (to get the
 * id of a given hwlock, use hwspin_lock_get_id()).
 *
 * Should be called from a process context (might sleep)
 *
 * Returns the address of the assigned hwspinlock, or NULL on error
 */
struct hwspinlock *hwspin_lock_request(void)
{
	struct hwspinlock *hwlock;
	int ret;

	mutex_lock(&hwspinlock_tree_lock);

	/* look for an unused lock */
	ret = radix_tree_gang_lookup_tag(&hwspinlock_tree, (void **)&hwlock,
						0, 1, HWSPINLOCK_UNUSED);
	if (ret == 0) {
		pr_warn("a free hwspinlock is not available\n");
		hwlock = NULL;
		goto out;
	}

	/* sanity check that should never fail */
	WARN_ON(ret > 1);

	/* mark as used and power up */
	ret = __hwspin_lock_request(hwlock);
	if (ret < 0)
		hwlock = NULL;

out:
	mutex_unlock(&hwspinlock_tree_lock);
	return hwlock;
}
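A minimal caller sketch for the API documented above, following the kernel-doc's suggested flow (request a lock, publish its id to the remote side, then use it). send_lock_id_to_remote() is a hypothetical stub standing in for whatever IPC the platform uses, and the timeout value is arbitrary.

#include <linux/errno.h>
#include <linux/hwspinlock.h>

/* Hypothetical helper: tell the remote core which hwspinlock id to use. */
static int send_lock_id_to_remote(int id)
{
	return 0;	/* stand-in for platform-specific IPC */
}

static int example_hwspinlock_user(void)
{
	struct hwspinlock *hwlock;
	int id, ret;

	hwlock = hwspin_lock_request();		/* may sleep */
	if (!hwlock)
		return -EBUSY;

	id = hwspin_lock_get_id(hwlock);
	ret = send_lock_id_to_remote(id);
	if (ret)
		goto free;

	/* take the lock, polling for at most 100 ms, then release it */
	ret = hwspin_lock_timeout(hwlock, 100);
	if (ret == 0)
		hwspin_unlock(hwlock);

free:
	hwspin_lock_free(hwlock);
	return ret;
}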
Example #4
/*
 * find all the blocks marked as pending in the radix tree and remove
 * them from the extent map
 */
static int del_pending_extents(struct btrfs_trans_handle *trans,
			       struct btrfs_root *extent_root)
{
	int ret;
	struct btrfs_buffer *gang[4];
	int i;

	while(1) {
		ret = radix_tree_gang_lookup_tag(
					&extent_root->fs_info->cache_radix,
					(void **)gang, 0,
					ARRAY_SIZE(gang),
					CTREE_EXTENT_PENDING_DEL);
		if (!ret)
			break;
		for (i = 0; i < ret; i++) {
			ret = __free_extent(trans, extent_root,
					    gang[i]->blocknr, 1, 1);
			radix_tree_tag_clear(&extent_root->fs_info->cache_radix,
					     gang[i]->blocknr,
					     CTREE_EXTENT_PENDING_DEL);
			btrfs_block_release(extent_root, gang[i]);
		}
	}
	return 0;
}
Example #5
int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
				    struct btrfs_root *root)
{
	struct btrfs_block_group_cache *cache[8];
	int ret;
	int err = 0;
	int werr = 0;
	struct radix_tree_root *radix = &root->fs_info->block_group_radix;
	int i;
	struct btrfs_path path;
	btrfs_init_path(&path);

	while(1) {
		ret = radix_tree_gang_lookup_tag(radix, (void **)cache,
						 0, ARRAY_SIZE(cache),
						 BTRFS_BLOCK_GROUP_DIRTY);
		if (!ret)
			break;
		for (i = 0; i < ret; i++) {
			radix_tree_tag_clear(radix, cache[i]->key.objectid +
					     cache[i]->key.offset - 1,
					     BTRFS_BLOCK_GROUP_DIRTY);
			err = write_one_cache_group(trans, root,
						    &path, cache[i]);
			if (err)
				werr = err;
		}
	}
	return werr;
}
Example #6
static void gang_check(struct radix_tree_root *tree,
			char *thrash_state, int tag)
{
	struct item *items[BATCH];
	int nr_found;
	unsigned long index = 0;
	unsigned long last_index = 0;

	while ((nr_found = radix_tree_gang_lookup_tag(tree, (void **)items,
					index, BATCH, tag))) {
		int i;

		for (i = 0; i < nr_found; i++) {
			struct item *item = items[i];

			while (last_index < item->index) {
				assert(thrash_state[last_index] != NODE_TAGGED);
				last_index++;
			}
			assert(thrash_state[last_index] == NODE_TAGGED);
			last_index++;
		}
		index = items[nr_found - 1]->index + 1;
	}
}
Example #7
static void program_nr_limits(struct msm_bus_fabric_device *fabdev)
{
	int num_nr_lim = 0;
	int i;
	struct msm_bus_inode_info *info[fabdev->num_nr_lim];
	struct msm_bus_fabric *fabric = to_msm_bus_fabric(fabdev);

	num_nr_lim = radix_tree_gang_lookup_tag(&fabric->fab_tree,
			(void **)&info, fabric->fabdev.id, fabdev->num_nr_lim,
			MASTER_NODE);

	for (i = 0; i < num_nr_lim; i++)
		fabdev->algo->config_limiter(fabdev, info[i]);
}
Example #8
static void single_check(void)
{
	struct item *items[BATCH];
	RADIX_TREE(tree, GFP_KERNEL);
	int ret;
	unsigned long first = 0;

	item_insert(&tree, 0);
	item_tag_set(&tree, 0, 0);
	ret = radix_tree_gang_lookup_tag(&tree, (void **)items, 0, BATCH, 0);
	assert(ret == 1);
	ret = radix_tree_gang_lookup_tag(&tree, (void **)items, 1, BATCH, 0);
	assert(ret == 0);
	verify_tag_consistency(&tree, 0);
	verify_tag_consistency(&tree, 1);
	ret = tag_tagged_items(&tree, NULL, first, 10, 10, 0, 1);
	assert(ret == 1);
	ret = radix_tree_gang_lookup_tag(&tree, (void **)items, 0, BATCH, 1);
	assert(ret == 1);
	item_tag_clear(&tree, 0, 0);
	ret = radix_tree_gang_lookup_tag(&tree, (void **)items, 0, BATCH, 0);
	assert(ret == 0);
	item_kill_tree(&tree);
}
Example #9
/*
 * Check that tags propagate correctly when contracting a tree.
 */
static void contract_checks(void)
{
	struct item *item;
	int tmp;
	RADIX_TREE(tree, GFP_KERNEL);

	tmp = 1<<RADIX_TREE_MAP_SHIFT;
	item_insert(&tree, tmp);
	item_insert(&tree, tmp+1);
	item_tag_set(&tree, tmp, 0);
	item_tag_set(&tree, tmp, 1);
	item_tag_set(&tree, tmp+1, 0);
	item_delete(&tree, tmp+1);
	item_tag_clear(&tree, tmp, 1);

	assert(radix_tree_gang_lookup_tag(&tree, (void **)&item, 0, 1, 0) == 1);
	assert(radix_tree_gang_lookup_tag(&tree, (void **)&item, 0, 1, 1) == 0);

	assert(item_tag_get(&tree, tmp, 0) == 1);
	assert(item_tag_get(&tree, tmp, 1) == 0);

	verify_tag_consistency(&tree, 0);
	item_kill_tree(&tree);
}
Example #10
/*
 * Check whether the file has possible unwritten pages.
 *
 * \retval 1    file is mmap-ed or has dirty pages
 * \retval 0    otherwise
 */
blkcnt_t dirty_cnt(struct inode *inode)
{
	blkcnt_t cnt = 0;
	struct vvp_object *vob = cl_inode2vvp(inode);
	void	      *results[1];

	if (inode->i_mapping)
		cnt += radix_tree_gang_lookup_tag(&inode->i_mapping->page_tree,
						  results, 0, 1,
						  PAGECACHE_TAG_DIRTY);
	if (cnt == 0 && atomic_read(&vob->vob_mmap_cnt) > 0)
		cnt = 1;

	return (cnt > 0) ? 1 : 0;
}
Example #11
/*
 * Check whether the file has possible unwritten pages.
 *
 * \retval 1    file is mmap-ed or has dirty pages
 * \retval 0    otherwise
 */
blkcnt_t dirty_cnt(struct inode *inode)
{
        blkcnt_t cnt = 0;
#ifdef __KERNEL__
        struct ccc_object *vob = cl_inode2ccc(inode);
        void              *results[1];

        if (inode->i_mapping != NULL)
                cnt += radix_tree_gang_lookup_tag(&inode->i_mapping->page_tree,
                                                  results, 0, 1,
                                                  PAGECACHE_TAG_DIRTY);
        if (cnt == 0 && cfs_atomic_read(&vob->cob_mmap_cnt) > 0)
                cnt = 1;

#endif
        return (cnt > 0) ? 1 : 0;
}
Example #12
/**
 * nfs_scan_list - Scan a list for matching requests
 * @nfsi: NFS inode
 * @dst: Destination list
 * @idx_start: lower bound of page->index to scan
 * @npages: idx_start + npages sets the upper bound to scan.
 * @tag: tag to scan for
 *
 * Moves elements from one of the inode request lists.
 * If @npages is 0, the entire address_space starting at index @idx_start
 * is scanned.
 * The requests are *not* checked to ensure that they form a contiguous set.
 * You must be holding the inode's i_lock when calling this function.
 */
int nfs_scan_list(struct nfs_inode *nfsi,
		struct list_head *dst, pgoff_t idx_start,
		unsigned int npages, int tag)
{
	struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES];
	struct nfs_page *req;
	pgoff_t idx_end;
	int found, i;
	int res;
	struct list_head *list;

	res = 0;
	if (npages == 0)
		idx_end = ~0;
	else
		idx_end = idx_start + npages - 1;

	for (;;) {
		found = radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree,
				(void **)&pgvec[0], idx_start,
				NFS_SCAN_MAXENTRIES, tag);
		if (found <= 0)
			break;
		for (i = 0; i < found; i++) {
			req = pgvec[i];
			if (req->wb_index > idx_end)
				goto out;
			idx_start = req->wb_index + 1;
			if (nfs_set_page_tag_locked(req)) {
				kref_get(&req->wb_kref);
				radix_tree_tag_clear(&nfsi->nfs_page_tree,
						req->wb_index, tag);
				list = pnfs_choose_commit_list(req, dst);
				nfs_list_add_request(req, list);
				res++;
				if (res == INT_MAX)
					goto out;
			}
		}
		/* for latency reduction */
		cond_resched_lock(&nfsi->vfs_inode.i_lock);
	}
out:
	return res;
}
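A hedged caller sketch for nfs_scan_list(): gather tagged requests from an inode into a local list while holding i_lock, as the kernel-doc above requires. It assumes the NFS-internal declarations used by the examples above (NFS_I(), nfs_scan_list()); the tag parameter is left to the caller, with NFS_PAGE_TAG_LOCKED from Example #1 being one tag used with this tree.

static int scan_tagged_requests(struct inode *inode, int tag,
				struct list_head *dst)
{
	struct nfs_inode *nfsi = NFS_I(inode);
	int res;

	spin_lock(&inode->i_lock);
	/* npages == 0: scan the whole address_space from index 0 */
	res = nfs_scan_list(nfsi, dst, 0, 0, tag);
	spin_unlock(&inode->i_lock);
	return res;
}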
Example #13
static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
                                    struct btrfs_root *root)
{
    struct btrfs_root *gang[8];
    struct btrfs_fs_info *fs_info = root->fs_info;
    int i;
    int ret;
    int err = 0;

    while (1) {
        ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix,
                                         (void **)gang, 0,
                                         ARRAY_SIZE(gang),
                                         BTRFS_ROOT_TRANS_TAG);
        if (ret == 0)
            break;
        for (i = 0; i < ret; i++) {
            root = gang[i];
            radix_tree_tag_clear(&fs_info->fs_roots_radix,
                                 (unsigned long)root->root_key.objectid,
                                 BTRFS_ROOT_TRANS_TAG);

            btrfs_free_log(trans, root);
            btrfs_update_reloc_root(trans, root);
            btrfs_orphan_commit_root(trans, root);

            if (root->commit_root != root->node) {
                switch_commit_root(root);
                btrfs_set_root_node(&root->root_item,
                                    root->node);
            }

            err = btrfs_update_root(trans, fs_info->tree_root,
                                    &root->root_key,
                                    &root->root_item);
            if (err)
                break;
        }
    }
    return err;
}
Example #14
/*
 * search from @first to find the next perag with the given tag set.
 */
struct xfs_perag *
xfs_perag_get_tag(
	struct xfs_mount	*mp,
	xfs_agnumber_t		first,
	int			tag)
{
	struct xfs_perag	*pag;
	int			found;
	int			ref;

	rcu_read_lock();
	found = radix_tree_gang_lookup_tag(&mp->m_perag_tree,
					(void **)&pag, first, 1, tag);
	if (found <= 0) {
		rcu_read_unlock();
		return NULL;
	}
	ref = atomic_inc_return(&pag->pag_ref);
	rcu_read_unlock();
	trace_xfs_perag_get_tag(mp, pag->pag_agno, ref, _RET_IP_);
	return pag;
}
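The usual caller-side iteration over xfs_perag_get_tag() (the same loop appears in the reclaim walkers later in this collection, e.g. Example #19): keep resuming from the AG after the one just returned, and drop the reference each time around. do_something_with_pag() is a placeholder for the per-AG work.

/* Placeholder per-AG work. */
static void do_something_with_pag(struct xfs_perag *pag)
{
}

static void walk_tagged_perags(struct xfs_mount *mp, int tag)
{
	struct xfs_perag *pag;
	xfs_agnumber_t agno = 0;

	while ((pag = xfs_perag_get_tag(mp, agno, tag))) {
		agno = pag->pag_agno + 1;	/* resume after this AG */
		do_something_with_pag(pag);
		xfs_perag_put(pag);		/* drop the reference taken above */
	}
}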
Example #15
/**
 * hwspin_lock_request() - request an hwspinlock
 * @l: lock type to use for the software lock protecting this hwspinlock:
 * 1 to use a mutex, 0 to use a spinlock. If a mutex is used, the hwspinlock
 * APIs may sleep and must not be called from interrupt context. If a
 * spinlock is used, the APIs can be called from any context, but the
 * calling context must not sleep.
 *
 * This function should be called by users of the hwspinlock device,
 * in order to dynamically assign them an unused hwspinlock.
 * Usually the user of this lock will then have to communicate the lock's id
 * to the remote core before it can be used for synchronization (to get the
 * id of a given hwlock, use hwspin_lock_get_id()).
 *
 * Should be called from a process context (might sleep)
 *
 * Returns the address of the assigned hwspinlock, or NULL on error
 */
struct hwspinlock *hwspin_lock_request(enum lock_type l)
{
	struct hwspinlock *hwlock;
	int ret;

	mutex_lock(&hwspinlock_tree_lock);

	/* look for an unused lock */
	ret = radix_tree_gang_lookup_tag(&hwspinlock_tree, (void **)&hwlock,
						0, 1, HWSPINLOCK_UNUSED);
	if (ret == 0) {
		pr_warn("a free hwspinlock is not available\n");
		hwlock = NULL;
		goto out;
	}

	/* sanity check that should never fail */
	WARN_ON(ret > 1);

	/* mark as used and power up */
	ret = __hwspin_lock_request(hwlock);
	if (ret < 0) {
		hwlock = NULL;
	} else {
		if (l == USE_MUTEX_LOCK)
			mutex_init(&hwlock->sw_l.mlock);
		else
			spin_lock_init(&hwlock->sw_l.slock);

		hwlock->tlock = l;
	}

out:
	mutex_unlock(&hwspinlock_tree_lock);
	return hwlock;
}
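The lock_type variant above only changes how the lock is requested; below is a minimal sketch of asking for a mutex-backed hwspinlock, assuming the USE_MUTEX_LOCK enumerator that appears in the code above.

static struct hwspinlock *request_sleepable_hwlock(void)
{
	/*
	 * USE_MUTEX_LOCK selects the mutex-backed software lock, so per the
	 * kernel-doc the lock/unlock paths may sleep and must not be used
	 * from interrupt context.
	 */
	return hwspin_lock_request(USE_MUTEX_LOCK);
}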
Example #16
STATIC xfs_inode_t *
xfs_inode_ag_lookup(
	struct xfs_mount	*mp,
	struct xfs_perag	*pag,
	uint32_t		*first_index,
	int			tag)
{
	int			nr_found;
	struct xfs_inode	*ip;

	/*
	 * Use a gang lookup to find the next inode in the tree, as the
	 * tree is sparse and a gang lookup walks the tree until it finds
	 * the requested number of objects.
	 */
	if (tag == XFS_ICI_NO_TAG) {
		nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
				(void **)&ip, *first_index, 1);
	} else {
		nr_found = radix_tree_gang_lookup_tag(&pag->pag_ici_root,
				(void **)&ip, *first_index, 1, tag);
	}
	if (!nr_found)
		return NULL;

	/*
	 * Update the index for the next lookup. Catch overflows
	 * into the next AG range which can occur if we have inodes
	 * in the last block of the AG and we are currently
	 * pointing to the last inode.
	 */
	*first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
	if (*first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
		return NULL;
	return ip;
}
Example #17
int
xfs_reclaim_inodes_ag(
	struct xfs_mount	*mp,
	int			flags,
	int			*nr_to_scan)
{
	struct xfs_perag	*pag;
	int			error = 0;
	int			last_error = 0;
	xfs_agnumber_t		ag;
	int			trylock = flags & SYNC_TRYLOCK;
	int			skipped;

restart:
	ag = 0;
	skipped = 0;
	while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
		unsigned long	first_index = 0;
		int		done = 0;
		int		nr_found = 0;

		ag = pag->pag_agno + 1;

		if (trylock) {
			if (!mutex_trylock(&pag->pag_ici_reclaim_lock)) {
				skipped++;
				xfs_perag_put(pag);
				continue;
			}
			first_index = pag->pag_ici_reclaim_cursor;
		} else
			mutex_lock(&pag->pag_ici_reclaim_lock);

		do {
			struct xfs_inode *batch[XFS_LOOKUP_BATCH];
			int	i;

			rcu_read_lock();
			nr_found = radix_tree_gang_lookup_tag(
					&pag->pag_ici_root,
					(void **)batch, first_index,
					XFS_LOOKUP_BATCH,
					XFS_ICI_RECLAIM_TAG);
			if (!nr_found) {
				done = 1;
				rcu_read_unlock();
				break;
			}

			for (i = 0; i < nr_found; i++) {
				struct xfs_inode *ip = batch[i];

				if (done || xfs_reclaim_inode_grab(ip, flags))
					batch[i] = NULL;

				if (XFS_INO_TO_AGNO(mp, ip->i_ino) !=
								pag->pag_agno)
					continue;
				first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
				if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
					done = 1;
			}

			/* unlock now we've grabbed the inodes. */
			rcu_read_unlock();

			for (i = 0; i < nr_found; i++) {
				if (!batch[i])
					continue;
				error = xfs_reclaim_inode(batch[i], pag, flags);
				if (error && last_error != EFSCORRUPTED)
					last_error = error;
			}

			*nr_to_scan -= XFS_LOOKUP_BATCH;

			cond_resched();

		} while (nr_found && !done && *nr_to_scan > 0);

		if (trylock && !done)
			pag->pag_ici_reclaim_cursor = first_index;
		else
			pag->pag_ici_reclaim_cursor = 0;
		mutex_unlock(&pag->pag_ici_reclaim_lock);
		xfs_perag_put(pag);
	}

	if (skipped && (flags & SYNC_WAIT) && *nr_to_scan > 0) {
		trylock = 0;
		goto restart;
	}
	return XFS_ERROR(last_error);
}
Example #18
static void compute_nr_limits(struct msm_bus_fabric_device *fabdev, int pnode)
{
	uint64_t total_ib = 0;
	int num_nr_lim = 0;
	uint64_t avail_bw = 0;
	struct msm_bus_inode_info *info[fabdev->num_nr_lim];
	struct msm_bus_fabric *fabric = to_msm_bus_fabric(fabdev);
	int i;

	num_nr_lim = radix_tree_gang_lookup_tag(&fabric->fab_tree,
			(void **)&info, fabric->fabdev.id, fabdev->num_nr_lim,
			MASTER_NODE);

	MSM_BUS_DBG("%s: Found %d NR LIM nodes", __func__, num_nr_lim);
	for (i = 0; i < num_nr_lim; i++)
		total_ib += get_node_maxib(info[i]);

	avail_bw = get_avail_bw(fabdev);
	MSM_BUS_DBG("\n %s: Avail BW %llu", __func__, avail_bw);

	for (i = 0; i < num_nr_lim; i++) {
		uint32_t node_pct = 0;
		uint64_t new_lim_bw = 0;
		uint64_t node_max_ib = 0;
		uint32_t node_max_ib_kB = 0;
		uint32_t total_ib_kB = 0;
		uint64_t bw_node;

		node_max_ib = get_node_maxib(info[i]);
		node_max_ib_kB = msm_bus_div64(1024, node_max_ib);
		total_ib_kB = msm_bus_div64(1024, total_ib);
		node_pct = (node_max_ib_kB * 100) / total_ib_kB;
		bw_node = node_pct * avail_bw;
		new_lim_bw = msm_bus_div64(100, bw_node);

		/*
		 * If the limiter bw is more than the requested IB, clip it
		 * to the requested IB.
		 */
		if (new_lim_bw >= node_max_ib)
			new_lim_bw = node_max_ib;

		/*
		 * If there is a floor bw for this nr lim node, there is
		 * available bw to divvy up among the nr masters, the nr lim
		 * masters have a non-zero vote, and the limited bw is below
		 * the floor for this node, then limit this node to the
		 * floor bw.
		 */
		if (info[i]->node_info->floor_bw && node_max_ib && avail_bw &&
			(new_lim_bw <= info[i]->node_info->floor_bw)) {
			MSM_BUS_ERR("\nNode %d:Limiting BW:%llu < floor:%llu",
				info[i]->node_info->id,	new_lim_bw,
						info[i]->node_info->floor_bw);
			new_lim_bw = info[i]->node_info->floor_bw;
		}

		if (new_lim_bw != info[i]->cur_lim_bw) {
			info[i]->cur_lim_bw = new_lim_bw;
			MSM_BUS_DBG("NodeId %d: Requested IB %llu",
					info[i]->node_info->id, node_max_ib);
			MSM_BUS_DBG("Limited to %llu(%d pct of Avail %llu )\n",
					new_lim_bw, node_pct, avail_bw);
		} else {
			MSM_BUS_DBG("NodeId %d: No change Limited to %llu\n",
				info[i]->node_info->id, info[i]->cur_lim_bw);
		}
	}
}
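A hypothetical worked example of the proportional limiting above (numbers invented for illustration): with two NR-limited masters whose maximum IB votes are 300 MB/s and 100 MB/s, total_ib is 400 MB/s. If the available bandwidth is 200 MB/s, the first node gets node_pct = 75 and new_lim_bw = 75 * 200 / 100 = 150 MB/s, and the second gets 25%, i.e. 50 MB/s; neither exceeds its requested IB, so no clipping occurs. If a node's computed limit fell at or below its floor_bw (and the other conditions in the comment above held), it would be raised to the floor instead.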
Example #19
/*
 * Walk the AGs and reclaim the inodes in them. Even if the filesystem is
 * corrupted, we still want to try to reclaim all the inodes. If we don't,
 * then a shutdown during the filesystem unmount reclaim walk will leak all
 * the unreclaimed inodes.
 */
int
xfs_reclaim_inodes_ag(
	struct xfs_mount	*mp,
	int			flags,
	int			*nr_to_scan)
{
	struct xfs_perag	*pag;
	int			error = 0;
	int			last_error = 0;
	xfs_agnumber_t		ag;
	int			trylock = flags & SYNC_TRYLOCK;
	int			skipped;

restart:
	ag = 0;
	skipped = 0;
	while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
		unsigned long	first_index = 0;
		int		done = 0;
		int		nr_found = 0;

		ag = pag->pag_agno + 1;

		if (trylock) {
			if (!mutex_trylock(&pag->pag_ici_reclaim_lock)) {
				skipped++;
				xfs_perag_put(pag);
				continue;
			}
			first_index = pag->pag_ici_reclaim_cursor;
		} else
			mutex_lock(&pag->pag_ici_reclaim_lock);

		do {
			struct xfs_inode *batch[XFS_LOOKUP_BATCH];
			int	i;

			rcu_read_lock();
			nr_found = radix_tree_gang_lookup_tag(
					&pag->pag_ici_root,
					(void **)batch, first_index,
					XFS_LOOKUP_BATCH,
					XFS_ICI_RECLAIM_TAG);
			if (!nr_found) {
				done = 1;
				rcu_read_unlock();
				break;
			}

			/*
			 * Grab the inodes before we drop the lock. If we found
			 * nothing, nr == 0 and the loop will be skipped.
			 */
			for (i = 0; i < nr_found; i++) {
				struct xfs_inode *ip = batch[i];

				if (done || xfs_reclaim_inode_grab(ip, flags))
					batch[i] = NULL;

				/*
				 * Update the index for the next lookup. Catch
				 * overflows into the next AG range which can
				 * occur if we have inodes in the last block of
				 * the AG and we are currently pointing to the
				 * last inode.
				 *
				 * Because we may see inodes that are from the
				 * wrong AG due to RCU freeing and
				 * reallocation, only update the index if it
				 * lies in this AG. It was a race that led us
				 * to see this inode, so another lookup from
				 * the same index will not find it again.
				 */
				if (XFS_INO_TO_AGNO(mp, ip->i_ino) !=
								pag->pag_agno)
					continue;
				first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
				if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
					done = 1;
			}

			/* unlock now we've grabbed the inodes. */
			rcu_read_unlock();

			for (i = 0; i < nr_found; i++) {
				if (!batch[i])
					continue;
				error = xfs_reclaim_inode(batch[i], pag, flags);
				if (error && last_error != EFSCORRUPTED)
					last_error = error;
			}

			*nr_to_scan -= XFS_LOOKUP_BATCH;

		} while (nr_found && !done && *nr_to_scan > 0);

		if (trylock && !done)
			pag->pag_ici_reclaim_cursor = first_index;
		else
			pag->pag_ici_reclaim_cursor = 0;
		mutex_unlock(&pag->pag_ici_reclaim_lock);
		xfs_perag_put(pag);
	}

	/*
	 * If we skipped any AG, and we still have scan count remaining, do
	 * another pass, this time using blocking reclaim semantics (i.e.
	 * waiting on the reclaim locks and ignoring the reclaim cursors). This
	 * ensures that when we have more reclaimers than AGs we block rather
	 * than spin trying to execute reclaim.
	 */
	if (trylock && skipped && *nr_to_scan > 0) {
		trylock = 0;
		goto restart;
	}
	return XFS_ERROR(last_error);
}
Example #20
STATIC int
xfs_inode_ag_walk(
	struct xfs_mount	*mp,
	struct xfs_perag	*pag,
	int			(*execute)(struct xfs_inode *ip, int flags,
					   void *args),
	int			flags,
	void			*args,
	int			tag)
{
	uint32_t		first_index;
	int			last_error = 0;
	int			skipped;
	int			done;
	int			nr_found;

restart:
	done = 0;
	skipped = 0;
	first_index = 0;
	nr_found = 0;
	do {
		struct xfs_inode *batch[XFS_LOOKUP_BATCH];
		int		error = 0;
		int		i;

		rcu_read_lock();

		if (tag == -1)
			nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
					(void **)batch, first_index,
					XFS_LOOKUP_BATCH);
		else
			nr_found = radix_tree_gang_lookup_tag(
					&pag->pag_ici_root,
					(void **) batch, first_index,
					XFS_LOOKUP_BATCH, tag);

		if (!nr_found) {
			rcu_read_unlock();
			break;
		}

		/*
		 * Grab the inodes before we drop the lock. If we found
		 * nothing, nr == 0 and the loop will be skipped.
		 */
		for (i = 0; i < nr_found; i++) {
			struct xfs_inode *ip = batch[i];

			if (done || xfs_inode_ag_walk_grab(ip))
				batch[i] = NULL;

			/*
			 * Update the index for the next lookup. Catch
			 * overflows into the next AG range which can occur if
			 * we have inodes in the last block of the AG and we
			 * are currently pointing to the last inode.
			 *
			 * Because we may see inodes that are from the wrong AG
			 * due to RCU freeing and reallocation, only update the
			 * index if it lies in this AG. It was a race that led
			 * us to see this inode, so another lookup from the
			 * same index will not find it again.
			 */
			if (XFS_INO_TO_AGNO(mp, ip->i_ino) != pag->pag_agno)
				continue;
			first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
			if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
				done = 1;
		}

		/* unlock now we've grabbed the inodes. */
		rcu_read_unlock();

		for (i = 0; i < nr_found; i++) {
			if (!batch[i])
				continue;
			error = execute(batch[i], flags, args);
			IRELE(batch[i]);
			if (error == -EAGAIN) {
				skipped++;
				continue;
			}
			if (error && last_error != -EFSCORRUPTED)
				last_error = error;
		}

		/* bail out if the filesystem is corrupted.  */
		if (error == -EFSCORRUPTED)
			break;

		cond_resched();

	} while (nr_found && !done);

	if (skipped) {
		delay(1);
		goto restart;
	}
	return last_error;
}
Example #21
/*
 * at transaction commit time we need to schedule the old roots for
 * deletion via btrfs_drop_snapshot.  This runs through all the
 * reference counted roots that were modified in the current
 * transaction and puts them into the drop list
 */
static noinline int add_dirty_roots(struct btrfs_trans_handle *trans,
                                    struct radix_tree_root *radix,
                                    struct list_head *list)
{
    struct btrfs_dirty_root *dirty;
    struct btrfs_root *gang[8];
    struct btrfs_root *root;
    int i;
    int ret;
    int err = 0;
    u32 refs;

    while (1) {
        ret = radix_tree_gang_lookup_tag(radix, (void **)gang, 0,
                                         ARRAY_SIZE(gang),
                                         BTRFS_ROOT_TRANS_TAG);
        if (ret == 0)
            break;
        for (i = 0; i < ret; i++) {
            root = gang[i];
            radix_tree_tag_clear(radix,
                                 (unsigned long)root->root_key.objectid,
                                 BTRFS_ROOT_TRANS_TAG);

            BUG_ON(!root->ref_tree);
            dirty = root->dirty_root;

            btrfs_free_log(trans, root);
            btrfs_free_reloc_root(trans, root);

            if (root->commit_root == root->node) {
                WARN_ON(root->node->start !=
                        btrfs_root_bytenr(&root->root_item));

                free_extent_buffer(root->commit_root);
                root->commit_root = NULL;
                root->dirty_root = NULL;

                spin_lock(&root->list_lock);
                list_del_init(&dirty->root->dead_list);
                spin_unlock(&root->list_lock);

                kfree(dirty->root);
                kfree(dirty);

                /* make sure to update the root on disk
                 * so we get any updates to the block used
                 * counts
                 */
                err = btrfs_update_root(trans,
                                        root->fs_info->tree_root,
                                        &root->root_key,
                                        &root->root_item);
                continue;
            }

            memset(&root->root_item.drop_progress, 0,
                   sizeof(struct btrfs_disk_key));
            root->root_item.drop_level = 0;
            root->commit_root = NULL;
            root->dirty_root = NULL;
            root->root_key.offset = root->fs_info->generation;
            btrfs_set_root_bytenr(&root->root_item,
                                  root->node->start);
            btrfs_set_root_level(&root->root_item,
                                 btrfs_header_level(root->node));
            btrfs_set_root_generation(&root->root_item,
                                      root->root_key.offset);

            err = btrfs_insert_root(trans, root->fs_info->tree_root,
                                    &root->root_key,
                                    &root->root_item);
            if (err)
                break;

            refs = btrfs_root_refs(&dirty->root->root_item);
            btrfs_set_root_refs(&dirty->root->root_item, refs - 1);
            err = btrfs_update_root(trans, root->fs_info->tree_root,
                                    &dirty->root->root_key,
                                    &dirty->root->root_item);

            BUG_ON(err);
            if (refs == 1) {
                list_add(&dirty->list, list);
            } else {
                WARN_ON(1);
                free_extent_buffer(dirty->root->node);
                kfree(dirty->root);
                kfree(dirty);
            }
        }
    }
    return err;
}