Example #1
static void evict_entry(struct drm_gem_object *obj,
		enum tiler_fmt fmt, struct usergart_entry *entry)
{
	if (obj->dev->dev_mapping) {
		struct omap_gem_object *omap_obj = to_omap_bo(obj);
		int n = usergart[fmt].height;
		size_t size = PAGE_SIZE * n;
		loff_t off = mmap_offset(obj) +
				(entry->obj_pgoff << PAGE_SHIFT);
		const int m = 1 + ((omap_obj->width << fmt) / PAGE_SIZE);
		if (m > 1) {
			int i;
			/* if stride is greater than PAGE_SIZE then sparse mapping: */
			for (i = n; i > 0; i--) {
				unmap_mapping_range(obj->dev->dev_mapping,
						off, PAGE_SIZE, 1);
				off += PAGE_SIZE * m;
			}
		} else {
			unmap_mapping_range(obj->dev->dev_mapping, off, size, 1);
		}
	}

	entry->obj = NULL;
}
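
For reference, the helper all of these examples revolve around is declared in include/linux/mm.h. A holelen of 0 means "unmap from holebegin to the end of the file", and even_cows selects whether private COWed pages are zapped as well:

void unmap_mapping_range(struct address_space *mapping,
			 loff_t const holebegin, loff_t const holelen,
			 int even_cows);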
Example #2
void fuse_truncate(struct address_space *mapping, loff_t offset)
{
	/* See vmtruncate() */
	unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
	truncate_inode_pages(mapping, offset);
	unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
}
Example #3
///////////////////////////////////////////////////////////
// DropPages
//
//
///////////////////////////////////////////////////////////
void
DropPages(
    IN struct address_space* m
    )
{
  filemap_fdatawrite( m );
  unmap_mapping_range( m, 0, 0, 1 );
  truncate_inode_pages( m, 0 );
  unmap_mapping_range( m, 0, 0, 1 );
}
Example #4
void spu_unmap_mappings(struct spu_context *ctx)
{
	if (ctx->local_store)
		unmap_mapping_range(ctx->local_store, 0, LS_SIZE, 1);
	if (ctx->mfc)
		unmap_mapping_range(ctx->mfc, 0, 0x4000, 1);
	if (ctx->cntl)
		unmap_mapping_range(ctx->cntl, 0, 0x4000, 1);
	if (ctx->signal1)
		unmap_mapping_range(ctx->signal1, 0, 0x4000, 1);
	if (ctx->signal2)
		unmap_mapping_range(ctx->signal2, 0, 0x4000, 1);
}
Example #5
/**
 * truncate_pagecache - unmap and remove pagecache that has been truncated
 * @inode: inode
 * @newsize: new file size
 *
 * inode's new i_size must already be written before truncate_pagecache
 * is called.
 *
 * This function should typically be called before the filesystem
 * releases resources associated with the freed range (eg. deallocates
 * blocks). This way, pagecache will always stay logically coherent
 * with on-disk format, and the filesystem would not have to deal with
 * situations such as writepage being called for a page that has already
 * had its underlying blocks deallocated.
 */
void truncate_pagecache(struct inode *inode, loff_t newsize)
{
    struct address_space *mapping = inode->i_mapping;
    loff_t holebegin = round_up(newsize, PAGE_SIZE);

    /*
     * unmap_mapping_range is called twice, first simply for
     * efficiency so that truncate_inode_pages does fewer
     * single-page unmaps.  However after this first call, and
     * before truncate_inode_pages finishes, it is possible for
     * private pages to be COWed, which remain after
     * truncate_inode_pages finishes, hence the second
     * unmap_mapping_range call must be made for correctness.
     */
    unmap_mapping_range(mapping, holebegin, 0, 1);
    truncate_inode_pages(mapping, newsize);
    unmap_mapping_range(mapping, holebegin, 0, 1);
}
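
A minimal sketch of the caller side of this pattern, with a hypothetical example_fs_free_blocks() helper; in practice filesystems usually go through truncate_setsize(), which updates i_size and then calls truncate_pagecache():

static int example_fs_truncate(struct inode *inode, loff_t newsize)
{
	/* update i_size, then unmap and drop pagecache beyond newsize */
	truncate_setsize(inode, newsize);

	/* hypothetical helper: release on-disk blocks past newsize */
	return example_fs_free_blocks(inode, newsize);
}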
Example #6
int truncate_inode_page(struct address_space *mapping, struct page *page)
{
    if (page_mapped(page)) {
        unmap_mapping_range(mapping,
                            (loff_t)page->index << PAGE_CACHE_SHIFT,
                            PAGE_CACHE_SIZE, 0);
    }
    return truncate_complete_page(mapping, page);
}
Example #7
/**
 * nfs_sync_mapping - helper to flush all mmapped dirty data to disk
 */
int nfs_sync_mapping(struct address_space *mapping)
{
	int ret = 0;

	if (mapping->nrpages != 0) {
		unmap_mapping_range(mapping, 0, 0, 0);
		ret = nfs_wb_all(mapping->host);
	}
	return ret;
}
Example #8
void ttm_bo_unmap_virtual_locked(struct ttm_buffer_object *bo)
{
	struct ttm_bo_device *bdev = bo->bdev;
	loff_t offset = (loff_t) bo->addr_space_offset;
	loff_t holelen = ((loff_t) bo->mem.num_pages) << PAGE_SHIFT;

	if (!bdev->dev_mapping)
		return;
	unmap_mapping_range(bdev->dev_mapping, offset, holelen, 1);
	ttm_mem_io_free_vm(bo);
}
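
Passing holelen 0 extends the unmap to the end of the file, which is useful when a driver wants to tear down every user mapping of its device node at once. A sketch under that assumption (dev_mapping stands in for whatever address_space the driver cached at mmap time):

static void example_drv_unmap_all(struct address_space *dev_mapping)
{
	/* holelen 0 means "from offset 0 to end of file": drop all user PTEs */
	if (dev_mapping)
		unmap_mapping_range(dev_mapping, 0, 0, 1);
}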
Example #9
static void evict_entry(struct drm_gem_object *obj,
		enum tiler_fmt fmt, struct usergart_entry *entry)
{
	if (obj->dev->dev_mapping) {
		size_t size = PAGE_SIZE * usergart[fmt].height;
		loff_t off = mmap_offset(obj) +
				(entry->obj_pgoff << PAGE_SHIFT);
		unmap_mapping_range(obj->dev->dev_mapping, off, size, 1);
	}

	entry->obj = NULL;
}
Example #10
/* XXX put nice comment here.  talk about __free_pte -> dirty pages and
 * nopage's reference passing to the pte
 */
int ll_teardown_mmaps(struct address_space *mapping, __u64 first, __u64 last)
{
	int rc = -ENOENT;

	LASSERTF(last > first, "last %llu first %llu\n", last, first);
	if (mapping_mapped(mapping)) {
		rc = 0;
		unmap_mapping_range(mapping, first + PAGE_CACHE_SIZE - 1,
				    last - first + 1, 0);
	}

	return rc;
}
Example #11
int truncate_inode_page(struct address_space *mapping, struct page *page)
{
	loff_t holelen;
	VM_BUG_ON_PAGE(PageTail(page), page);

	holelen = PageTransHuge(page) ? HPAGE_PMD_SIZE : PAGE_SIZE;
	if (page_mapped(page)) {
		unmap_mapping_range(mapping,
				   (loff_t)page->index << PAGE_SHIFT,
				   holelen, 0);
	}
	return truncate_complete_page(mapping, page);
}
Example #12
static void fuse_vmtruncate(struct _inode *inode, loff_t offset)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	int need_trunc;

	spin_lock(&fc->lock);
	need_trunc = inode->i_size > offset;
	i_size_write(inode, offset);
	spin_unlock(&fc->lock);

	if (need_trunc) {
		struct address_space *mapping = inode->i_mapping;
		unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
		truncate_inode_pages(mapping, offset);
	}
}
Example #13
static int vvp_conf_set(const struct lu_env *env, struct cl_object *obj,
			const struct cl_object_conf *conf)
{
	struct ll_inode_info *lli = ll_i2info(conf->coc_inode);

	if (conf->coc_opc == OBJECT_CONF_INVALIDATE) {
		CDEBUG(D_VFSTRACE, DFID ": losing layout lock\n",
		       PFID(&lli->lli_fid));

		ll_layout_version_set(lli, LL_LAYOUT_GEN_NONE);

		/* Clean up page mmap for this inode.
		 * The reason for us to do this is that if the page has
		 * already been installed into memory space, the process
		 * can access it without interacting with lustre, so this
		 * page may be stale due to layout change, and the process
		 * will never be notified.
		 * This operation is expensive but mmap processes have to pay
		 * a price themselves.
		 */
		unmap_mapping_range(conf->coc_inode->i_mapping,
				    0, OBD_OBJECT_EOF, 0);

		return 0;
	}

	if (conf->coc_opc != OBJECT_CONF_SET)
		return 0;

	if (conf->u.coc_md && conf->u.coc_md->lsm) {
		CDEBUG(D_VFSTRACE, DFID ": layout version change: %u -> %u\n",
		       PFID(&lli->lli_fid), lli->lli_layout_gen,
		       conf->u.coc_md->lsm->lsm_layout_gen);

		lli->lli_has_smd = lsm_has_objects(conf->u.coc_md->lsm);
		ll_layout_version_set(lli, conf->u.coc_md->lsm->lsm_layout_gen);
	} else {
		CDEBUG(D_VFSTRACE, DFID ": layout nuked: %u.\n",
		       PFID(&lli->lli_fid), lli->lli_layout_gen);

		lli->lli_has_smd = false;
		ll_layout_version_set(lli, LL_LAYOUT_GEN_EMPTY);
	}
	return 0;
}
Example #14
/*
 * Device passthrough support
 */
static int kvmppc_xive_native_reset_mapped(struct kvm *kvm, unsigned long irq)
{
	struct kvmppc_xive *xive = kvm->arch.xive;

	if (irq >= KVMPPC_XIVE_NR_IRQS)
		return -EINVAL;

	/*
	 * Clear the ESB pages of the IRQ number being mapped (or
	 * unmapped) into the guest and let the VM fault handler
	 * repopulate with the appropriate ESB pages (device or IC)
	 */
	pr_debug("clearing esb pages for girq 0x%lx\n", irq);
	mutex_lock(&xive->mapping_lock);
	if (xive->mapping)
		unmap_mapping_range(xive->mapping,
				    irq * (2ull << PAGE_SHIFT),
				    2ull << PAGE_SHIFT, 1);
	mutex_unlock(&xive->mapping_lock);
	return 0;
}
Example #15
void spu_unmap_mappings(struct spu_context *ctx)
{
	mutex_lock(&ctx->mapping_lock);
	if (ctx->local_store)
		unmap_mapping_range(ctx->local_store, 0, LS_SIZE, 1);
	if (ctx->mfc)
		unmap_mapping_range(ctx->mfc, 0, SPUFS_MFC_MAP_SIZE, 1);
	if (ctx->cntl)
		unmap_mapping_range(ctx->cntl, 0, SPUFS_CNTL_MAP_SIZE, 1);
	if (ctx->signal1)
		unmap_mapping_range(ctx->signal1, 0, SPUFS_SIGNAL_MAP_SIZE, 1);
	if (ctx->signal2)
		unmap_mapping_range(ctx->signal2, 0, SPUFS_SIGNAL_MAP_SIZE, 1);
	if (ctx->mss)
		unmap_mapping_range(ctx->mss, 0, SPUFS_MSS_MAP_SIZE, 1);
	if (ctx->psmap)
		unmap_mapping_range(ctx->psmap, 0, SPUFS_PS_MAP_SIZE, 1);
	mutex_unlock(&ctx->mapping_lock);
}
Example #16
/**
 * truncate_pagecache_range - unmap and remove pagecache that is hole-punched
 * @inode: inode
 * @lstart: offset of beginning of hole
 * @lend: offset of last byte of hole
 *
 * This function should typically be called before the filesystem
 * releases resources associated with the freed range (eg. deallocates
 * blocks). This way, pagecache will always stay logically coherent
 * with on-disk format, and the filesystem would not have to deal with
 * situations such as writepage being called for a page that has already
 * had its underlying blocks deallocated.
 */
void truncate_pagecache_range(struct inode *inode, loff_t lstart, loff_t lend)
{
    struct address_space *mapping = inode->i_mapping;
    loff_t unmap_start = round_up(lstart, PAGE_SIZE);
    loff_t unmap_end = round_down(1 + lend, PAGE_SIZE) - 1;
    /*
     * This rounding is currently just for example: unmap_mapping_range
     * expands its hole outwards, whereas we want it to contract the hole
     * inwards.  However, existing callers of truncate_pagecache_range are
     * doing their own page rounding first.  Note that unmap_mapping_range
     * allows holelen 0 for all, and we allow lend -1 for end of file.
     */

    /*
     * Unlike in truncate_pagecache, unmap_mapping_range is called only
     * once (before truncating pagecache), and without "even_cows" flag:
     * hole-punching should not remove private COWed pages from the hole.
     */
    if ((u64)unmap_end > (u64)unmap_start)
        unmap_mapping_range(mapping, unmap_start,
                            1 + unmap_end - unmap_start, 0);
    truncate_inode_pages_range(mapping, lstart, lend);
}
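
A hedged sketch of the hole-punching path this helper is written for; example_fs_punch_blocks() is a hypothetical stand-in for the filesystem's block-deallocation step:

static int example_fs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
{
	/* drop pagecache for the punched range first ... */
	truncate_pagecache_range(inode, offset, offset + len - 1);

	/* ... then free the underlying blocks (hypothetical helper) */
	return example_fs_punch_blocks(inode, offset, len);
}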
Example #17
static int ocfs2_truncate_file(struct inode *inode,
			       struct buffer_head *di_bh,
			       u64 new_i_size)
{
	int status = 0;
	struct ocfs2_dinode *fe = NULL;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct ocfs2_truncate_context *tc = NULL;

	mlog_entry("(inode = %llu, new_i_size = %llu\n",
		   (unsigned long long)OCFS2_I(inode)->ip_blkno,
		   (unsigned long long)new_i_size);

	unmap_mapping_range(inode->i_mapping, new_i_size + PAGE_SIZE - 1, 0, 1);
	truncate_inode_pages(inode->i_mapping, new_i_size);

	fe = (struct ocfs2_dinode *) di_bh->b_data;
	if (!OCFS2_IS_VALID_DINODE(fe)) {
		OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe);
		status = -EIO;
		goto bail;
	}

	mlog_bug_on_msg(le64_to_cpu(fe->i_size) != i_size_read(inode),
			"Inode %llu, inode i_size = %lld != di "
			"i_size = %llu, i_flags = 0x%x\n",
			(unsigned long long)OCFS2_I(inode)->ip_blkno,
			i_size_read(inode),
			(unsigned long long)le64_to_cpu(fe->i_size),
			le32_to_cpu(fe->i_flags));

	if (new_i_size > le64_to_cpu(fe->i_size)) {
		mlog(0, "asked to truncate file with size (%llu) to size (%llu)!\n",
		     (unsigned long long)le64_to_cpu(fe->i_size),
		     (unsigned long long)new_i_size);
		status = -EINVAL;
		mlog_errno(status);
		goto bail;
	}

	mlog(0, "inode %llu, i_size = %llu, new_i_size = %llu\n",
	     (unsigned long long)le64_to_cpu(fe->i_blkno),
	     (unsigned long long)le64_to_cpu(fe->i_size),
	     (unsigned long long)new_i_size);

	/* lets handle the simple truncate cases before doing any more
	 * cluster locking. */
	if (new_i_size == le64_to_cpu(fe->i_size))
		goto bail;

	/* This forces other nodes to sync and drop their pages. Do
	 * this even if we have a truncate without allocation change -
	 * ocfs2 cluster sizes can be much greater than page size, so
	 * we have to truncate them anyway.  */
	status = ocfs2_data_lock(inode, 1);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	/* alright, we're going to need to do a full blown alloc size
	 * change. Orphan the inode so that recovery can complete the
	 * truncate if necessary. This does the task of marking
	 * i_size. */
	status = ocfs2_orphan_for_truncate(osb, inode, di_bh, new_i_size);
	if (status < 0) {
		mlog_errno(status);
		goto bail_unlock_data;
	}

	status = ocfs2_prepare_truncate(osb, inode, di_bh, &tc);
	if (status < 0) {
		mlog_errno(status);
		goto bail_unlock_data;
	}

	status = ocfs2_commit_truncate(osb, inode, di_bh, tc);
	if (status < 0) {
		mlog_errno(status);
		goto bail_unlock_data;
	}

	/* TODO: orphan dir cleanup here. */
bail_unlock_data:
	ocfs2_data_unlock(inode, 1);

bail:

	mlog_exit(status);
	return status;
}
Example #18
/**
 * invalidate_inode_pages2_range - remove range of pages from an address_space
 * @mapping: the address_space
 * @start: the page offset 'from' which to invalidate
 * @end: the page offset 'to' which to invalidate (inclusive)
 *
 * Any pages which are found to be mapped into pagetables are unmapped prior to
 * invalidation.
 *
 * Returns -EBUSY if any pages could not be invalidated.
 */
int invalidate_inode_pages2_range(struct address_space *mapping,
                                  pgoff_t start, pgoff_t end)
{
    pgoff_t indices[PAGEVEC_SIZE];
    struct pagevec pvec;
    pgoff_t index;
    int i;
    int ret = 0;
    int ret2 = 0;
    int did_range_unmap = 0;

    cleancache_invalidate_inode(mapping);
    pagevec_init(&pvec, 0);
    index = start;
    while (index <= end && pagevec_lookup_entries(&pvec, mapping, index,
            min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
            indices)) {
        for (i = 0; i < pagevec_count(&pvec); i++) {
            struct page *page = pvec.pages[i];

            /* We rely upon deletion not changing page->index */
            index = indices[i];
            if (index > end)
                break;

            if (radix_tree_exceptional_entry(page)) {
                clear_exceptional_entry(mapping, index, page);
                continue;
            }

            lock_page(page);
            WARN_ON(page->index != index);
            if (page->mapping != mapping) {
                unlock_page(page);
                continue;
            }
            wait_on_page_writeback(page);
            if (page_mapped(page)) {
                if (!did_range_unmap) {
                    /*
                     * Zap the rest of the file in one hit.
                     */
                    unmap_mapping_range(mapping,
                                        (loff_t)index << PAGE_CACHE_SHIFT,
                                        (loff_t)(1 + end - index)
                                        << PAGE_CACHE_SHIFT,
                                        0);
                    did_range_unmap = 1;
                } else {
                    /*
                     * Just zap this page
                     */
                    unmap_mapping_range(mapping,
                                        (loff_t)index << PAGE_CACHE_SHIFT,
                                        PAGE_CACHE_SIZE, 0);
                }
            }
            BUG_ON(page_mapped(page));
            ret2 = do_launder_page(mapping, page);
            if (ret2 == 0) {
                if (!invalidate_complete_page2(mapping, page))
                    ret2 = -EBUSY;
            }
            if (ret2 < 0)
                ret = ret2;
            unlock_page(page);
        }
        pagevec_remove_exceptionals(&pvec);
        pagevec_release(&pvec);
        cond_resched();
        index++;
    }
    cleancache_invalidate_inode(mapping);
    return ret;
}
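
Callers that want to drop an inode's entire pagecache (for example when a network filesystem notices the file changed on the server) can cover the whole file in one call; invalidate_inode_pages2() in the kernel is essentially this wrapper:

static int example_invalidate_whole_file(struct inode *inode)
{
	/* an end index of -1 covers every possible page offset */
	return invalidate_inode_pages2_range(inode->i_mapping, 0, -1);
}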
Example #19
static int copy_user_bh(struct page *to, struct inode *inode,
		struct buffer_head *bh, unsigned long vaddr)
{
	struct blk_dax_ctl dax = {
		.sector = to_sector(bh, inode),
		.size = bh->b_size,
	};
	struct block_device *bdev = bh->b_bdev;
	void *vto;

	if (dax_map_atomic(bdev, &dax) < 0)
		return PTR_ERR(dax.addr);
	vto = kmap_atomic(to);
	copy_user_page(vto, (void __force *)dax.addr, vaddr, to);
	kunmap_atomic(vto);
	dax_unmap_atomic(bdev, &dax);
	return 0;
}

#define NO_SECTOR -1
#define DAX_PMD_INDEX(page_index) (page_index & (PMD_MASK >> PAGE_SHIFT))

static int dax_radix_entry(struct address_space *mapping, pgoff_t index,
		sector_t sector, bool pmd_entry, bool dirty)
{
	struct radix_tree_root *page_tree = &mapping->page_tree;
	pgoff_t pmd_index = DAX_PMD_INDEX(index);
	int type, error = 0;
	void *entry;

	WARN_ON_ONCE(pmd_entry && !dirty);
	if (dirty)
		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);

	spin_lock_irq(&mapping->tree_lock);

	entry = radix_tree_lookup(page_tree, pmd_index);
	if (entry && RADIX_DAX_TYPE(entry) == RADIX_DAX_PMD) {
		index = pmd_index;
		goto dirty;
	}

	entry = radix_tree_lookup(page_tree, index);
	if (entry) {
		type = RADIX_DAX_TYPE(entry);
		if (WARN_ON_ONCE(type != RADIX_DAX_PTE &&
					type != RADIX_DAX_PMD)) {
			error = -EIO;
			goto unlock;
		}

		if (!pmd_entry || type == RADIX_DAX_PMD)
			goto dirty;

		/*
		 * We only insert dirty PMD entries into the radix tree.  This
		 * means we don't need to worry about removing a dirty PTE
		 * entry and inserting a clean PMD entry, thus reducing the
		 * range we would flush with a follow-up fsync/msync call.
		 */
		radix_tree_delete(&mapping->page_tree, index);
		mapping->nrexceptional--;
	}

	if (sector == NO_SECTOR) {
		/*
		 * This can happen during correct operation if our pfn_mkwrite
		 * fault raced against a hole punch operation.  If this
		 * happens the pte that was hole punched will have been
		 * unmapped and the radix tree entry will have been removed by
		 * the time we are called, but the call will still happen.  We
		 * will return all the way up to wp_pfn_shared(), where the
		 * pte_same() check will fail, eventually causing page fault
		 * to be retried by the CPU.
		 */
		goto unlock;
	}

	error = radix_tree_insert(page_tree, index,
			RADIX_DAX_ENTRY(sector, pmd_entry));
	if (error)
		goto unlock;

	mapping->nrexceptional++;
 dirty:
	if (dirty)
		radix_tree_tag_set(page_tree, index, PAGECACHE_TAG_DIRTY);
 unlock:
	spin_unlock_irq(&mapping->tree_lock);
	return error;
}

static int dax_writeback_one(struct block_device *bdev,
		struct address_space *mapping, pgoff_t index, void *entry)
{
	struct radix_tree_root *page_tree = &mapping->page_tree;
	int type = RADIX_DAX_TYPE(entry);
	struct radix_tree_node *node;
	struct blk_dax_ctl dax;
	void **slot;
	int ret = 0;

	spin_lock_irq(&mapping->tree_lock);
	/*
	 * Regular page slots are stabilized by the page lock even
	 * without the tree itself locked.  These unlocked entries
	 * need verification under the tree lock.
	 */
	if (!__radix_tree_lookup(page_tree, index, &node, &slot))
		goto unlock;
	if (*slot != entry)
		goto unlock;

	/* another fsync thread may have already written back this entry */
	if (!radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE))
		goto unlock;

	if (WARN_ON_ONCE(type != RADIX_DAX_PTE && type != RADIX_DAX_PMD)) {
		ret = -EIO;
		goto unlock;
	}

	dax.sector = RADIX_DAX_SECTOR(entry);
	dax.size = (type == RADIX_DAX_PMD ? PMD_SIZE : PAGE_SIZE);
	spin_unlock_irq(&mapping->tree_lock);

	/*
	 * We cannot hold tree_lock while calling dax_map_atomic() because it
	 * eventually calls cond_resched().
	 */
	ret = dax_map_atomic(bdev, &dax);
	if (ret < 0)
		return ret;

	if (WARN_ON_ONCE(ret < dax.size)) {
		ret = -EIO;
		goto unmap;
	}

	wb_cache_pmem(dax.addr, dax.size);

	spin_lock_irq(&mapping->tree_lock);
	radix_tree_tag_clear(page_tree, index, PAGECACHE_TAG_TOWRITE);
	spin_unlock_irq(&mapping->tree_lock);
 unmap:
	dax_unmap_atomic(bdev, &dax);
	return ret;

 unlock:
	spin_unlock_irq(&mapping->tree_lock);
	return ret;
}

/*
 * Flush the mapping to the persistent domain within the byte range of [start,
 * end]. This is required by data integrity operations to ensure file data is
 * on persistent storage prior to completion of the operation.
 */
int dax_writeback_mapping_range(struct address_space *mapping,
		struct block_device *bdev, struct writeback_control *wbc)
{
	struct inode *inode = mapping->host;
	pgoff_t start_index, end_index, pmd_index;
	pgoff_t indices[PAGEVEC_SIZE];
	struct pagevec pvec;
	bool done = false;
	int i, ret = 0;
	void *entry;

	if (WARN_ON_ONCE(inode->i_blkbits != PAGE_SHIFT))
		return -EIO;

	if (!mapping->nrexceptional || wbc->sync_mode != WB_SYNC_ALL)
		return 0;

	start_index = wbc->range_start >> PAGE_SHIFT;
	end_index = wbc->range_end >> PAGE_SHIFT;
	pmd_index = DAX_PMD_INDEX(start_index);

	rcu_read_lock();
	entry = radix_tree_lookup(&mapping->page_tree, pmd_index);
	rcu_read_unlock();

	/* see if the start of our range is covered by a PMD entry */
	if (entry && RADIX_DAX_TYPE(entry) == RADIX_DAX_PMD)
		start_index = pmd_index;

	tag_pages_for_writeback(mapping, start_index, end_index);

	pagevec_init(&pvec, 0);
	while (!done) {
		pvec.nr = find_get_entries_tag(mapping, start_index,
				PAGECACHE_TAG_TOWRITE, PAGEVEC_SIZE,
				pvec.pages, indices);

		if (pvec.nr == 0)
			break;

		for (i = 0; i < pvec.nr; i++) {
			if (indices[i] > end_index) {
				done = true;
				break;
			}

			ret = dax_writeback_one(bdev, mapping, indices[i],
					pvec.pages[i]);
			if (ret < 0)
				return ret;
		}
	}
	wmb_pmem();
	return 0;
}
EXPORT_SYMBOL_GPL(dax_writeback_mapping_range);

static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
			struct vm_area_struct *vma, struct vm_fault *vmf)
{
	unsigned long vaddr = (unsigned long)vmf->virtual_address;
	struct address_space *mapping = inode->i_mapping;
	struct block_device *bdev = bh->b_bdev;
	struct blk_dax_ctl dax = {
		.sector = to_sector(bh, inode),
		.size = bh->b_size,
	};
	pgoff_t size;
	int error;

	i_mmap_lock_read(mapping);

	/*
	 * Check truncate didn't happen while we were allocating a block.
	 * If it did, this block may or may not be still allocated to the
	 * file.  We can't tell the filesystem to free it because we can't
	 * take i_mutex here.  In the worst case, the file still has blocks
	 * allocated past the end of the file.
	 */
	size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
	if (unlikely(vmf->pgoff >= size)) {
		error = -EIO;
		goto out;
	}

	if (dax_map_atomic(bdev, &dax) < 0) {
		error = PTR_ERR(dax.addr);
		goto out;
	}

	if (buffer_unwritten(bh) || buffer_new(bh)) {
		clear_pmem(dax.addr, PAGE_SIZE);
		wmb_pmem();
	}
	dax_unmap_atomic(bdev, &dax);

	error = dax_radix_entry(mapping, vmf->pgoff, dax.sector, false,
			vmf->flags & FAULT_FLAG_WRITE);
	if (error)
		goto out;

	error = vm_insert_mixed(vma, vaddr, dax.pfn);

 out:
	i_mmap_unlock_read(mapping);

	return error;
}

/**
 * __dax_fault - handle a page fault on a DAX file
 * @vma: The virtual memory area where the fault occurred
 * @vmf: The description of the fault
 * @get_block: The filesystem method used to translate file offsets to blocks
 * @complete_unwritten: The filesystem method used to convert unwritten blocks
 *	to written so the data written to them is exposed. This is required
 *	by write faults for filesystems that will return unwritten
 *	extent mappings from @get_block, but it is optional for reads as
 *	dax_insert_mapping() will always zero unwritten blocks. If the fs does
 *	not support unwritten extents, it should pass NULL.
 *
 * When a page fault occurs, filesystems may call this helper in their
 * fault handler for DAX files. __dax_fault() assumes the caller has done all
 * the necessary locking for the page fault to proceed successfully.
 */
int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
			get_block_t get_block, dax_iodone_t complete_unwritten)
{
	struct file *file = vma->vm_file;
	struct address_space *mapping = file->f_mapping;
	struct inode *inode = mapping->host;
	struct page *page;
	struct buffer_head bh;
	unsigned long vaddr = (unsigned long)vmf->virtual_address;
	unsigned blkbits = inode->i_blkbits;
	sector_t block;
	pgoff_t size;
	int error;
	int major = 0;

	size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
	if (vmf->pgoff >= size)
		return VM_FAULT_SIGBUS;

	memset(&bh, 0, sizeof(bh));
	block = (sector_t)vmf->pgoff << (PAGE_SHIFT - blkbits);
	bh.b_bdev = inode->i_sb->s_bdev;
	bh.b_size = PAGE_SIZE;

 repeat:
	page = find_get_page(mapping, vmf->pgoff);
	if (page) {
		if (!lock_page_or_retry(page, vma->vm_mm, vmf->flags)) {
			put_page(page);
			return VM_FAULT_RETRY;
		}
		if (unlikely(page->mapping != mapping)) {
			unlock_page(page);
			put_page(page);
			goto repeat;
		}
		size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
		if (unlikely(vmf->pgoff >= size)) {
			/*
			 * We have a struct page covering a hole in the file
			 * from a read fault and we've raced with a truncate
			 */
			error = -EIO;
			goto unlock_page;
		}
	}

	error = get_block(inode, block, &bh, 0);
	if (!error && (bh.b_size < PAGE_SIZE))
		error = -EIO;		/* fs corruption? */
	if (error)
		goto unlock_page;

	if (!buffer_mapped(&bh) && !buffer_unwritten(&bh) && !vmf->cow_page) {
		if (vmf->flags & FAULT_FLAG_WRITE) {
			error = get_block(inode, block, &bh, 1);
			count_vm_event(PGMAJFAULT);
			mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
			major = VM_FAULT_MAJOR;
			if (!error && (bh.b_size < PAGE_SIZE))
				error = -EIO;
			if (error)
				goto unlock_page;
		} else {
			return dax_load_hole(mapping, page, vmf);
		}
	}

	if (vmf->cow_page) {
		struct page *new_page = vmf->cow_page;
		if (buffer_written(&bh))
			error = copy_user_bh(new_page, inode, &bh, vaddr);
		else
			clear_user_highpage(new_page, vaddr);
		if (error)
			goto unlock_page;
		vmf->page = page;
		if (!page) {
			i_mmap_lock_read(mapping);
			/* Check we didn't race with truncate */
			size = (i_size_read(inode) + PAGE_SIZE - 1) >>
								PAGE_SHIFT;
			if (vmf->pgoff >= size) {
				i_mmap_unlock_read(mapping);
				error = -EIO;
				goto out;
			}
		}
		return VM_FAULT_LOCKED;
	}

	/* Check we didn't race with a read fault installing a new page */
	if (!page && major)
		page = find_lock_page(mapping, vmf->pgoff);

	if (page) {
		unmap_mapping_range(mapping, vmf->pgoff << PAGE_SHIFT,
							PAGE_SIZE, 0);
		delete_from_page_cache(page);
		unlock_page(page);
		put_page(page);
		page = NULL;
	}

	/*
	 * If we successfully insert the new mapping over an unwritten extent,
	 * we need to ensure we convert the unwritten extent. If there is an
	 * error inserting the mapping, the filesystem needs to leave it as
	 * unwritten to prevent exposure of the stale underlying data to
	 * userspace, but we still need to call the completion function so
	 * the private resources on the mapping buffer can be released. We
	 * indicate what the callback should do via the uptodate variable, same
	 * as for normal BH based IO completions.
	 */
	error = dax_insert_mapping(inode, &bh, vma, vmf);
	if (buffer_unwritten(&bh)) {
		if (complete_unwritten)
			complete_unwritten(&bh, !error);
		else
			WARN_ON_ONCE(!(vmf->flags & FAULT_FLAG_WRITE));
	}

 out:
	if (error == -ENOMEM)
		return VM_FAULT_OOM | major;
	/* -EBUSY is fine, somebody else faulted on the same PTE */
	if ((error < 0) && (error != -EBUSY))
		return VM_FAULT_SIGBUS | major;
	return VM_FAULT_NOPAGE | major;

 unlock_page:
	if (page) {
		unlock_page(page);
		put_page(page);
	}
	goto out;
}
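
A hedged sketch of how a filesystem of this kernel generation might wire __dax_fault() into its vm_operations_struct ->fault handler; example_get_block is a placeholder for the fs's get_block_t callback, and real callers typically hold a lock that excludes truncate around the call:

static int example_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	/* delegate to the generic DAX fault path; NULL: no unwritten extents */
	return __dax_fault(vma, vmf, example_get_block, NULL);
}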
Example #20
void spu_unmap_mappings(struct spu_context *ctx)
{
	unmap_mapping_range(ctx->local_store, 0, LS_SIZE, 1);
}