static void evict_entry(struct drm_gem_object *obj, enum tiler_fmt fmt, struct usergart_entry *entry) { if (obj->dev->dev_mapping) { struct omap_gem_object *omap_obj = to_omap_bo(obj); int n = usergart[fmt].height; size_t size = PAGE_SIZE * n; loff_t off = mmap_offset(obj) + (entry->obj_pgoff << PAGE_SHIFT); const int m = 1 + ((omap_obj->width << fmt) / PAGE_SIZE); if (m > 1) { int i; /* if stride > than PAGE_SIZE then sparse mapping: */ for (i = n; i > 0; i--) { unmap_mapping_range(obj->dev->dev_mapping, off, PAGE_SIZE, 1); off += PAGE_SIZE * m; } } else { unmap_mapping_range(obj->dev->dev_mapping, off, size, 1); } } entry->obj = NULL; }
/*
 * Remove everything past @offset from @mapping: unmap, truncate the
 * page cache, then unmap once more.  Mirrors vmtruncate().
 */
void fuse_truncate(struct address_space *mapping, loff_t offset)
{
	const loff_t hole_start = offset + PAGE_SIZE - 1;

	unmap_mapping_range(mapping, hole_start, 0, 1);
	truncate_inode_pages(mapping, offset);
	unmap_mapping_range(mapping, hole_start, 0, 1);
}
/////////////////////////////////////////////////////////// // DropPages // // /////////////////////////////////////////////////////////// void DropPages( IN struct address_space* m ) { filemap_fdatawrite( m ); unmap_mapping_range( m, 0, 0, 1 ); truncate_inode_pages( m, 0 ); unmap_mapping_range( m, 0, 0, 1 ); }
/*
 * Unmap every address space currently attached to @ctx.  Each mapping
 * pointer is optional and is skipped when NULL.
 */
void spu_unmap_mappings(struct spu_context *ctx)
{
	/* length shared by the mfc, cntl and signal mappings */
	const int area_len = 0x4000;

	if (ctx->local_store) {
		unmap_mapping_range(ctx->local_store, 0, LS_SIZE, 1);
	}
	if (ctx->mfc) {
		unmap_mapping_range(ctx->mfc, 0, area_len, 1);
	}
	if (ctx->cntl) {
		unmap_mapping_range(ctx->cntl, 0, area_len, 1);
	}
	if (ctx->signal1) {
		unmap_mapping_range(ctx->signal1, 0, area_len, 1);
	}
	if (ctx->signal2) {
		unmap_mapping_range(ctx->signal2, 0, area_len, 1);
	}
}
/**
 * truncate_pagecache - unmap and remove pagecache that has been truncated
 * @inode: inode
 * @newsize: new file size
 *
 * The caller must have stored the inode's new i_size before calling this.
 *
 * Filesystems should normally call this before they release the resources
 * backing the truncated range (e.g. before deallocating blocks).  Doing so
 * keeps the pagecache logically coherent with the on-disk format, and the
 * filesystem never sees writepage for a page whose blocks are already gone.
 */
void truncate_pagecache(struct inode *inode, loff_t newsize)
{
	struct address_space *as = inode->i_mapping;
	loff_t hole_start = round_up(newsize, PAGE_SIZE);

	/*
	 * Two unmap passes bracket the truncate.  The first is only an
	 * optimisation: it saves truncate_inode_pages from doing many
	 * single-page unmaps.  Between that pass and the end of
	 * truncate_inode_pages, private pages may still be COWed and would
	 * survive the truncate, so the second pass is needed for
	 * correctness.
	 */
	unmap_mapping_range(as, hole_start, 0, 1);
	truncate_inode_pages(as, newsize);
	unmap_mapping_range(as, hole_start, 0, 1);
}
int truncate_inode_page(struct address_space *mapping, struct page *page) { if (page_mapped(page)) { unmap_mapping_range(mapping, (loff_t)page->index << PAGE_CACHE_SHIFT, PAGE_CACHE_SIZE, 0); } return truncate_complete_page(mapping, page); }
/**
 * nfs_sync_mapping - helper to flush all mmapped dirty data to disk
 * @mapping: address space to flush
 *
 * Returns 0 when the mapping holds no pages, otherwise the result of
 * nfs_wb_all() on the mapping's host inode.
 */
int nfs_sync_mapping(struct address_space *mapping)
{
	if (mapping->nrpages == 0)
		return 0;

	unmap_mapping_range(mapping, 0, 0, 0);
	return nfs_wb_all(mapping->host);
}
void ttm_bo_unmap_virtual_locked(struct ttm_buffer_object *bo) { struct ttm_bo_device *bdev = bo->bdev; loff_t offset = (loff_t) bo->addr_space_offset; loff_t holelen = ((loff_t) bo->mem.num_pages) << PAGE_SHIFT; if (!bdev->dev_mapping) return; unmap_mapping_range(bdev->dev_mapping, offset, holelen, 1); ttm_mem_io_free_vm(bo); }
static void evict_entry(struct drm_gem_object *obj, enum tiler_fmt fmt, struct usergart_entry *entry) { if (obj->dev->dev_mapping) { size_t size = PAGE_SIZE * usergart[fmt].height; loff_t off = mmap_offset(obj) + (entry->obj_pgoff << PAGE_SHIFT); unmap_mapping_range(obj->dev->dev_mapping, off, size, 1); } entry->obj = NULL; }
/*
 * Tear down user mappings of @mapping for the byte range [@first, @last].
 *
 * Returns 0 when the mapping was mapped and the range was unmapped, or
 * -ENOENT when nothing maps it.  Asserts that @last > @first.
 *
 * XXX still wants a proper comment covering __free_pte -> dirty pages and
 * nopage's reference passing to the pte.
 */
int ll_teardown_mmaps(struct address_space *mapping, __u64 first, __u64 last)
{
	LASSERTF(last > first, "last %llu first %llu\n", last, first);

	if (!mapping_mapped(mapping))
		return -ENOENT;

	unmap_mapping_range(mapping, first + PAGE_CACHE_SIZE - 1,
			    last - first + 1, 0);
	return 0;
}
int truncate_inode_page(struct address_space *mapping, struct page *page) { loff_t holelen; VM_BUG_ON_PAGE(PageTail(page), page); holelen = PageTransHuge(page) ? HPAGE_PMD_SIZE : PAGE_SIZE; if (page_mapped(page)) { unmap_mapping_range(mapping, (loff_t)page->index << PAGE_SHIFT, holelen, 0); } return truncate_complete_page(mapping, page); }
/*
 * Set the inode size to @offset under fc->lock and, if the file shrank,
 * unmap and drop the page cache beyond the new size.
 */
static void fuse_vmtruncate(struct _inode *inode, loff_t offset)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct address_space *mapping;
	int shrinking;

	/* sample the old size and publish the new one atomically */
	spin_lock(&fc->lock);
	shrinking = inode->i_size > offset;
	i_size_write(inode, offset);
	spin_unlock(&fc->lock);

	if (!shrinking)
		return;

	mapping = inode->i_mapping;
	unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
	truncate_inode_pages(mapping, offset);
}
static int vvp_conf_set(const struct lu_env *env, struct cl_object *obj, const struct cl_object_conf *conf) { struct ll_inode_info *lli = ll_i2info(conf->coc_inode); if (conf->coc_opc == OBJECT_CONF_INVALIDATE) { CDEBUG(D_VFSTRACE, DFID ": losing layout lock\n", PFID(&lli->lli_fid)); ll_layout_version_set(lli, LL_LAYOUT_GEN_NONE); /* Clean up page mmap for this inode. * The reason for us to do this is that if the page has * already been installed into memory space, the process * can access it without interacting with lustre, so this * page may be stale due to layout change, and the process * will never be notified. * This operation is expensive but mmap processes have to pay * a price themselves. */ unmap_mapping_range(conf->coc_inode->i_mapping, 0, OBD_OBJECT_EOF, 0); return 0; } if (conf->coc_opc != OBJECT_CONF_SET) return 0; if (conf->u.coc_md && conf->u.coc_md->lsm) { CDEBUG(D_VFSTRACE, DFID ": layout version change: %u -> %u\n", PFID(&lli->lli_fid), lli->lli_layout_gen, conf->u.coc_md->lsm->lsm_layout_gen); lli->lli_has_smd = lsm_has_objects(conf->u.coc_md->lsm); ll_layout_version_set(lli, conf->u.coc_md->lsm->lsm_layout_gen); } else { CDEBUG(D_VFSTRACE, DFID ": layout nuked: %u.\n", PFID(&lli->lli_fid), lli->lli_layout_gen); lli->lli_has_smd = false; ll_layout_version_set(lli, LL_LAYOUT_GEN_EMPTY); } return 0; }
/* * Device passthrough support */ static int kvmppc_xive_native_reset_mapped(struct kvm *kvm, unsigned long irq) { struct kvmppc_xive *xive = kvm->arch.xive; if (irq >= KVMPPC_XIVE_NR_IRQS) return -EINVAL; /* * Clear the ESB pages of the IRQ number being mapped (or * unmapped) into the guest and let the the VM fault handler * repopulate with the appropriate ESB pages (device or IC) */ pr_debug("clearing esb pages for girq 0x%lx\n", irq); mutex_lock(&xive->mapping_lock); if (xive->mapping) unmap_mapping_range(xive->mapping, irq * (2ull << PAGE_SHIFT), 2ull << PAGE_SHIFT, 1); mutex_unlock(&xive->mapping_lock); return 0; }
void spu_unmap_mappings(struct spu_context *ctx) { mutex_lock(&ctx->mapping_lock); if (ctx->local_store) unmap_mapping_range(ctx->local_store, 0, LS_SIZE, 1); if (ctx->mfc) unmap_mapping_range(ctx->mfc, 0, SPUFS_MFC_MAP_SIZE, 1); if (ctx->cntl) unmap_mapping_range(ctx->cntl, 0, SPUFS_CNTL_MAP_SIZE, 1); if (ctx->signal1) unmap_mapping_range(ctx->signal1, 0, SPUFS_SIGNAL_MAP_SIZE, 1); if (ctx->signal2) unmap_mapping_range(ctx->signal2, 0, SPUFS_SIGNAL_MAP_SIZE, 1); if (ctx->mss) unmap_mapping_range(ctx->mss, 0, SPUFS_MSS_MAP_SIZE, 1); if (ctx->psmap) unmap_mapping_range(ctx->psmap, 0, SPUFS_PS_MAP_SIZE, 1); mutex_unlock(&ctx->mapping_lock); }
/**
 * truncate_pagecache_range - unmap and remove pagecache that is hole-punched
 * @inode: inode
 * @lstart: offset of beginning of hole
 * @lend: offset of last byte of hole
 *
 * Filesystems should normally call this before they release the resources
 * backing the punched range (e.g. before deallocating blocks).  Doing so
 * keeps the pagecache logically coherent with the on-disk format, and the
 * filesystem never sees writepage for a page whose blocks are already gone.
 */
void truncate_pagecache_range(struct inode *inode, loff_t lstart, loff_t lend)
{
	struct address_space *as = inode->i_mapping;
	/*
	 * The rounding below is currently only illustrative:
	 * unmap_mapping_range grows its hole outwards while we want the hole
	 * shrunk inwards, but existing callers already page-align their
	 * arguments.  Note that unmap_mapping_range treats holelen 0 as
	 * "everything", and that lend == -1 here means end of file.
	 */
	loff_t first = round_up(lstart, PAGE_SIZE);
	loff_t last = round_down(1 + lend, PAGE_SIZE) - 1;

	/*
	 * In contrast to truncate_pagecache, the unmap happens just once
	 * (ahead of the pagecache truncate) and without "even_cows":
	 * punching a hole must leave private COWed pages in place.
	 */
	if ((u64)last > (u64)first) {
		loff_t len = 1 + last - first;

		unmap_mapping_range(as, first, len, 0);
	}

	truncate_inode_pages_range(as, lstart, lend);
}
/*
 * Truncate @inode to @new_i_size.
 *
 * @di_bh holds the on-disk inode; its contents are validated before use.
 *
 * Returns 0 on success (including the no-op case where the size is
 * unchanged), -EIO when the dinode is invalid, -EINVAL when asked to grow
 * the file, or the negative status of the first failing helper.
 */
static int ocfs2_truncate_file(struct inode *inode,
			       struct buffer_head *di_bh,
			       u64 new_i_size)
{
	int status = 0;
	struct ocfs2_dinode *fe = NULL;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct ocfs2_truncate_context *tc = NULL;

	mlog_entry("(inode = %llu, new_i_size = %llu\n",
		   (unsigned long long)OCFS2_I(inode)->ip_blkno,
		   (unsigned long long)new_i_size);

	/* Drop mappings and cached pages past the new size up front,
	 * before the dinode is even validated. */
	unmap_mapping_range(inode->i_mapping, new_i_size + PAGE_SIZE - 1, 0, 1);
	truncate_inode_pages(inode->i_mapping, new_i_size);

	fe = (struct ocfs2_dinode *) di_bh->b_data;
	if (!OCFS2_IS_VALID_DINODE(fe)) {
		OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe);
		status = -EIO;
		goto bail;
	}

	/* The in-memory size and the on-disk size must agree here. */
	mlog_bug_on_msg(le64_to_cpu(fe->i_size) != i_size_read(inode),
			"Inode %llu, inode i_size = %lld != di "
			"i_size = %llu, i_flags = 0x%x\n",
			(unsigned long long)OCFS2_I(inode)->ip_blkno,
			i_size_read(inode),
			(unsigned long long)le64_to_cpu(fe->i_size),
			le32_to_cpu(fe->i_flags));

	/* This path only shrinks; growing is rejected. */
	if (new_i_size > le64_to_cpu(fe->i_size)) {
		mlog(0, "asked to truncate file with size (%llu) to size (%llu)!\n",
		     (unsigned long long)le64_to_cpu(fe->i_size),
		     (unsigned long long)new_i_size);
		status = -EINVAL;
		mlog_errno(status);
		goto bail;
	}

	mlog(0, "inode %llu, i_size = %llu, new_i_size = %llu\n",
	     (unsigned long long)le64_to_cpu(fe->i_blkno),
	     (unsigned long long)le64_to_cpu(fe->i_size),
	     (unsigned long long)new_i_size);

	/* lets handle the simple truncate cases before doing any more
	 * cluster locking. */
	if (new_i_size == le64_to_cpu(fe->i_size))
		goto bail;

	/* This forces other nodes to sync and drop their pages. Do
	 * this even if we have a truncate without allocation change -
	 * ocfs2 cluster sizes can be much greater than page size, so
	 * we have to truncate them anyway.  */
	status = ocfs2_data_lock(inode, 1);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	/* alright, we're going to need to do a full blown alloc size
	 * change. Orphan the inode so that recovery can complete the
	 * truncate if necessary. This does the task of marking
	 * i_size. */
	status = ocfs2_orphan_for_truncate(osb, inode, di_bh, new_i_size);
	if (status < 0) {
		mlog_errno(status);
		goto bail_unlock_data;
	}

	status = ocfs2_prepare_truncate(osb, inode, di_bh, &tc);
	if (status < 0) {
		mlog_errno(status);
		goto bail_unlock_data;
	}

	status = ocfs2_commit_truncate(osb, inode, di_bh, tc);
	if (status < 0) {
		mlog_errno(status);
		goto bail_unlock_data;
	}

	/* TODO: orphan dir cleanup here. */
bail_unlock_data:
	/* Reached on success as well as on failure once the data lock is held. */
	ocfs2_data_unlock(inode, 1);

bail:

	mlog_exit(status);
	return status;
}
/**
 * invalidate_inode_pages2_range - remove range of pages from an address_space
 * @mapping: the address_space
 * @start: the page offset 'from' which to invalidate
 * @end: the page offset 'to' which to invalidate (inclusive)
 *
 * Any pages which are found to be mapped into pagetables are unmapped prior to
 * invalidation.
 *
 * Returns -EBUSY if any pages could not be invalidated.  A negative result
 * from do_launder_page() is reported the same way; when several pages
 * fail, the status of the last failing page is returned.
 */
int invalidate_inode_pages2_range(struct address_space *mapping,
				  pgoff_t start, pgoff_t end)
{
	pgoff_t indices[PAGEVEC_SIZE];
	struct pagevec pvec;
	pgoff_t index;
	int i;
	int ret = 0;
	int ret2 = 0;
	int did_range_unmap = 0;

	cleancache_invalidate_inode(mapping);
	pagevec_init(&pvec, 0);
	index = start;
	/* Look up entries in batches of at most PAGEVEC_SIZE, never past @end. */
	while (index <= end && pagevec_lookup_entries(&pvec, mapping, index,
			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
			indices)) {
		for (i = 0; i < pagevec_count(&pvec); i++) {
			struct page *page = pvec.pages[i];

			/* We rely upon deletion not changing page->index */
			index = indices[i];
			if (index > end)
				break;

			/* Exceptional entries are not pages: clear and move on. */
			if (radix_tree_exceptional_entry(page)) {
				clear_exceptional_entry(mapping, index, page);
				continue;
			}

			lock_page(page);
			WARN_ON(page->index != index);
			/* Skip pages that no longer belong to this mapping. */
			if (page->mapping != mapping) {
				unlock_page(page);
				continue;
			}
			wait_on_page_writeback(page);
			if (page_mapped(page)) {
				if (!did_range_unmap) {
					/*
					 * Zap the rest of the file in one hit.
					 */
					unmap_mapping_range(mapping,
					   (loff_t)index << PAGE_CACHE_SHIFT,
					   (loff_t)(1 + end - index)
							 << PAGE_CACHE_SHIFT,
					    0);
					did_range_unmap = 1;
				} else {
					/*
					 * Just zap this page
					 */
					unmap_mapping_range(mapping,
					   (loff_t)index << PAGE_CACHE_SHIFT,
					   PAGE_CACHE_SIZE, 0);
				}
			}
			BUG_ON(page_mapped(page));
			ret2 = do_launder_page(mapping, page);
			if (ret2 == 0) {
				if (!invalidate_complete_page2(mapping, page))
					ret2 = -EBUSY;
			}
			/* Remember the failure but keep processing the range. */
			if (ret2 < 0)
				ret = ret2;
			unlock_page(page);
		}
		pagevec_remove_exceptionals(&pvec);
		pagevec_release(&pvec);
		cond_resched();
		/* Resume the lookup just past the last index handled. */
		index++;
	}
	cleancache_invalidate_inode(mapping);
	return ret;
}
/*
 * Copy the device block described by @bh into the page @to.
 *
 * The block is mapped with dax_map_atomic() and copied with
 * copy_user_page() for user address @vaddr.  Returns 0 on success, or
 * PTR_ERR(dax.addr) when the mapping attempt fails.
 */
static int copy_user_bh(struct page *to, struct inode *inode,
		struct buffer_head *bh, unsigned long vaddr)
{
	struct blk_dax_ctl dax = {
		.sector = to_sector(bh, inode),
		.size = bh->b_size,
	};
	struct block_device *bdev = bh->b_bdev;
	void *vto;

	if (dax_map_atomic(bdev, &dax) < 0)
		return PTR_ERR(dax.addr);
	vto = kmap_atomic(to);
	copy_user_page(vto, (void __force *)dax.addr, vaddr, to);
	kunmap_atomic(vto);
	dax_unmap_atomic(bdev, &dax);
	return 0;
}

/* Sector value used by callers that have no sector to record. */
#define NO_SECTOR -1
/* Mask a page index with (PMD_MASK >> PAGE_SHIFT). */
#define DAX_PMD_INDEX(page_index) (page_index & (PMD_MASK >> PAGE_SHIFT))

/*
 * Record (and optionally tag dirty) a DAX radix tree entry for @index.
 *
 * If a PMD entry already covers @index, or a suitable entry already exists
 * at @index, only the dirty tag is updated.  An existing PTE entry is
 * replaced when a PMD entry is requested.  With @sector == NO_SECTOR
 * nothing is inserted.  All tree manipulation happens under
 * mapping->tree_lock.
 *
 * Returns 0 on success, -EIO when an existing entry has an unexpected type,
 * or the error from radix_tree_insert().
 */
static int dax_radix_entry(struct address_space *mapping, pgoff_t index,
		sector_t sector, bool pmd_entry, bool dirty)
{
	struct radix_tree_root *page_tree = &mapping->page_tree;
	pgoff_t pmd_index = DAX_PMD_INDEX(index);
	int type, error = 0;
	void *entry;

	WARN_ON_ONCE(pmd_entry && !dirty);
	if (dirty)
		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);

	spin_lock_irq(&mapping->tree_lock);

	/* A PMD entry covering this index takes precedence. */
	entry = radix_tree_lookup(page_tree, pmd_index);
	if (entry && RADIX_DAX_TYPE(entry) == RADIX_DAX_PMD) {
		index = pmd_index;
		goto dirty;
	}

	entry = radix_tree_lookup(page_tree, index);
	if (entry) {
		type = RADIX_DAX_TYPE(entry);
		if (WARN_ON_ONCE(type != RADIX_DAX_PTE &&
					type != RADIX_DAX_PMD)) {
			error = -EIO;
			goto unlock;
		}

		if (!pmd_entry || type == RADIX_DAX_PMD)
			goto dirty;

		/*
		 * We only insert dirty PMD entries into the radix tree.  This
		 * means we don't need to worry about removing a dirty PTE
		 * entry and inserting a clean PMD entry, thus reducing the
		 * range we would flush with a follow-up fsync/msync call.
		 */
		radix_tree_delete(&mapping->page_tree, index);
		mapping->nrexceptional--;
	}

	if (sector == NO_SECTOR) {
		/*
		 * This can happen during correct operation if our pfn_mkwrite
		 * fault raced against a hole punch operation.  If this
		 * happens the pte that was hole punched will have been
		 * unmapped and the radix tree entry will have been removed by
		 * the time we are called, but the call will still happen.  We
		 * will return all the way up to wp_pfn_shared(), where the
		 * pte_same() check will fail, eventually causing page fault
		 * to be retried by the CPU.
		 */
		goto unlock;
	}

	error = radix_tree_insert(page_tree, index,
			RADIX_DAX_ENTRY(sector, pmd_entry));
	if (error)
		goto unlock;

	mapping->nrexceptional++;
 dirty:
	if (dirty)
		radix_tree_tag_set(page_tree, index, PAGECACHE_TAG_DIRTY);
 unlock:
	spin_unlock_irq(&mapping->tree_lock);
	return error;
}

/*
 * Write back the single radix tree entry @entry found at @index.
 *
 * The entry is re-validated under tree_lock; if it changed, vanished, or is
 * no longer tagged PAGECACHE_TAG_TOWRITE, nothing is flushed and 0 is
 * returned.  Otherwise the range is flushed with wb_cache_pmem() and the
 * TOWRITE tag is cleared.
 *
 * Returns a negative error on failure (-EIO for an unexpected entry type or
 * a short mapping, or the dax_map_atomic() error).  On a successful flush
 * the dax_map_atomic() result is returned unchanged; the only caller visible
 * here checks just for a negative value.
 */
static int dax_writeback_one(struct block_device *bdev,
		struct address_space *mapping, pgoff_t index, void *entry)
{
	struct radix_tree_root *page_tree = &mapping->page_tree;
	int type = RADIX_DAX_TYPE(entry);
	struct radix_tree_node *node;
	struct blk_dax_ctl dax;
	void **slot;
	int ret = 0;

	spin_lock_irq(&mapping->tree_lock);
	/*
	 * Regular page slots are stabilized by the page lock even
	 * without the tree itself locked.  These unlocked entries
	 * need verification under the tree lock.
	 */
	if (!__radix_tree_lookup(page_tree, index, &node, &slot))
		goto unlock;
	if (*slot != entry)
		goto unlock;

	/* another fsync thread may have already written back this entry */
	if (!radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE))
		goto unlock;

	if (WARN_ON_ONCE(type != RADIX_DAX_PTE && type != RADIX_DAX_PMD)) {
		ret = -EIO;
		goto unlock;
	}

	dax.sector = RADIX_DAX_SECTOR(entry);
	dax.size = (type == RADIX_DAX_PMD ? PMD_SIZE : PAGE_SIZE);
	spin_unlock_irq(&mapping->tree_lock);

	/*
	 * We cannot hold tree_lock while calling dax_map_atomic() because it
	 * eventually calls cond_resched().
	 */
	ret = dax_map_atomic(bdev, &dax);
	if (ret < 0)
		return ret;

	/* The mapping must cover the whole entry. */
	if (WARN_ON_ONCE(ret < dax.size)) {
		ret = -EIO;
		goto unmap;
	}

	wb_cache_pmem(dax.addr, dax.size);

	spin_lock_irq(&mapping->tree_lock);
	radix_tree_tag_clear(page_tree, index, PAGECACHE_TAG_TOWRITE);
	spin_unlock_irq(&mapping->tree_lock);
 unmap:
	dax_unmap_atomic(bdev, &dax);
	return ret;

 unlock:
	spin_unlock_irq(&mapping->tree_lock);
	return ret;
}

/*
 * Flush the mapping to the persistent domain within the byte range of [start,
 * end]. This is required by data integrity operations to ensure file data is
 * on persistent storage prior to completion of the operation.
 *
 * The range comes from @wbc (range_start / range_end).  Nothing is done
 * unless the mapping has exceptional entries and @wbc requests WB_SYNC_ALL.
 * Returns 0 on success, -EIO when the inode block size is not PAGE_SIZE, or
 * the first negative result from dax_writeback_one().
 */
int dax_writeback_mapping_range(struct address_space *mapping,
		struct block_device *bdev, struct writeback_control *wbc)
{
	struct inode *inode = mapping->host;
	pgoff_t start_index, end_index, pmd_index;
	pgoff_t indices[PAGEVEC_SIZE];
	struct pagevec pvec;
	bool done = false;
	int i, ret = 0;
	void *entry;

	if (WARN_ON_ONCE(inode->i_blkbits != PAGE_SHIFT))
		return -EIO;

	if (!mapping->nrexceptional || wbc->sync_mode != WB_SYNC_ALL)
		return 0;

	start_index = wbc->range_start >> PAGE_SHIFT;
	end_index = wbc->range_end >> PAGE_SHIFT;
	pmd_index = DAX_PMD_INDEX(start_index);

	rcu_read_lock();
	entry = radix_tree_lookup(&mapping->page_tree, pmd_index);
	rcu_read_unlock();

	/* see if the start of our range is covered by a PMD entry */
	if (entry && RADIX_DAX_TYPE(entry) == RADIX_DAX_PMD)
		start_index = pmd_index;

	tag_pages_for_writeback(mapping, start_index, end_index);

	pagevec_init(&pvec, 0);
	/*
	 * start_index is never advanced: each pass looks up TOWRITE-tagged
	 * entries from the same index again, and dax_writeback_one() clears
	 * that tag on the entries it flushes.
	 */
	while (!done) {
		pvec.nr = find_get_entries_tag(mapping, start_index,
				PAGECACHE_TAG_TOWRITE, PAGEVEC_SIZE,
				pvec.pages, indices);

		if (pvec.nr == 0)
			break;

		for (i = 0; i < pvec.nr; i++) {
			if (indices[i] > end_index) {
				done = true;
				break;
			}

			ret = dax_writeback_one(bdev, mapping, indices[i],
					pvec.pages[i]);
			if (ret < 0)
				return ret;
		}
	}
	wmb_pmem();
	return 0;
}
EXPORT_SYMBOL_GPL(dax_writeback_mapping_range);

/*
 * Install the block described by @bh into the faulting address of @vma.
 *
 * Runs under the i_mmap read lock: re-checks the fault offset against
 * i_size, zeroes new or unwritten blocks, records the radix tree entry and
 * finally inserts the pfn with vm_insert_mixed().  Returns 0 or a negative
 * error.
 */
static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
			struct vm_area_struct *vma, struct vm_fault *vmf)
{
	unsigned long vaddr = (unsigned long)vmf->virtual_address;
	struct address_space *mapping = inode->i_mapping;
	struct block_device *bdev = bh->b_bdev;
	struct blk_dax_ctl dax = {
		.sector = to_sector(bh, inode),
		.size = bh->b_size,
	};
	pgoff_t size;
	int error;

	i_mmap_lock_read(mapping);

	/*
	 * Check truncate didn't happen while we were allocating a block.
	 * If it did, this block may or may not be still allocated to the
	 * file.  We can't tell the filesystem to free it because we can't
	 * take i_mutex here.  In the worst case, the file still has blocks
	 * allocated past the end of the file.
	 */
	size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
	if (unlikely(vmf->pgoff >= size)) {
		error = -EIO;
		goto out;
	}

	if (dax_map_atomic(bdev, &dax) < 0) {
		error = PTR_ERR(dax.addr);
		goto out;
	}

	/* Never expose stale data from new or unwritten blocks. */
	if (buffer_unwritten(bh) || buffer_new(bh)) {
		clear_pmem(dax.addr, PAGE_SIZE);
		wmb_pmem();
	}
	dax_unmap_atomic(bdev, &dax);

	error = dax_radix_entry(mapping, vmf->pgoff, dax.sector, false,
			vmf->flags & FAULT_FLAG_WRITE);
	if (error)
		goto out;

	error = vm_insert_mixed(vma, vaddr, dax.pfn);

 out:
	i_mmap_unlock_read(mapping);

	return error;
}

/**
 * __dax_fault - handle a page fault on a DAX file
 * @vma: The virtual memory area where the fault occurred
 * @vmf: The description of the fault
 * @get_block: The filesystem method used to translate file offsets to blocks
 * @complete_unwritten: The filesystem method used to convert unwritten blocks
 *	to written so the data written to them is exposed. This is required
 *	by write faults for filesystems that will return unwritten extent
 *	mappings from @get_block, but it is optional for reads as
 *	dax_insert_mapping() will always zero unwritten blocks. If the fs does
 *	not support unwritten extents, then it should pass NULL.
 *
 * When a page fault occurs, filesystems may call this helper in their
 * fault handler for DAX files. __dax_fault() assumes the caller has done all
 * the necessary locking for the page fault to proceed successfully.
 */
int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
			get_block_t get_block, dax_iodone_t complete_unwritten)
{
	struct file *file = vma->vm_file;
	struct address_space *mapping = file->f_mapping;
	struct inode *inode = mapping->host;
	struct page *page;
	struct buffer_head bh;
	unsigned long vaddr = (unsigned long)vmf->virtual_address;
	unsigned blkbits = inode->i_blkbits;
	sector_t block;
	pgoff_t size;
	int error;
	int major = 0;

	/* Faults beyond the last page of the file get SIGBUS. */
	size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
	if (vmf->pgoff >= size)
		return VM_FAULT_SIGBUS;

	memset(&bh, 0, sizeof(bh));
	block = (sector_t)vmf->pgoff << (PAGE_SHIFT - blkbits);
	bh.b_bdev = inode->i_sb->s_bdev;
	bh.b_size = PAGE_SIZE;

 repeat:
	page = find_get_page(mapping, vmf->pgoff);
	if (page) {
		if (!lock_page_or_retry(page, vma->vm_mm, vmf->flags)) {
			put_page(page);
			return VM_FAULT_RETRY;
		}
		/* The page was removed from this mapping meanwhile: retry. */
		if (unlikely(page->mapping != mapping)) {
			unlock_page(page);
			put_page(page);
			goto repeat;
		}
		size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
		if (unlikely(vmf->pgoff >= size)) {
			/*
			 * We have a struct page covering a hole in the file
			 * from a read fault and we've raced with a truncate
			 */
			error = -EIO;
			goto unlock_page;
		}
	}

	error = get_block(inode, block, &bh, 0);
	if (!error && (bh.b_size < PAGE_SIZE))
		error = -EIO;		/* fs corruption? */
	if (error)
		goto unlock_page;

	/* Hole: allocate on a write fault, otherwise hand off to dax_load_hole(). */
	if (!buffer_mapped(&bh) && !buffer_unwritten(&bh) && !vmf->cow_page) {
		if (vmf->flags & FAULT_FLAG_WRITE) {
			error = get_block(inode, block, &bh, 1);
			count_vm_event(PGMAJFAULT);
			mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
			major = VM_FAULT_MAJOR;
			if (!error && (bh.b_size < PAGE_SIZE))
				error = -EIO;
			if (error)
				goto unlock_page;
		} else {
			return dax_load_hole(mapping, page, vmf);
		}
	}

	/* COW fault: fill the private copy instead of mapping the block. */
	if (vmf->cow_page) {
		struct page *new_page = vmf->cow_page;
		if (buffer_written(&bh))
			error = copy_user_bh(new_page, inode, &bh, vaddr);
		else
			clear_user_highpage(new_page, vaddr);
		if (error)
			goto unlock_page;
		vmf->page = page;
		if (!page) {
			/*
			 * On success this path returns with the i_mmap read
			 * lock still held.
			 */
			i_mmap_lock_read(mapping);
			/* Check we didn't race with truncate */
			size = (i_size_read(inode) + PAGE_SIZE - 1) >>
								PAGE_SHIFT;
			if (vmf->pgoff >= size) {
				i_mmap_unlock_read(mapping);
				error = -EIO;
				goto out;
			}
		}
		return VM_FAULT_LOCKED;
	}

	/* Check we didn't race with a read fault installing a new page */
	if (!page && major)
		page = find_lock_page(mapping, vmf->pgoff);

	/* Evict any page cache page for this offset before mapping the block. */
	if (page) {
		unmap_mapping_range(mapping, vmf->pgoff << PAGE_SHIFT,
							PAGE_SIZE, 0);
		delete_from_page_cache(page);
		unlock_page(page);
		put_page(page);
		page = NULL;
	}

	/*
	 * If we successfully insert the new mapping over an unwritten extent,
	 * we need to ensure we convert the unwritten extent. If there is an
	 * error inserting the mapping, the filesystem needs to leave it as
	 * unwritten to prevent exposure of the stale underlying data to
	 * userspace, but we still need to call the completion function so
	 * the private resources on the mapping buffer can be released. We
	 * indicate what the callback should do via the uptodate variable, same
	 * as for normal BH based IO completions.
	 */
	error = dax_insert_mapping(inode, &bh, vma, vmf);
	if (buffer_unwritten(&bh)) {
		if (complete_unwritten)
			complete_unwritten(&bh, !error);
		else
			WARN_ON_ONCE(!(vmf->flags & FAULT_FLAG_WRITE));
	}

 out:
	/* Translate the errno into a VM_FAULT_* code. */
	if (error == -ENOMEM)
		return VM_FAULT_OOM | major;
	/* -EBUSY is fine, somebody else faulted on the same PTE */
	if ((error < 0) && (error != -EBUSY))
		return VM_FAULT_SIGBUS | major;
	return VM_FAULT_NOPAGE | major;

 unlock_page:
	if (page) {
		unlock_page(page);
		put_page(page);
	}
	goto out;
}
/*
 * Unmap the local store mapping of @ctx, if it has one.
 *
 * ctx->local_store is checked for NULL before it is passed to
 * unmap_mapping_range(), matching the guarded form used by the other
 * variants of this function.
 */
void spu_unmap_mappings(struct spu_context *ctx)
{
	if (ctx->local_store)
		unmap_mapping_range(ctx->local_store, 0, LS_SIZE, 1);
}