Code example #1
File: msm_gem.c  Project: 513855417/linux
int msm_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct drm_gem_object *obj = vma->vm_private_data;
	struct drm_device *dev = obj->dev;
	struct page **pages;
	unsigned long pfn;
	pgoff_t pgoff;
	int ret;

	/* Make sure we don't race with a parallel update on a fault, and that
	 * nothing is moved or removed from beneath our feet.
	 */
	ret = mutex_lock_interruptible(&dev->struct_mutex);
	if (ret)
		goto out;

	/* make sure we have pages attached now */
	pages = get_pages(obj);
	if (IS_ERR(pages)) {
		ret = PTR_ERR(pages);
		goto out_unlock;
	}

	/* We don't use vmf->pgoff since that has the fake offset: */
	pgoff = ((unsigned long)vmf->virtual_address -
			vma->vm_start) >> PAGE_SHIFT;

	pfn = page_to_pfn(pages[pgoff]);

	VERB("Inserting %p pfn %lx, pa %lx", vmf->virtual_address,
			pfn, pfn << PAGE_SHIFT);

	ret = vm_insert_mixed(vma, (unsigned long)vmf->virtual_address,
			__pfn_to_pfn_t(pfn, PFN_DEV));

out_unlock:
	mutex_unlock(&dev->struct_mutex);
out:
	switch (ret) {
	case -EAGAIN:
	case 0:
	case -ERESTARTSYS:
	case -EINTR:
	case -EBUSY:
		/*
		 * EBUSY is ok: this just means that another thread
		 * already did the job.
		 */
		return VM_FAULT_NOPAGE;
	case -ENOMEM:
		return VM_FAULT_OOM;
	default:
		return VM_FAULT_SIGBUS;
	}
}
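
Note on the call above: since Linux 4.5, vm_insert_mixed() takes a pfn_t rather than a raw pfn, which is why this example wraps the pfn with __pfn_to_pfn_t(pfn, PFN_DEV). Examples #2 and #3 below pass a raw unsigned long pfn, i.e. the pre-4.5 signature. A minimal sketch of the 4.5+ form, with an illustrative helper name that is not part of msm_gem.c:

#include <linux/mm.h>
#include <linux/pfn_t.h>

/* Illustrative helper (not from msm_gem.c): insert one struct-page-backed
 * pfn into a VM_MIXEDMAP VMA using the pfn_t API available on Linux >= 4.5.
 */
static int insert_page_as_mixed(struct vm_area_struct *vma,
				unsigned long addr, struct page *page)
{
	pfn_t pfn = __pfn_to_pfn_t(page_to_pfn(page), PFN_DEV);

	return vm_insert_mixed(vma, addr, pfn);
}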
Code example #2
File: drv_vma_fault.c  Project: gurugio/ldd
int simple_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	size_t size = vma->vm_end - vma->vm_start;
	int i;
	int ret;
	struct page *map_page;
	char *ptr;

	printk(KERN_NOTICE "vmf:fault=%p flag=%x offset=%lx\n",
	       vmf->virtual_address,
	       vmf->flags,
	       vmf->pgoff);
	printk(KERN_NOTICE "vma:start=0x%lx size=%x pgoff=%lx\n",
	       vma->vm_start,
	       (int)size,
	       vma->vm_pgoff);
	       
	vma->vm_flags |= VM_MIXEDMAP;
	map_page = data_pages[vmf->pgoff];

	ret = vm_insert_mixed(vma,
			      (unsigned long)vmf->virtual_address,
			      page_to_pfn(map_page));
	printk(KERN_NOTICE "ret=%d pfn=%x vaddr=0x%lx cnt=%d\n",
	       ret,
	       (int)page_to_pfn(map_page),
	       (unsigned long)vmf->virtual_address,
	       page_count(map_page));

	/*
	 * Example of "direct I/O"-style access: write into the page directly
	 * from kernel space through a temporary atomic mapping.
	 */
	ptr = kmap_atomic(map_page);
	for (i = 0; i < 100; i++) {
		ptr[i] = (unsigned char)vmf->pgoff;
	}
	kunmap_atomic(ptr);
	SetPageDirty(map_page);

	return VM_FAULT_NOPAGE;
}
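
A fault handler such as simple_vma_fault() only runs if the driver's mmap method installs it on the VMA. A minimal sketch of that wiring, assuming the pre-4.11 two-argument fault signature used by the example (the mmap function name is hypothetical):

#include <linux/fs.h>
#include <linux/mm.h>

static const struct vm_operations_struct simple_vm_ops = {
	.fault = simple_vma_fault,
};

/* Hypothetical mmap handler: install the ops and defer all population of
 * the mapping to the fault handler. */
static int simple_mmap(struct file *filp, struct vm_area_struct *vma)
{
	vma->vm_ops = &simple_vm_ops;
	/* vm_insert_mixed() requires VM_MIXEDMAP; the example also sets it
	 * (redundantly) on every fault. */
	vma->vm_flags |= VM_MIXEDMAP;
	return 0;
}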
Code example #3
int MMapPMR(struct file *pFile, struct vm_area_struct *ps_vma)
{
	PVRSRV_ERROR eError;
	IMG_HANDLE hSecurePMRHandle;
	IMG_SIZE_T uiLength;
	IMG_DEVMEM_OFFSET_T uiOffset;
	unsigned long uiPFN;
	IMG_HANDLE hPMRResmanHandle;
	PMR *psPMR;
	PMR_FLAGS_T ulPMRFlags;
	IMG_UINT32 ui32CPUCacheFlags;
	unsigned long ulNewFlags = 0;
	pgprot_t sPageProt;
#if defined(SUPPORT_DRM)
	CONNECTION_DATA *psConnection = LinuxConnectionFromFile(PVR_DRM_FILE_FROM_FILE(pFile));
#else
	CONNECTION_DATA *psConnection = LinuxConnectionFromFile(pFile);
#endif

#if defined(PVR_MMAP_USE_VM_INSERT)
	IMG_BOOL bMixedMap = IMG_FALSE;
#endif
	/*
	 * The PMR lock is used here to protect both handle-related operations
	 * and PMR operations.
	 * This was introduced to fix a lockdep issue.
	 */
	mutex_lock(&g_sMMapMutex);
	PMRLock();

#if defined(SUPPORT_DRM_DC_MODULE)
	psPMR = PVRSRVGEMMMapLookupPMR(pFile, ps_vma);
	if (!psPMR)
#endif
	{
		hSecurePMRHandle = (IMG_HANDLE)((IMG_UINTPTR_T)ps_vma->vm_pgoff);

		eError = PVRSRVLookupHandle(psConnection->psHandleBase,
					    (IMG_HANDLE *) &hPMRResmanHandle,
					    hSecurePMRHandle,
					    PVRSRV_HANDLE_TYPE_PHYSMEM_PMR);
		if (eError != PVRSRV_OK)
		{
			goto e0;
		}

		eError = ResManFindPrivateDataByPtr(hPMRResmanHandle,
						    (void **)&psPMR);
		if (eError != PVRSRV_OK)
		{
			goto e0;
		}
	}

	/*
	 * Take a reference on the PMR to make sure that it can't be freed
	 * while it's mapped into the user process.
	 */
	PMRRefPMR(psPMR);

	PMRUnlock();

	eError = PMRLockSysPhysAddresses(psPMR, PAGE_SHIFT);
	if (eError != PVRSRV_OK)
	{
		goto e1;
	}

	if (((ps_vma->vm_flags & VM_WRITE) != 0) &&
	    ((ps_vma->vm_flags & VM_SHARED) == 0))
	{
		eError = PVRSRV_ERROR_INVALID_PARAMS;
		goto e1;
	}

	/*
	 * We ought to call PMR_Flags() here to check the permissions
	 * against the requested mode, and possibly to set up the cache
	 * control protflags
	 */
	eError = PMR_Flags(psPMR, &ulPMRFlags);
	if (eError != PVRSRV_OK)
	{
		goto e1;
	}

	ulNewFlags = ps_vma->vm_flags;
#if 0
	/* Discard user read/write request, we will pull these flags from the PMR */
	ulNewFlags &= ~(VM_READ | VM_WRITE);

	if (ulPMRFlags & PVRSRV_MEMALLOCFLAG_CPU_READABLE)
	{
		ulNewFlags |= VM_READ;
	}
	if (ulPMRFlags & PVRSRV_MEMALLOCFLAG_CPU_WRITEABLE)
	{
		ulNewFlags |= VM_WRITE;
	}
#endif

	ps_vma->vm_flags = ulNewFlags;

#if defined (CONFIG_ARM64)
	sPageProt = __pgprot_modify(ps_vma->vm_page_prot, 0, vm_get_page_prot(ulNewFlags));
#elif defined(CONFIG_ARM)
	sPageProt = __pgprot_modify(ps_vma->vm_page_prot, L_PTE_MT_MASK, vm_get_page_prot(ulNewFlags));
#elif defined(CONFIG_X86)
	sPageProt = pgprot_modify(ps_vma->vm_page_prot, vm_get_page_prot(ulNewFlags));
#elif defined(CONFIG_METAG) || defined(CONFIG_MIPS)
	sPageProt = vm_get_page_prot(ulNewFlags);
#else
#error Please add pgprot_modify equivalent for your system
#endif
	ui32CPUCacheFlags = DevmemCPUCacheMode(ulPMRFlags);
	switch (ui32CPUCacheFlags)
	{
		case PVRSRV_MEMALLOCFLAG_CPU_UNCACHED:
				sPageProt = pgprot_noncached(sPageProt);
				break;

		case PVRSRV_MEMALLOCFLAG_CPU_WRITE_COMBINE:
				sPageProt = pgprot_writecombine(sPageProt);
				break;

		case PVRSRV_MEMALLOCFLAG_CPU_CACHED:
				break;

		default:
				eError = PVRSRV_ERROR_INVALID_PARAMS;
				goto e1;
	}
	ps_vma->vm_page_prot = sPageProt;

    uiLength = ps_vma->vm_end - ps_vma->vm_start;

    ps_vma->vm_flags |= VM_IO;

/* Don't include the mapping in core dumps */
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,7,0))
    ps_vma->vm_flags |= VM_DONTDUMP;
#else
    ps_vma->vm_flags |= VM_RESERVED;
#endif

    /*
     * Disable mremap because our nopage handler assumes all
     * page requests have already been validated.
     */
    ps_vma->vm_flags |= VM_DONTEXPAND;
    
    /* Don't allow mapping to be inherited across a process fork */
    ps_vma->vm_flags |= VM_DONTCOPY;

#if defined(PVR_MMAP_USE_VM_INSERT)
	{
		/* Scan the map range for pfns without struct page* handling. If we find
		 * one, this is a mixed map, and we can't use vm_insert_page().
		 */
		for (uiOffset = 0; uiOffset < uiLength; uiOffset += 1ULL<<PAGE_SHIFT)
		{
			IMG_CPU_PHYADDR sCpuPAddr;
			IMG_BOOL bValid;

			eError = PMR_CpuPhysAddr(psPMR, uiOffset, &sCpuPAddr, &bValid);
			PVR_ASSERT(eError == PVRSRV_OK);
			if (eError)
			{
				goto e2;
			}

			if (bValid)
			{
				uiPFN = sCpuPAddr.uiAddr >> PAGE_SHIFT;
				PVR_ASSERT(((IMG_UINT64)uiPFN << PAGE_SHIFT) == sCpuPAddr.uiAddr);

				if (!pfn_valid(uiPFN) || page_count(pfn_to_page(uiPFN)) == 0)
				{
					bMixedMap = IMG_TRUE;
				}
			}
		}

		if (bMixedMap)
		{
		    ps_vma->vm_flags |= VM_MIXEDMAP;
		}
	}
#endif /* defined(PVR_MMAP_USE_VM_INSERT) */

    for (uiOffset = 0; uiOffset < uiLength; uiOffset += 1ULL<<PAGE_SHIFT)
    {
        IMG_SIZE_T uiNumContiguousBytes;
        IMG_INT32 iStatus;
        IMG_CPU_PHYADDR sCpuPAddr;
        IMG_BOOL bValid;

        uiNumContiguousBytes = 1ULL<<PAGE_SHIFT;
        eError = PMR_CpuPhysAddr(psPMR,
                                 uiOffset,
                                 &sCpuPAddr,
                                 &bValid);
        PVR_ASSERT(eError == PVRSRV_OK);
        if (eError)
        {
            goto e2;
        }

		/*
		 * Only map in pages that are valid; any that aren't will be
		 * picked up by the nopage handler, which will return a zeroed
		 * page for us.
		 */
		if (bValid)
		{
	        uiPFN = sCpuPAddr.uiAddr >> PAGE_SHIFT;
	        PVR_ASSERT(((IMG_UINT64)uiPFN << PAGE_SHIFT) == sCpuPAddr.uiAddr);

#if defined(PVR_MMAP_USE_VM_INSERT)
			if (bMixedMap)
			{
				/* This path is just for debugging. It should be equivalent
				 * to the remap_pfn_range() path.
				 */
				iStatus = vm_insert_mixed(ps_vma,
										  ps_vma->vm_start + uiOffset,
										  uiPFN);
			}
			else
			{
				iStatus = vm_insert_page(ps_vma,
										 ps_vma->vm_start + uiOffset,
										 pfn_to_page(uiPFN));
			}
#else /* defined(PVR_MMAP_USE_VM_INSERT) */
	        iStatus = remap_pfn_range(ps_vma,
	                                  ps_vma->vm_start + uiOffset,
	                                  uiPFN,
	                                  uiNumContiguousBytes,
	                                  ps_vma->vm_page_prot);
#endif /* defined(PVR_MMAP_USE_VM_INSERT) */

	        PVR_ASSERT(iStatus == 0);
	        if(iStatus)
	        {
	            // N.B. not the right error code, but, it doesn't get propagated anyway... :(
	            eError = PVRSRV_ERROR_OUT_OF_MEMORY;
	
	            goto e2;
	        }

#if defined(PVRSRV_ENABLE_PROCESS_STATS)
    /* USER MAPPING*/
#if !defined(PVRSRV_ENABLE_MEMORY_STATS)
	    PVRSRVStatsIncrMemAllocStat(PVRSRV_MEM_ALLOC_TYPE_MAP_UMA_LMA_PAGES, PAGE_SIZE);
#else
    	PVRSRVStatsAddMemAllocRecord(PVRSRV_MEM_ALLOC_TYPE_MAP_UMA_LMA_PAGES,
    			 	 	 	 	 (IMG_VOID*)(IMG_UINTPTR_T)(ps_vma->vm_start + uiOffset),
								 sCpuPAddr,
								 PAGE_SIZE,
								 IMG_NULL);
#endif
#endif
		}
        (void)pFile;
    }

    /* stash the PMR so we can find and unlock it later */
    ps_vma->vm_private_data = psPMR;

    /* Install open and close handlers for ref-counting */
    ps_vma->vm_ops = &gsMMapOps;

	mutex_unlock(&g_sMMapMutex);

    return 0;

    /*
      error exit paths follow
    */
 e2:
    PVR_DPF((PVR_DBG_ERROR, "don't know how to handle this error.  Abort!"));
    PMRUnlockSysPhysAddresses(psPMR);
 e1:
	PMRUnrefPMR(psPMR);
	goto em1;
 e0:
    PVR_DPF((PVR_DBG_ERROR, "Error in MMapPMR critical section"));
	PMRUnlock();
 em1:
    PVR_ASSERT(eError != PVRSRV_OK);
    PVR_DPF((PVR_DBG_ERROR, "unable to translate error %d", eError));
	mutex_unlock(&g_sMMapMutex);

    return -ENOENT; // -EAGAIN // or what?
}
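
The PVR_MMAP_USE_VM_INSERT path above first scans the whole range for pfns that lack a usable struct page; if it finds any, the entire VMA is marked VM_MIXEDMAP and every valid page is inserted with vm_insert_mixed(), otherwise vm_insert_page() is used. A compressed sketch of that decision, with illustrative names and the pre-4.5 vm_insert_mixed() signature the example targets:

#include <linux/mm.h>

/* Illustrative helpers, not part of the PVR sources. */
static bool range_has_pageless_pfn(const unsigned long *pfns, size_t n)
{
	size_t i;

	for (i = 0; i < n; i++)
		if (!pfn_valid(pfns[i]) ||
		    page_count(pfn_to_page(pfns[i])) == 0)
			return true;
	return false;
}

static int map_pfn_array(struct vm_area_struct *vma,
			 const unsigned long *pfns, size_t n)
{
	bool mixed = range_has_pageless_pfn(pfns, n);
	unsigned long addr = vma->vm_start;
	size_t i;
	int ret;

	if (mixed)
		vma->vm_flags |= VM_MIXEDMAP;

	for (i = 0; i < n; i++, addr += PAGE_SIZE) {
		if (mixed)
			ret = vm_insert_mixed(vma, addr, pfns[i]);
		else
			ret = vm_insert_page(vma, addr,
					     pfn_to_page(pfns[i]));
		if (ret)
			return ret;
	}
	return 0;
}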
Code example #4
File: dax.c  Project: 020gzh/linux
static int copy_user_bh(struct page *to, struct inode *inode,
		struct buffer_head *bh, unsigned long vaddr)
{
	struct blk_dax_ctl dax = {
		.sector = to_sector(bh, inode),
		.size = bh->b_size,
	};
	struct block_device *bdev = bh->b_bdev;
	void *vto;

	if (dax_map_atomic(bdev, &dax) < 0)
		return PTR_ERR(dax.addr);
	vto = kmap_atomic(to);
	copy_user_page(vto, (void __force *)dax.addr, vaddr, to);
	kunmap_atomic(vto);
	dax_unmap_atomic(bdev, &dax);
	return 0;
}

#define NO_SECTOR -1
#define DAX_PMD_INDEX(page_index) (page_index & (PMD_MASK >> PAGE_SHIFT))

static int dax_radix_entry(struct address_space *mapping, pgoff_t index,
		sector_t sector, bool pmd_entry, bool dirty)
{
	struct radix_tree_root *page_tree = &mapping->page_tree;
	pgoff_t pmd_index = DAX_PMD_INDEX(index);
	int type, error = 0;
	void *entry;

	WARN_ON_ONCE(pmd_entry && !dirty);
	if (dirty)
		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);

	spin_lock_irq(&mapping->tree_lock);

	entry = radix_tree_lookup(page_tree, pmd_index);
	if (entry && RADIX_DAX_TYPE(entry) == RADIX_DAX_PMD) {
		index = pmd_index;
		goto dirty;
	}

	entry = radix_tree_lookup(page_tree, index);
	if (entry) {
		type = RADIX_DAX_TYPE(entry);
		if (WARN_ON_ONCE(type != RADIX_DAX_PTE &&
					type != RADIX_DAX_PMD)) {
			error = -EIO;
			goto unlock;
		}

		if (!pmd_entry || type == RADIX_DAX_PMD)
			goto dirty;

		/*
		 * We only insert dirty PMD entries into the radix tree.  This
		 * means we don't need to worry about removing a dirty PTE
		 * entry and inserting a clean PMD entry, thus reducing the
		 * range we would flush with a follow-up fsync/msync call.
		 */
		radix_tree_delete(&mapping->page_tree, index);
		mapping->nrexceptional--;
	}

	if (sector == NO_SECTOR) {
		/*
		 * This can happen during correct operation if our pfn_mkwrite
		 * fault raced against a hole punch operation.  If this
		 * happens the pte that was hole punched will have been
		 * unmapped and the radix tree entry will have been removed by
		 * the time we are called, but the call will still happen.  We
		 * will return all the way up to wp_pfn_shared(), where the
		 * pte_same() check will fail, eventually causing page fault
		 * to be retried by the CPU.
		 */
		goto unlock;
	}

	error = radix_tree_insert(page_tree, index,
			RADIX_DAX_ENTRY(sector, pmd_entry));
	if (error)
		goto unlock;

	mapping->nrexceptional++;
 dirty:
	if (dirty)
		radix_tree_tag_set(page_tree, index, PAGECACHE_TAG_DIRTY);
 unlock:
	spin_unlock_irq(&mapping->tree_lock);
	return error;
}

static int dax_writeback_one(struct block_device *bdev,
		struct address_space *mapping, pgoff_t index, void *entry)
{
	struct radix_tree_root *page_tree = &mapping->page_tree;
	int type = RADIX_DAX_TYPE(entry);
	struct radix_tree_node *node;
	struct blk_dax_ctl dax;
	void **slot;
	int ret = 0;

	spin_lock_irq(&mapping->tree_lock);
	/*
	 * Regular page slots are stabilized by the page lock even
	 * without the tree itself locked.  These unlocked entries
	 * need verification under the tree lock.
	 */
	if (!__radix_tree_lookup(page_tree, index, &node, &slot))
		goto unlock;
	if (*slot != entry)
		goto unlock;

	/* another fsync thread may have already written back this entry */
	if (!radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE))
		goto unlock;

	if (WARN_ON_ONCE(type != RADIX_DAX_PTE && type != RADIX_DAX_PMD)) {
		ret = -EIO;
		goto unlock;
	}

	dax.sector = RADIX_DAX_SECTOR(entry);
	dax.size = (type == RADIX_DAX_PMD ? PMD_SIZE : PAGE_SIZE);
	spin_unlock_irq(&mapping->tree_lock);

	/*
	 * We cannot hold tree_lock while calling dax_map_atomic() because it
	 * eventually calls cond_resched().
	 */
	ret = dax_map_atomic(bdev, &dax);
	if (ret < 0)
		return ret;

	if (WARN_ON_ONCE(ret < dax.size)) {
		ret = -EIO;
		goto unmap;
	}

	wb_cache_pmem(dax.addr, dax.size);

	spin_lock_irq(&mapping->tree_lock);
	radix_tree_tag_clear(page_tree, index, PAGECACHE_TAG_TOWRITE);
	spin_unlock_irq(&mapping->tree_lock);
 unmap:
	dax_unmap_atomic(bdev, &dax);
	return ret;

 unlock:
	spin_unlock_irq(&mapping->tree_lock);
	return ret;
}

/*
 * Flush the mapping to the persistent domain within the byte range of [start,
 * end]. This is required by data integrity operations to ensure file data is
 * on persistent storage prior to completion of the operation.
 */
int dax_writeback_mapping_range(struct address_space *mapping,
		struct block_device *bdev, struct writeback_control *wbc)
{
	struct inode *inode = mapping->host;
	pgoff_t start_index, end_index, pmd_index;
	pgoff_t indices[PAGEVEC_SIZE];
	struct pagevec pvec;
	bool done = false;
	int i, ret = 0;
	void *entry;

	if (WARN_ON_ONCE(inode->i_blkbits != PAGE_SHIFT))
		return -EIO;

	if (!mapping->nrexceptional || wbc->sync_mode != WB_SYNC_ALL)
		return 0;

	start_index = wbc->range_start >> PAGE_SHIFT;
	end_index = wbc->range_end >> PAGE_SHIFT;
	pmd_index = DAX_PMD_INDEX(start_index);

	rcu_read_lock();
	entry = radix_tree_lookup(&mapping->page_tree, pmd_index);
	rcu_read_unlock();

	/* see if the start of our range is covered by a PMD entry */
	if (entry && RADIX_DAX_TYPE(entry) == RADIX_DAX_PMD)
		start_index = pmd_index;

	tag_pages_for_writeback(mapping, start_index, end_index);

	pagevec_init(&pvec, 0);
	while (!done) {
		pvec.nr = find_get_entries_tag(mapping, start_index,
				PAGECACHE_TAG_TOWRITE, PAGEVEC_SIZE,
				pvec.pages, indices);

		if (pvec.nr == 0)
			break;

		for (i = 0; i < pvec.nr; i++) {
			if (indices[i] > end_index) {
				done = true;
				break;
			}

			ret = dax_writeback_one(bdev, mapping, indices[i],
					pvec.pages[i]);
			if (ret < 0)
				return ret;
		}
	}
	wmb_pmem();
	return 0;
}
EXPORT_SYMBOL_GPL(dax_writeback_mapping_range);
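
/*
 * Editorial sketch, not part of dax.c: a filesystem of this era routes DAX
 * mappings to the helper exported above from its ->writepages() method,
 * roughly as below (the myfs_* name is hypothetical; ext2/ext4 follow this
 * pattern).
 */
static int myfs_writepages(struct address_space *mapping,
			   struct writeback_control *wbc)
{
	if (dax_mapping(mapping))
		return dax_writeback_mapping_range(mapping,
				mapping->host->i_sb->s_bdev, wbc);

	return generic_writepages(mapping, wbc);
}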

static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
			struct vm_area_struct *vma, struct vm_fault *vmf)
{
	unsigned long vaddr = (unsigned long)vmf->virtual_address;
	struct address_space *mapping = inode->i_mapping;
	struct block_device *bdev = bh->b_bdev;
	struct blk_dax_ctl dax = {
		.sector = to_sector(bh, inode),
		.size = bh->b_size,
	};
	pgoff_t size;
	int error;

	i_mmap_lock_read(mapping);

	/*
	 * Check truncate didn't happen while we were allocating a block.
	 * If it did, this block may or may not be still allocated to the
	 * file.  We can't tell the filesystem to free it because we can't
	 * take i_mutex here.  In the worst case, the file still has blocks
	 * allocated past the end of the file.
	 */
	size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
	if (unlikely(vmf->pgoff >= size)) {
		error = -EIO;
		goto out;
	}

	if (dax_map_atomic(bdev, &dax) < 0) {
		error = PTR_ERR(dax.addr);
		goto out;
	}

	if (buffer_unwritten(bh) || buffer_new(bh)) {
		clear_pmem(dax.addr, PAGE_SIZE);
		wmb_pmem();
	}
	dax_unmap_atomic(bdev, &dax);

	error = dax_radix_entry(mapping, vmf->pgoff, dax.sector, false,
			vmf->flags & FAULT_FLAG_WRITE);
	if (error)
		goto out;

	error = vm_insert_mixed(vma, vaddr, dax.pfn);

 out:
	i_mmap_unlock_read(mapping);

	return error;
}

/**
 * __dax_fault - handle a page fault on a DAX file
 * @vma: The virtual memory area where the fault occurred
 * @vmf: The description of the fault
 * @get_block: The filesystem method used to translate file offsets to blocks
 * @complete_unwritten: The filesystem method used to convert unwritten blocks
 *	to written so the data written to them is exposed. This is required
 *	by write faults for filesystems that will return unwritten extent
 *	mappings from @get_block, but it is optional for reads as
 *	dax_insert_mapping() will always zero unwritten blocks. If the fs does
 *	not support unwritten extents, it should pass NULL.
 *
 * When a page fault occurs, filesystems may call this helper in their
 * fault handler for DAX files. __dax_fault() assumes the caller has done all
 * the necessary locking for the page fault to proceed successfully.
 */
int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
			get_block_t get_block, dax_iodone_t complete_unwritten)
{
	struct file *file = vma->vm_file;
	struct address_space *mapping = file->f_mapping;
	struct inode *inode = mapping->host;
	struct page *page;
	struct buffer_head bh;
	unsigned long vaddr = (unsigned long)vmf->virtual_address;
	unsigned blkbits = inode->i_blkbits;
	sector_t block;
	pgoff_t size;
	int error;
	int major = 0;

	size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
	if (vmf->pgoff >= size)
		return VM_FAULT_SIGBUS;

	memset(&bh, 0, sizeof(bh));
	block = (sector_t)vmf->pgoff << (PAGE_SHIFT - blkbits);
	bh.b_bdev = inode->i_sb->s_bdev;
	bh.b_size = PAGE_SIZE;

 repeat:
	page = find_get_page(mapping, vmf->pgoff);
	if (page) {
		if (!lock_page_or_retry(page, vma->vm_mm, vmf->flags)) {
			put_page(page);
			return VM_FAULT_RETRY;
		}
		if (unlikely(page->mapping != mapping)) {
			unlock_page(page);
			put_page(page);
			goto repeat;
		}
		size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
		if (unlikely(vmf->pgoff >= size)) {
			/*
			 * We have a struct page covering a hole in the file
			 * from a read fault and we've raced with a truncate
			 */
			error = -EIO;
			goto unlock_page;
		}
	}

	error = get_block(inode, block, &bh, 0);
	if (!error && (bh.b_size < PAGE_SIZE))
		error = -EIO;		/* fs corruption? */
	if (error)
		goto unlock_page;

	if (!buffer_mapped(&bh) && !buffer_unwritten(&bh) && !vmf->cow_page) {
		if (vmf->flags & FAULT_FLAG_WRITE) {
			error = get_block(inode, block, &bh, 1);
			count_vm_event(PGMAJFAULT);
			mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
			major = VM_FAULT_MAJOR;
			if (!error && (bh.b_size < PAGE_SIZE))
				error = -EIO;
			if (error)
				goto unlock_page;
		} else {
			return dax_load_hole(mapping, page, vmf);
		}
	}

	if (vmf->cow_page) {
		struct page *new_page = vmf->cow_page;
		if (buffer_written(&bh))
			error = copy_user_bh(new_page, inode, &bh, vaddr);
		else
			clear_user_highpage(new_page, vaddr);
		if (error)
			goto unlock_page;
		vmf->page = page;
		if (!page) {
			i_mmap_lock_read(mapping);
			/* Check we didn't race with truncate */
			size = (i_size_read(inode) + PAGE_SIZE - 1) >>
								PAGE_SHIFT;
			if (vmf->pgoff >= size) {
				i_mmap_unlock_read(mapping);
				error = -EIO;
				goto out;
			}
		}
		return VM_FAULT_LOCKED;
	}

	/* Check we didn't race with a read fault installing a new page */
	if (!page && major)
		page = find_lock_page(mapping, vmf->pgoff);

	if (page) {
		unmap_mapping_range(mapping, vmf->pgoff << PAGE_SHIFT,
							PAGE_SIZE, 0);
		delete_from_page_cache(page);
		unlock_page(page);
		put_page(page);
		page = NULL;
	}

	/*
	 * If we successfully insert the new mapping over an unwritten extent,
	 * we need to ensure we convert the unwritten extent. If there is an
	 * error inserting the mapping, the filesystem needs to leave it as
	 * unwritten to prevent exposure of the stale underlying data to
	 * userspace, but we still need to call the completion function so
	 * the private resources on the mapping buffer can be released. We
	 * indicate what the callback should do via the uptodate variable, same
	 * as for normal BH based IO completions.
	 */
	error = dax_insert_mapping(inode, &bh, vma, vmf);
	if (buffer_unwritten(&bh)) {
		if (complete_unwritten)
			complete_unwritten(&bh, !error);
		else
			WARN_ON_ONCE(!(vmf->flags & FAULT_FLAG_WRITE));
	}

 out:
	if (error == -ENOMEM)
		return VM_FAULT_OOM | major;
	/* -EBUSY is fine, somebody else faulted on the same PTE */
	if ((error < 0) && (error != -EBUSY))
		return VM_FAULT_SIGBUS | major;
	return VM_FAULT_NOPAGE | major;

 unlock_page:
	if (page) {
		unlock_page(page);
		put_page(page);
	}
	goto out;
}