示例#1
0
void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
		     pmd_t *pmdp)
{
	pmd_t entry = *pmdp;
	set_pmd_at(vma->vm_mm, address, pmdp, pmd_mknotpresent(entry));
	flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
}
pmd_t pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address,
			   pmd_t *pmdp)
{
	pmd_t pmd = pmd_mksplitting(*pmdp);
	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
	set_pmd_at(vma->vm_mm, address, pmdp, pmd);
	
	flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
}
示例#3
0
void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address,
			  pmd_t *pmdp)
{
	pmd_t pmd = pmd_mksplitting(*pmdp);
	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
	set_pmd_at(vma->vm_mm, address, pmdp, pmd);
	/* tlb flush only to serialize against gup-fast */
	flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
}
示例#4
0
void pmdp_splitting_flush(struct vm_area_struct *vma,
			 unsigned long address,
			 pmd_t *pmdp)
{
	if (!pmd_trans_splitting(*pmdp)) {
		pmd_t pmd = pmd_mksplitting(*pmdp);
		set_pmd_at(vma->vm_mm, address, pmdp, pmd);
	}
}
void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
                     pmd_t *pmdp)
{
    pmd_t entry = *pmdp;

    pmd_val(entry) &= ~_PAGE_VALID;

    set_pmd_at(vma->vm_mm, address, pmdp, entry);
    flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
}
示例#6
0
void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address,
			  pmd_t *pmdp)
{
	pmd_t pmd = pmd_mksplitting(*pmdp);

	VM_BUG_ON(address & ~PMD_MASK);
	set_pmd_at(vma->vm_mm, address, pmdp, pmd);

	/* dummy IPI to serialise against fast_gup */
	kick_all_cpus_sync();
}
示例#7
0
int pmdp_set_access_flags(struct vm_area_struct *vma,
			  unsigned long address, pmd_t *pmdp,
			  pmd_t entry, int dirty)
{
	int changed = !pmd_same(*pmdp, entry);
	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
	if (changed) {
		set_pmd_at(vma->vm_mm, address, pmdp, entry);
		flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
	}
	return changed;
}
示例#8
0
static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
		unsigned long addr, pmd_t *pmdp)
{
	pmd_t pmd = *pmdp;

	pmd = pmd_wrprotect(pmd);
	pmd = pmd_clear_flags(pmd, _PAGE_SOFT_DIRTY);

	if (vma->vm_flags & VM_SOFTDIRTY)
		vma->vm_flags &= ~VM_SOFTDIRTY;

	set_pmd_at(vma->vm_mm, addr, pmdp, pmd);
}
示例#9
0
int pmdp_set_access_flags(struct vm_area_struct *vma,
			  unsigned long address, pmd_t *pmdp,
			  pmd_t entry, int dirty)
{
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	int changed = !pmd_same(*pmdp, entry);
	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
	if (changed) {
		set_pmd_at(vma->vm_mm, address, pmdp, entry);
		flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
	}
	return changed;
#else /* CONFIG_TRANSPARENT_HUGEPAGE */
	BUG();
	return 0;
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
}
/*
 * This routine is only called when splitting a THP
 */
void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
		     pmd_t *pmdp)
{
	pmd_t entry = *pmdp;

	pmd_val(entry) &= ~_PAGE_VALID;

	set_pmd_at(vma->vm_mm, address, pmdp, entry);
	flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);

	/*
	 * set_pmd_at() will not be called in a way to decrement
	 * thp_pte_count when splitting a THP, so do it now.
	 * Sanity check pmd before doing the actual decrement.
	 */
	if ((pmd_val(entry) & _PAGE_PMD_HUGE) &&
	    !is_huge_zero_page(pmd_page(entry)))
		(vma->vm_mm)->context.thp_pte_count--;
}
示例#11
0
文件: dax.c 项目: 020gzh/linux
int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
		pmd_t *pmd, unsigned int flags, get_block_t get_block,
		dax_iodone_t complete_unwritten)
{
	struct file *file = vma->vm_file;
	struct address_space *mapping = file->f_mapping;
	struct inode *inode = mapping->host;
	struct buffer_head bh;
	unsigned blkbits = inode->i_blkbits;
	unsigned long pmd_addr = address & PMD_MASK;
	bool write = flags & FAULT_FLAG_WRITE;
	struct block_device *bdev;
	pgoff_t size, pgoff;
	sector_t block;
	int error, result = 0;
	bool alloc = false;

	/* dax pmd mappings require pfn_t_devmap() */
	if (!IS_ENABLED(CONFIG_FS_DAX_PMD))
		return VM_FAULT_FALLBACK;

	/* Fall back to PTEs if we're going to COW */
	if (write && !(vma->vm_flags & VM_SHARED)) {
		split_huge_pmd(vma, pmd, address);
		dax_pmd_dbg(NULL, address, "cow write");
		return VM_FAULT_FALLBACK;
	}
	/* If the PMD would extend outside the VMA */
	if (pmd_addr < vma->vm_start) {
		dax_pmd_dbg(NULL, address, "vma start unaligned");
		return VM_FAULT_FALLBACK;
	}
	if ((pmd_addr + PMD_SIZE) > vma->vm_end) {
		dax_pmd_dbg(NULL, address, "vma end unaligned");
		return VM_FAULT_FALLBACK;
	}

	pgoff = linear_page_index(vma, pmd_addr);
	size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
	if (pgoff >= size)
		return VM_FAULT_SIGBUS;
	/* If the PMD would cover blocks out of the file */
	if ((pgoff | PG_PMD_COLOUR) >= size) {
		dax_pmd_dbg(NULL, address,
				"offset + huge page size > file size");
		return VM_FAULT_FALLBACK;
	}

	memset(&bh, 0, sizeof(bh));
	bh.b_bdev = inode->i_sb->s_bdev;
	block = (sector_t)pgoff << (PAGE_SHIFT - blkbits);

	bh.b_size = PMD_SIZE;

	if (get_block(inode, block, &bh, 0) != 0)
		return VM_FAULT_SIGBUS;

	if (!buffer_mapped(&bh) && write) {
		if (get_block(inode, block, &bh, 1) != 0)
			return VM_FAULT_SIGBUS;
		alloc = true;
	}

	bdev = bh.b_bdev;

	/*
	 * If the filesystem isn't willing to tell us the length of a hole,
	 * just fall back to PTEs.  Calling get_block 512 times in a loop
	 * would be silly.
	 */
	if (!buffer_size_valid(&bh) || bh.b_size < PMD_SIZE) {
		dax_pmd_dbg(&bh, address, "allocated block too small");
		return VM_FAULT_FALLBACK;
	}

	/*
	 * If we allocated new storage, make sure no process has any
	 * zero pages covering this hole
	 */
	if (alloc) {
		loff_t lstart = pgoff << PAGE_SHIFT;
		loff_t lend = lstart + PMD_SIZE - 1; /* inclusive */

		truncate_pagecache_range(inode, lstart, lend);
	}

	i_mmap_lock_read(mapping);

	/*
	 * If a truncate happened while we were allocating blocks, we may
	 * leave blocks allocated to the file that are beyond EOF.  We can't
	 * take i_mutex here, so just leave them hanging; they'll be freed
	 * when the file is deleted.
	 */
	size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
	if (pgoff >= size) {
		result = VM_FAULT_SIGBUS;
		goto out;
	}
	if ((pgoff | PG_PMD_COLOUR) >= size) {
		dax_pmd_dbg(&bh, address,
				"offset + huge page size > file size");
		goto fallback;
	}

	if (!write && !buffer_mapped(&bh) && buffer_uptodate(&bh)) {
		spinlock_t *ptl;
		pmd_t entry;
		struct page *zero_page = get_huge_zero_page();

		if (unlikely(!zero_page)) {
			dax_pmd_dbg(&bh, address, "no zero page");
			goto fallback;
		}

		ptl = pmd_lock(vma->vm_mm, pmd);
		if (!pmd_none(*pmd)) {
			spin_unlock(ptl);
			dax_pmd_dbg(&bh, address, "pmd already present");
			goto fallback;
		}

		dev_dbg(part_to_dev(bdev->bd_part),
				"%s: %s addr: %lx pfn: <zero> sect: %llx\n",
				__func__, current->comm, address,
				(unsigned long long) to_sector(&bh, inode));

		entry = mk_pmd(zero_page, vma->vm_page_prot);
		entry = pmd_mkhuge(entry);
		set_pmd_at(vma->vm_mm, pmd_addr, pmd, entry);
		result = VM_FAULT_NOPAGE;
		spin_unlock(ptl);
	} else {
		struct blk_dax_ctl dax = {
			.sector = to_sector(&bh, inode),
			.size = PMD_SIZE,
		};
		long length = dax_map_atomic(bdev, &dax);

		if (length < 0) {
			result = VM_FAULT_SIGBUS;
			goto out;
		}
		if (length < PMD_SIZE) {
			dax_pmd_dbg(&bh, address, "dax-length too small");
			dax_unmap_atomic(bdev, &dax);
			goto fallback;
		}
		if (pfn_t_to_pfn(dax.pfn) & PG_PMD_COLOUR) {
			dax_pmd_dbg(&bh, address, "pfn unaligned");
			dax_unmap_atomic(bdev, &dax);
			goto fallback;
		}

		if (!pfn_t_devmap(dax.pfn)) {
			dax_unmap_atomic(bdev, &dax);
			dax_pmd_dbg(&bh, address, "pfn not in memmap");
			goto fallback;
		}

		if (buffer_unwritten(&bh) || buffer_new(&bh)) {
			clear_pmem(dax.addr, PMD_SIZE);
			wmb_pmem();
			count_vm_event(PGMAJFAULT);
			mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
			result |= VM_FAULT_MAJOR;
		}
		dax_unmap_atomic(bdev, &dax);

		/*
		 * For PTE faults we insert a radix tree entry for reads, and
		 * leave it clean.  Then on the first write we dirty the radix
		 * tree entry via the dax_pfn_mkwrite() path.  This sequence
		 * allows the dax_pfn_mkwrite() call to be simpler and avoid a
		 * call into get_block() to translate the pgoff to a sector in
		 * order to be able to create a new radix tree entry.
		 *
		 * The PMD path doesn't have an equivalent to
		 * dax_pfn_mkwrite(), though, so for a read followed by a
		 * write we traverse all the way through __dax_pmd_fault()
		 * twice.  This means we can just skip inserting a radix tree
		 * entry completely on the initial read and just wait until
		 * the write to insert a dirty entry.
		 */
		if (write) {
			error = dax_radix_entry(mapping, pgoff, dax.sector,
					true, true);
			if (error) {
				dax_pmd_dbg(&bh, address,
						"PMD radix insertion failed");
				goto fallback;
			}
		}

		dev_dbg(part_to_dev(bdev->bd_part),
				"%s: %s addr: %lx pfn: %lx sect: %llx\n",
				__func__, current->comm, address,
				pfn_t_to_pfn(dax.pfn),
				(unsigned long long) dax.sector);
		result |= vmf_insert_pfn_pmd(vma, address, pmd,
				dax.pfn, write);
	}

 out:
	i_mmap_unlock_read(mapping);

	if (buffer_unwritten(&bh))
		complete_unwritten(&bh, !(result & VM_FAULT_ERROR));

	return result;

 fallback:
	count_vm_event(THP_FAULT_FALLBACK);
	result = VM_FAULT_FALLBACK;
	goto out;
}
EXPORT_SYMBOL_GPL(__dax_pmd_fault);

/**
 * dax_pmd_fault - handle a PMD fault on a DAX file
 * @vma: The virtual memory area where the fault occurred
 * @vmf: The description of the fault
 * @get_block: The filesystem method used to translate file offsets to blocks
 *
 * When a page fault occurs, filesystems may call this helper in their
 * pmd_fault handler for DAX files.
 */
int dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
			pmd_t *pmd, unsigned int flags, get_block_t get_block,
			dax_iodone_t complete_unwritten)
{
	int result;
	struct super_block *sb = file_inode(vma->vm_file)->i_sb;

	if (flags & FAULT_FLAG_WRITE) {
		sb_start_pagefault(sb);
		file_update_time(vma->vm_file);
	}
	result = __dax_pmd_fault(vma, address, pmd, flags, get_block,
				complete_unwritten);
	if (flags & FAULT_FLAG_WRITE)
		sb_end_pagefault(sb);

	return result;
}