Example #1
/*
 * mmap_sem must be held on entry.  If @nonblocking != NULL and
 * *@flags does not include FOLL_NOWAIT, the mmap_sem may be released.
 * If it is, *@nonblocking will be set to 0 and -EBUSY returned.
 */
static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
		unsigned long address, unsigned int *flags, int *nonblocking)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned int fault_flags = 0;
	int ret;

	/* For mlock, just skip the stack guard page. */
	if ((*flags & FOLL_MLOCK) &&
			(stack_guard_page_start(vma, address) ||
			 stack_guard_page_end(vma, address + PAGE_SIZE)))
		return -ENOENT;
	if (*flags & FOLL_WRITE)
		fault_flags |= FAULT_FLAG_WRITE;
	if (nonblocking)
		fault_flags |= FAULT_FLAG_ALLOW_RETRY;
	if (*flags & FOLL_NOWAIT)
		fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT;
	if (*flags & FOLL_TRIED) {
		VM_WARN_ON_ONCE(fault_flags & FAULT_FLAG_ALLOW_RETRY);
		fault_flags |= FAULT_FLAG_TRIED;
	}

	ret = handle_mm_fault(mm, vma, address, fault_flags);
	if (ret & VM_FAULT_ERROR) {
		if (ret & VM_FAULT_OOM)
			return -ENOMEM;
		if (ret & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE))
			return *flags & FOLL_HWPOISON ? -EHWPOISON : -EFAULT;
		if (ret & VM_FAULT_SIGBUS)
			return -EFAULT;
		BUG();
	}

	if (tsk) {
		if (ret & VM_FAULT_MAJOR)
			tsk->maj_flt++;
		else
			tsk->min_flt++;
	}

	if (ret & VM_FAULT_RETRY) {
		if (nonblocking)
			*nonblocking = 0;
		return -EBUSY;
	}

	/*
	 * The VM_FAULT_WRITE bit tells us that do_wp_page has broken COW when
	 * necessary, even if maybe_mkwrite decided not to set pte_write. We
	 * can thus safely do subsequent page lookups as if they were reads.
	 * But only do so when looping for pte_write is futile: in some cases
	 * userspace may also be wanting to write to the gotten user page,
	 * which a read fault here might prevent (a readonly page might get
	 * reCOWed by userspace write).
	 */
	if ((ret & VM_FAULT_WRITE) && !(vma->vm_flags & VM_WRITE))
		*flags &= ~FOLL_WRITE;
	return 0;
}
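The error-code contract documented above is easiest to read from the caller's side. The loop below is a hedged sketch, not the kernel's __get_user_pages(): lookup_page() is a hypothetical stand-in for the real page-table walk, and the surrounding bookkeeping is omitted. It shows that 0 means "fault serviced, retry the lookup", -EBUSY means mmap_sem was dropped (with *nonblocking cleared), -ENOENT means "skip this address" (the mlock/guard-page case), and the remaining codes are hard errors.

/*
 * Hedged sketch of a GUP-style caller.  lookup_page() is a hypothetical
 * stand-in for the kernel's page-table walk (follow_page_mask() upstream);
 * everything else is simplified for illustration.
 */
static int gup_one_page(struct task_struct *tsk, struct vm_area_struct *vma,
			unsigned long addr, unsigned int *flags,
			struct page **pagep, int *nonblocking)
{
	for (;;) {
		struct page *page = lookup_page(vma, addr, *flags);
		int ret;

		if (page) {
			*pagep = page;
			return 0;
		}

		ret = faultin_page(tsk, vma, addr, flags, nonblocking);
		if (ret == 0)
			continue;	/* fault serviced: redo the lookup */

		/*
		 * -EBUSY: mmap_sem was dropped (*nonblocking is now 0).
		 * -ENOENT: FOLL_MLOCK hit the stack guard page; skip it.
		 * -ENOMEM/-EHWPOISON/-EFAULT: hard errors.
		 */
		return ret;
	}
}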
Example #2
static void
show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
{
	struct mm_struct *mm = vma->vm_mm;
	struct file *file = vma->vm_file;
	struct proc_maps_private *priv = m->private;
	vm_flags_t flags = vma->vm_flags;
	unsigned long ino = 0;
	unsigned long long pgoff = 0;
	unsigned long start, end;
	dev_t dev = 0;
	const char *name = NULL;

	if (file) {
		struct inode *inode = file_inode(vma->vm_file);
		dev = inode->i_sb->s_dev;
		ino = inode->i_ino;
		pgoff = ((loff_t)vma->vm_pgoff) << PAGE_SHIFT;
	}

	/* We don't show the stack guard page in /proc/maps */
	start = vma->vm_start;
	if (stack_guard_page_start(vma, start))
		start += PAGE_SIZE;
	end = vma->vm_end;
	if (stack_guard_page_end(vma, end))
		end -= PAGE_SIZE;

	seq_setwidth(m, 25 + sizeof(void *) * 6 - 1);
	seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu ",
			start,
			end,
			flags & VM_READ ? 'r' : '-',
			flags & VM_WRITE ? 'w' : '-',
			flags & VM_EXEC ? 'x' : '-',
			flags & VM_MAYSHARE ? 's' : 'p',
			pgoff,
			MAJOR(dev), MINOR(dev), ino);

	/*
	 * Print the dentry name for named mappings, and a
	 * special [heap] marker for the heap:
	 */
	if (file) {
		seq_pad(m, ' ');
		seq_path(m, &file->f_path, "\n");
		goto done;
	}

	if (vma->vm_ops && vma->vm_ops->name) {
		name = vma->vm_ops->name(vma);
		if (name)
			goto done;
	}

	name = arch_vma_name(vma);
	if (!name) {
		pid_t tid;

		if (!mm) {
			name = "[vdso]";
			goto done;
		}

		if (vma->vm_start <= mm->brk &&
		    vma->vm_end >= mm->start_brk) {
			name = "[heap]";
			goto done;
		}

		tid = pid_of_stack(priv, vma, is_pid);
		if (tid != 0) {
			/*
			 * Thread stack in /proc/PID/task/TID/maps or
			 * the main process stack.
			 */
			if (!is_pid || (vma->vm_start <= mm->start_stack &&
			    vma->vm_end >= mm->start_stack)) {
				name = "[stack]";
			} else {
				/* Thread stack in /proc/PID/maps */
				seq_pad(m, ' ');
				seq_printf(m, "[stack:%d]", tid);
			}
			goto done;
		}

		if (vma_get_anon_name(vma)) {
			seq_pad(m, ' ');
			seq_print_vma_name(m, vma);
		}
	}

done:
	if (name) {
		seq_pad(m, ' ');
		seq_puts(m, name);
	}
	seq_putc(m, '\n');
}
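show_map_vma() is driven by the /proc seq_file machinery; the is_pid argument records whether the file being read is /proc/PID/maps or /proc/PID/task/TID/maps, which is what the [stack] versus [stack:TID] logic above keys on. The wrappers below are a hedged sketch of that wiring only; the real hooks in fs/proc/task_mmu.c also do per-file caching and differ slightly between kernel versions.

/*
 * Hedged sketch of the seq_file ->show wiring; details vary by kernel
 * version and extra bookkeeping is omitted.
 */
static int show_map(struct seq_file *m, void *v, int is_pid)
{
	show_map_vma(m, v, is_pid);
	return 0;
}

static int show_pid_map(struct seq_file *m, void *v)
{
	return show_map(m, v, 1);	/* /proc/PID/maps */
}

static int show_tid_map(struct seq_file *m, void *v)
{
	return show_map(m, v, 0);	/* /proc/PID/task/TID/maps */
}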
Example #3
/*
 * Get user stack entries up to the pcstack_limit; return the number of entries
 * acquired.  If pcstack is NULL, return the number of entries potentially
 * acquirable.
 */
unsigned long dtrace_getufpstack(uint64_t *pcstack, uint64_t *fpstack,
				 int pcstack_limit)
{
	struct task_struct	*p = current;
	struct mm_struct	*mm = p->mm;
	unsigned long		tos, bos, fpc;
	unsigned long		*sp;
	unsigned long		depth = 0;
	struct vm_area_struct	*stack_vma;
	struct page		*stack_page = NULL;
	struct pt_regs		*regs = current_pt_regs();

	if (pcstack) {
		if (unlikely(pcstack_limit < 2)) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
			return 0;
		}
		*pcstack++ = (uint64_t)p->pid;
		*pcstack++ = (uint64_t)p->tgid;
		pcstack_limit -= 2;
	}

	if (!user_mode(regs))
		goto out;

	/*
	 * There is always at least one address to report: the instruction
	 * pointer itself (frame 0).
	 */
	depth++;

	fpc = instruction_pointer(regs);
	if (pcstack) {
		*pcstack++ = (uint64_t)fpc;
		pcstack_limit--;
	}

	/*
	 * We cannot ustack() if this task has no mm, if this task is a kernel
	 * thread, or when someone else has the mmap_sem or the page_table_lock
	 * (because find_user_vma() ultimately does a __get_user_pages() and
	 * thence a follow_page(), which can take that lock).
	 */
	if (mm == NULL || (p->flags & PF_KTHREAD) ||
	    spin_is_locked(&mm->page_table_lock))
		goto out;

	if (!down_read_trylock(&mm->mmap_sem))
		goto out;
	atomic_inc(&mm->mm_users);

	/*
	 * The following construct can be replaced with:
	 * 	tos = current_user_stack_pointer();
	 * once support for 4.0 is no longer necessary.
	 */
#ifdef CONFIG_X86_64
	tos = current_pt_regs()->sp;
#else
	tos = user_stack_pointer(current_pt_regs());
#endif
	stack_vma = find_user_vma(p, mm, NULL, (unsigned long) tos, 0);
	if (!stack_vma ||
	    stack_vma->vm_start > (unsigned long) tos)
		goto unlock_out;

#ifdef CONFIG_STACK_GROWSUP
#error This code does not yet work on STACK_GROWSUP platforms.
#endif
	bos = stack_vma->vm_end;
	if (stack_guard_page_end(stack_vma, bos))
		bos -= PAGE_SIZE;

	/*
	 * If we have a pcstack, loop as long as we are within the stack limit.
	 * Otherwise, loop until we run out of stack.
	 */
	for (sp = (unsigned long *)tos;
	     sp <= (unsigned long *)bos &&
		     ((pcstack && pcstack_limit > 0) ||
		      !pcstack);
	     sp++) {
		struct vm_area_struct	*code_vma;
		unsigned long		addr;

		/*
		 * Recheck for faultedness and pin at page boundaries.
		 */
		if (!stack_page || (((unsigned long)sp & ~PAGE_MASK) == 0)) {
			if (stack_page) {
				put_page(stack_page);
				stack_page = NULL;
			}

			if (!find_user_vma(p, mm, &stack_page,
					   (unsigned long) sp, 1))
				break;
		}

		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
		get_user(addr, sp);
		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);

		if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_FAULT)) {
			DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_BADADDR);
			break;
		}

		if (addr == fpc)
			continue;

		code_vma = find_user_vma(p, mm, NULL, addr, 0);

		if (!code_vma || code_vma->vm_start > addr)
			continue;

		if ((addr >= tos && addr <= bos) ||
		    (code_vma->vm_flags & VM_GROWSDOWN)) {
			/* stack address - may need it for the fpstack. */
		} else if (code_vma->vm_flags & VM_EXEC) {
			if (pcstack) {
				*pcstack++ = addr;
				pcstack_limit--;
			}
			depth++;
		}
	}
	if (stack_page != NULL)
		put_page(stack_page);

unlock_out:
	atomic_dec(&mm->mm_users);
	up_read(&mm->mmap_sem);

out:
	if (pcstack)
		while (pcstack_limit--)
			*pcstack++ = 0;

	return depth;
}
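A hedged usage sketch follows. The wrapper and buffer size are hypothetical (the real consumer is the DTrace ustack() action, which fills these buffers from probe context), but it shows the calling convention implied by the code above: pass a NULL pcstack to merely count frames, or a buffer whose first two slots receive the pid and tgid, whose third slot receives the user instruction pointer, and whose unused tail is zero-filled.

/*
 * Hypothetical caller, for illustration only.  Buffer sizing here is
 * arbitrary; fpstack is left NULL, as the listing above never fills it.
 */
static void sample_user_stack(void)
{
	uint64_t	pcstack[16];
	unsigned long	depth;

	depth = dtrace_getufpstack(pcstack, NULL, ARRAY_SIZE(pcstack));

	/* pcstack[0] = pid, pcstack[1] = tgid, pcstack[2] = user IP, ... */
	pr_info("captured %lu user frame(s) for pid %llu\n",
		depth, (unsigned long long)pcstack[0]);
}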