/*
 * mmap_sem must be held on entry.  If @nonblocking != NULL and
 * *@flags does not include FOLL_NOWAIT, the mmap_sem may be released.
 * If it is, *@nonblocking will be set to 0 and -EBUSY returned.
 */
static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
		unsigned long address, unsigned int *flags, int *nonblocking)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned int fault_flags = 0;
	int ret;

	/* For mlock, just skip the stack guard page. */
	if ((*flags & FOLL_MLOCK) &&
			(stack_guard_page_start(vma, address) ||
			 stack_guard_page_end(vma, address + PAGE_SIZE)))
		return -ENOENT;
	if (*flags & FOLL_WRITE)
		fault_flags |= FAULT_FLAG_WRITE;
	if (nonblocking)
		fault_flags |= FAULT_FLAG_ALLOW_RETRY;
	if (*flags & FOLL_NOWAIT)
		fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT;
	if (*flags & FOLL_TRIED) {
		VM_WARN_ON_ONCE(fault_flags & FAULT_FLAG_ALLOW_RETRY);
		fault_flags |= FAULT_FLAG_TRIED;
	}

	ret = handle_mm_fault(mm, vma, address, fault_flags);
	if (ret & VM_FAULT_ERROR) {
		if (ret & VM_FAULT_OOM)
			return -ENOMEM;
		if (ret & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE))
			return *flags & FOLL_HWPOISON ? -EHWPOISON : -EFAULT;
		if (ret & VM_FAULT_SIGBUS)
			return -EFAULT;
		BUG();
	}

	if (tsk) {
		if (ret & VM_FAULT_MAJOR)
			tsk->maj_flt++;
		else
			tsk->min_flt++;
	}

	if (ret & VM_FAULT_RETRY) {
		if (nonblocking)
			*nonblocking = 0;
		return -EBUSY;
	}

	/*
	 * The VM_FAULT_WRITE bit tells us that do_wp_page has broken COW when
	 * necessary, even if maybe_mkwrite decided not to set pte_write. We
	 * can thus safely do subsequent page lookups as if they were reads.
	 * But only do so when looping for pte_write is futile: in some cases
	 * userspace may also be wanting to write to the gotten user page,
	 * which a read fault here might prevent (a readonly page might get
	 * reCOWed by userspace write).
	 */
	if ((ret & VM_FAULT_WRITE) && !(vma->vm_flags & VM_WRITE))
		*flags &= ~FOLL_WRITE;
	return 0;
}
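/*
 * For context, a condensed sketch of how faultin_page()'s return values are
 * consumed by its caller in mm/gup.c, __get_user_pages().  This fragment is
 * modelled on the caller's shape in kernels of this era, not quoted verbatim
 * from this tree; `i' (pages processed so far) and the `retry'/`next_page'
 * labels are assumed parts of that caller's main loop.
 */
		ret = faultin_page(tsk, vma, start, &foll_flags, nonblocking);
		switch (ret) {
		case 0:
			goto retry;		/* fault serviced: repeat the page lookup */
		case -EFAULT:
		case -ENOMEM:
		case -EHWPOISON:
			return i ? i : ret;	/* report progress made, else the error */
		case -EBUSY:
			return i;		/* mmap_sem dropped; *nonblocking is now 0 */
		case -ENOENT:
			goto next_page;		/* guard page skipped under FOLL_MLOCK */
		}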
static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma,
			 int is_pid)
{
	struct mm_struct *mm = vma->vm_mm;
	struct file *file = vma->vm_file;
	struct proc_maps_private *priv = m->private;
	vm_flags_t flags = vma->vm_flags;
	unsigned long ino = 0;
	unsigned long long pgoff = 0;
	unsigned long start, end;
	dev_t dev = 0;
	const char *name = NULL;

	if (file) {
		struct inode *inode = file_inode(vma->vm_file);
		dev = inode->i_sb->s_dev;
		ino = inode->i_ino;
		pgoff = ((loff_t)vma->vm_pgoff) << PAGE_SHIFT;
	}

	/* We don't show the stack guard page in /proc/maps */
	start = vma->vm_start;
	if (stack_guard_page_start(vma, start))
		start += PAGE_SIZE;
	end = vma->vm_end;
	if (stack_guard_page_end(vma, end))
		end -= PAGE_SIZE;

	seq_setwidth(m, 25 + sizeof(void *) * 6 - 1);
	seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu ",
			start,
			end,
			flags & VM_READ ? 'r' : '-',
			flags & VM_WRITE ? 'w' : '-',
			flags & VM_EXEC ? 'x' : '-',
			flags & VM_MAYSHARE ? 's' : 'p',
			pgoff,
			MAJOR(dev), MINOR(dev), ino);

	/*
	 * Print the dentry name for named mappings, and a
	 * special [heap] marker for the heap:
	 */
	if (file) {
		seq_pad(m, ' ');
		seq_path(m, &file->f_path, "\n");
		goto done;
	}

	if (vma->vm_ops && vma->vm_ops->name) {
		name = vma->vm_ops->name(vma);
		if (name)
			goto done;
	}

	name = arch_vma_name(vma);
	if (!name) {
		pid_t tid;

		if (!mm) {
			name = "[vdso]";
			goto done;
		}

		if (vma->vm_start <= mm->brk &&
		    vma->vm_end >= mm->start_brk) {
			name = "[heap]";
			goto done;
		}

		tid = pid_of_stack(priv, vma, is_pid);
		if (tid != 0) {
			/*
			 * Thread stack in /proc/PID/task/TID/maps or
			 * the main process stack.
			 */
			if (!is_pid || (vma->vm_start <= mm->start_stack &&
			    vma->vm_end >= mm->start_stack)) {
				name = "[stack]";
			} else {
				/* Thread stack in /proc/PID/maps */
				seq_pad(m, ' ');
				seq_printf(m, "[stack:%d]", tid);
			}
			goto done;
		}

		if (vma_get_anon_name(vma)) {
			seq_pad(m, ' ');
			seq_print_vma_name(m, vma);
		}
	}

done:
	if (name) {
		seq_pad(m, ' ');
		seq_puts(m, name);
	}
	seq_putc(m, '\n');
}
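/*
 * Illustrative output (addresses, device, and inode numbers are made up)
 * for a file-backed mapping and for the main process stack.  With a
 * GROWSDOWN stack, the [stack] line starts at vm_start + PAGE_SIZE because
 * the guard page is hidden by the adjustment above:
 *
 *   00400000-0040b000 r-xp 00000000 08:01 1048602            /bin/cat
 *   7ffd8a4f1000-7ffd8a512000 rw-p 00000000 00:00 0           [stack]
 */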
/*
 * Get user stack entries up to the pcstack_limit; return the number of
 * entries acquired.  If pcstack is NULL, return the number of entries
 * potentially acquirable.
 */
unsigned long dtrace_getufpstack(uint64_t *pcstack, uint64_t *fpstack,
				 int pcstack_limit)
{
	struct task_struct	*p = current;
	struct mm_struct	*mm = p->mm;
	unsigned long		tos, bos, fpc;
	unsigned long		*sp;
	unsigned long		depth = 0;
	struct vm_area_struct	*stack_vma;
	struct page		*stack_page = NULL;
	struct pt_regs		*regs = current_pt_regs();

	if (pcstack) {
		if (unlikely(pcstack_limit < 2)) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
			return 0;
		}

		*pcstack++ = (uint64_t)p->pid;
		*pcstack++ = (uint64_t)p->tgid;
		pcstack_limit -= 2;
	}

	if (!user_mode(regs))
		goto out;

	/*
	 * There is always at least one address to report: the instruction
	 * pointer itself (frame 0).
	 */
	depth++;

	fpc = instruction_pointer(regs);
	if (pcstack) {
		*pcstack++ = (uint64_t)fpc;
		pcstack_limit--;
	}

	/*
	 * We cannot ustack() if this task has no mm, if this task is a kernel
	 * thread, or when someone else has the mmap_sem or the page_table_lock
	 * (because find_user_vma() ultimately does a __get_user_pages() and
	 * thence a follow_page(), which can take that lock).
	 */
	if (mm == NULL || (p->flags & PF_KTHREAD) ||
	    spin_is_locked(&mm->page_table_lock))
		goto out;

	if (!down_read_trylock(&mm->mmap_sem))
		goto out;
	atomic_inc(&mm->mm_users);

	/*
	 * The following construct can be replaced with:
	 *	tos = current_user_stack_pointer();
	 * once support for 4.0 is no longer necessary.
	 */
#ifdef CONFIG_X86_64
	tos = current_pt_regs()->sp;
#else
	tos = user_stack_pointer(current_pt_regs());
#endif

	stack_vma = find_user_vma(p, mm, NULL, (unsigned long)tos, 0);
	if (!stack_vma || stack_vma->vm_start > (unsigned long)tos)
		goto unlock_out;

#ifdef CONFIG_STACK_GROWSUP
#error This code does not yet work on STACK_GROWSUP platforms.
#endif
	bos = stack_vma->vm_end;
	if (stack_guard_page_end(stack_vma, bos))
		bos -= PAGE_SIZE;

	/*
	 * If we have a pcstack, loop as long as we are within the stack limit.
	 * Otherwise, loop until we run out of stack.
	 */
	for (sp = (unsigned long *)tos;
	     sp <= (unsigned long *)bos &&
	     ((pcstack && pcstack_limit > 0) || !pcstack);
	     sp++) {
		struct vm_area_struct	*code_vma;
		unsigned long		addr;

		/*
		 * Recheck for faultedness and pin at page boundaries.  The
		 * test masks with ~PAGE_MASK (the offset-within-page bits)
		 * so it fires whenever sp reaches a page-aligned address,
		 * i.e. whenever the walk crosses into a new page.
		 */
		if (!stack_page || (((unsigned long)sp & ~PAGE_MASK) == 0)) {
			if (stack_page) {
				put_page(stack_page);
				stack_page = NULL;
			}

			if (!find_user_vma(p, mm, &stack_page,
					   (unsigned long)sp, 1))
				break;
		}

		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
		get_user(addr, sp);
		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);

		if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_FAULT)) {
			DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_BADADDR);
			break;
		}

		if (addr == fpc)
			continue;

		code_vma = find_user_vma(p, mm, NULL, addr, 0);
		if (!code_vma || code_vma->vm_start > addr)
			continue;

		if ((addr >= tos && addr <= bos) ||
		    (code_vma->vm_flags & VM_GROWSDOWN)) {
			/* stack address - may need it for the fpstack. */
		} else if (code_vma->vm_flags & VM_EXEC) {
			if (pcstack) {
				*pcstack++ = addr;
				pcstack_limit--;
			}
			depth++;
		}
	}

	if (stack_page != NULL)
		put_page(stack_page);

unlock_out:
	atomic_dec(&mm->mm_users);
	up_read(&mm->mmap_sem);

out:
	if (pcstack)
		while (pcstack_limit--)
			*pcstack++ = 0;

	return depth;
}
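/*
 * For reference, the guard-page predicates that all three functions above
 * rely on, as they appeared in include/linux/mm.h before the single in-vma
 * guard page was replaced by a configurable guard gap.  This is a reference
 * sketch from that era's header, not quoted from this tree; consult your
 * tree for the exact text.
 */

/* Is the vma a continuation of the stack vma above it? */
static inline int vma_growsdown(struct vm_area_struct *vma, unsigned long addr)
{
	return vma && (vma->vm_end == addr) && (vma->vm_flags & VM_GROWSDOWN);
}

static inline int stack_guard_page_start(struct vm_area_struct *vma,
					 unsigned long addr)
{
	return (vma->vm_flags & VM_GROWSDOWN) &&
		(vma->vm_start == addr) &&
		!vma_growsdown(vma->vm_prev, addr);
}

/* Is the vma a continuation of the stack vma below it? */
static inline int vma_growsup(struct vm_area_struct *vma, unsigned long addr)
{
	return vma && (vma->vm_start == addr) && (vma->vm_flags & VM_GROWSUP);
}

static inline int stack_guard_page_end(struct vm_area_struct *vma,
				       unsigned long addr)
{
	return (vma->vm_flags & VM_GROWSUP) &&
		(vma->vm_end == addr) &&
		!vma_growsup(vma->vm_next, addr);
}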