/** * bpf_load: checks and returns a pointer to the requested offset * @off: offset into struct seccomp_data to load from * * Returns the requested 32-bits of data. * seccomp_chk_filter() should assure that @off is 32-bit aligned * and not out of bounds. Failure to do so is a BUG. */ u32 seccomp_bpf_load(int off) { struct pt_regs *regs = task_pt_regs(current); if (off == BPF_DATA(nr)) return syscall_get_nr(current, regs); if (off == BPF_DATA(arch)) return syscall_get_arch(current, regs); if (off >= BPF_DATA(args[0]) && off < BPF_DATA(args[6])) { unsigned long value; int arg = (off - BPF_DATA(args[0])) / sizeof(u64); int index = !!(off % sizeof(u64)); syscall_get_arguments(current, regs, arg, 1, &value); return get_u32(value, index); } if (off == BPF_DATA(instruction_pointer)) return get_u32(KSTK_EIP(current), 0); if (off == BPF_DATA(instruction_pointer) + sizeof(u32)) return get_u32(KSTK_EIP(current), 1); /* seccomp_chk_filter should make this impossible. */ BUG(); }
/** * seccomp_send_sigsys - signals the task to allow in-process syscall emulation * @syscall: syscall number to send to userland * @reason: filter-supplied reason code to send to userland (via si_errno) * * Forces a SIGSYS with a code of SYS_SECCOMP and related sigsys info. */ static void seccomp_send_sigsys(int syscall, int reason) { struct siginfo info; memset(&info, 0, sizeof(info)); info.si_signo = SIGSYS; info.si_code = SYS_SECCOMP; info.si_call_addr = (void __user *)KSTK_EIP(current); info.si_errno = reason; info.si_arch = syscall_get_arch(current, task_pt_regs(current)); info.si_syscall = syscall; force_sig_info(SIGSYS, &info, current); }
/*
 * probe_memory_handle_fault_entry - probe for the mm_handle_fault_entry marker
 * @mm:           unused by this probe
 * @vma:          unused by this probe
 * @address:      faulting address, recorded as field f1
 * @write_access: write-access indicator, recorded as field f3
 *
 * Packs the fault address, the value of KSTK_EIP() for the current task
 * (field f2) and the write-access flag into a serialize_long_long_int
 * record and passes it to ltt_specialized_trace().
 */
notrace void probe_memory_handle_fault_entry(struct mm_struct *mm,
		struct vm_area_struct *vma, unsigned long address,
		int write_access)
{
	struct serialize_long_long_int data;
	struct marker *fault_marker = &GET_MARKER(mm_handle_fault_entry);

	data.f3 = write_access;
	data.f2 = KSTK_EIP(current);
	data.f1 = address;

	/* NOTE(review): the trailing sizeof(long) is presumably the record
	 * alignment expected by the tracer - confirm against its prototype. */
	ltt_specialized_trace(fault_marker->single.probe_private, &data,
			      serialize_sizeof(data), sizeof(long));
}
/*
 * populate_seccomp_data - fill @sd from the current task's syscall state.
 *
 * Stores the syscall number, the architecture, the six syscall arguments
 * and the instruction pointer reported by KSTK_EIP().  The arguments are
 * fetched as native unsigned longs and widened by assignment into the
 * sd->args[] slots.
 *
 * Endianness is explicitly ignored and left for BPF program authors to
 * manage as per the specific architecture.
 */
static void populate_seccomp_data(struct seccomp_data *sd)
{
	struct task_struct *task = current;
	struct pt_regs *regs = task_pt_regs(task);
	unsigned long raw[6];
	int i;

	sd->nr = syscall_get_nr(task, regs);
	sd->arch = syscall_get_arch();

	syscall_get_arguments(task, regs, 0, 6, raw);
	for (i = 0; i < 6; i++)
		sd->args[i] = raw[i];

	sd->instruction_pointer = KSTK_EIP(task);
}
/* * Notification of system call entry/exit * - triggered by current->work.syscall_trace */ asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall) { user_exit(); current_thread_info()->syscall = syscall; if (test_thread_flag(TIF_SYSCALL_TRACE)) { if (tracehook_report_syscall_entry(regs)) return -1; syscall = current_thread_info()->syscall; } #ifdef CONFIG_SECCOMP if (unlikely(test_thread_flag(TIF_SECCOMP))) { int ret, i; struct seccomp_data sd; unsigned long args[6]; sd.nr = syscall; sd.arch = syscall_get_arch(); syscall_get_arguments(current, regs, 0, 6, args); for (i = 0; i < 6; i++) sd.args[i] = args[i]; sd.instruction_pointer = KSTK_EIP(current); ret = __secure_computing(&sd); if (ret == -1) return ret; syscall = current_thread_info()->syscall; } #endif if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) trace_sys_enter(regs, regs->regs[2]); audit_syscall_entry(syscall, regs->regs[4], regs->regs[5], regs->regs[6], regs->regs[7]); /* * Negative syscall numbers are mistaken for rejected syscalls, but * won't have had the return value set appropriately, so we do so now. */ if (syscall < 0) syscall_set_return_value(current, regs, -ENOSYS, 0); return syscall; }
/*
 * populate_seccomp_data - fill @sd from the current task's syscall state.
 *
 * Stores the syscall number, the architecture, the six syscall arguments
 * and the instruction pointer reported by KSTK_EIP().
 *
 * The arguments are fetched into a native unsigned long array and then
 * widened by assignment.  Writing them straight into sd->args[] through an
 * (unsigned long *) cast is wrong whenever unsigned long is narrower than
 * the 64-bit slots: only half of each slot would be written, leaving the
 * other half uninitialised (and, on big-endian, the value in the wrong
 * half).
 *
 * Endianness is explicitly ignored and left for BPF program authors to
 * manage as per the specific architecture.
 */
static void populate_seccomp_data(struct seccomp_data *sd)
{
	struct task_struct *task = current;
	struct pt_regs *regs = task_pt_regs(task);
	unsigned long args[6];

	sd->nr = syscall_get_nr(task, regs);
	sd->arch = syscall_get_arch(task, regs);

	syscall_get_arguments(task, regs, 0, 6, args);
	/* Assign one by one so every 64-bit slot is fully written. */
	sd->args[0] = args[0];
	sd->args[1] = args[1];
	sd->args[2] = args[2];
	sd->args[3] = args[3];
	sd->args[4] = args[4];
	sd->args[5] = args[5];

	sd->instruction_pointer = KSTK_EIP(task);
}
void dtrace_getupcstack(uint64_t *pcstack, int pcstack_limit) { uint64_t *pcstack_end = pcstack + pcstack_limit; volatile uint8_t *flags = (volatile uint8_t *)&cpu_core[cpu_get_id()].cpuc_dtrace_flags; unsigned long *sp; unsigned long *bos; if (*flags & CPU_DTRACE_FAULT) return; if (pcstack_limit <= 0) return; *pcstack++ = (uint64_t)current->pid; if (pcstack >= pcstack_end) return; /***********************************************/ /* Linux provides a built in function which */ /* is good because stack walking is arch */ /* dependent. (save_stack_trace) */ /* */ /* Unfortunately this is options dependent */ /* (CONFIG_STACKTRACE) so we cannot use it. */ /* And its GPL anyhow, so we cannot copy */ /* it. */ /* */ /* Whats worse is that we might be compiled */ /* with a frame pointer (only on x86-32) so */ /* we have three scenarios to handle. */ /***********************************************/ /***********************************************/ /* Ye gods! The world is an awful place to */ /* live. The target process, may or may not */ /* have frame pointers. In fact, some */ /* frames may have it and some may not (eg */ /* different libraries may be compiled */ /* differently). */ /* */ /* Looks like distro owners dont care about */ /* debuggabiity, and give us no frame */ /* pointers. */ /* */ /* This function is really important and */ /* useful. On modern Linux systems, gdb */ /* (and pstack) contain all the smarts. In */ /* fact, pstack is often a wrapper around */ /* gdb - i.e. its so complex we cannot do */ /* this. */ /***********************************************/ /***********************************************/ /* Bear in mind that user stacks can be */ /* megabytes in size, vs kernel stacks */ /* which are limited to a few K (4 or 8K */ /* typically). 
*/ /***********************************************/ // sp = current->thread.rsp; # if defined(__i386) bos = sp = KSTK_ESP(current); # define ALIGN_MASK 3 # else /***********************************************/ /* KSTK_ESP() doesnt exist for x86_64 (its */ /* set to -1). */ /***********************************************/ #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 25) # if defined(KSTK_EIP) /***********************************************/ /* Handle ARM and more kernel independent, */ /* but might not exist. */ /***********************************************/ bos = sp = (unsigned long *) KSTK_EIP(current); # else bos = sp = (unsigned long *) task_pt_regs(current)->sp; # endif #else bos = sp = task_pt_regs(current)->rsp; #endif # define ALIGN_MASK 7 #endif /***********************************************/ /* Walk the stack. We cannot rely on a */ /* frame pointer at each level, and we */ /* really want to avoid probing every word */ /* in the stack - a large stack will eat */ /* cpu looking at thousands of entries. So */ /* try and heuristically see if we have a */ /* likely frame pointer to jump over the */ /* frame, but, if not, just go one word at */ /* a time. */ /* */ /* Try and be careful we dont walk outside */ /* the stack or walk backwards in the */ /* stack, too. */ /***********************************************/ { uintptr_t *spend = sp + 1024; struct vm_area_struct *vma = find_vma(current->mm, (unsigned long) sp); if (vma) spend = (uintptr_t *) (vma->vm_end - sizeof(int *)); /*printk("xbos=%p %p\n", bos, spend);*/ /***********************************************/ /* Have you ever looked at the output from */ /* GCC in amd64 mode? Things like: */ /* */ /* push %r12 */ /* push %rbp */ /* */ /* will make you come out in a cold sweat - */ /* no way to find the frame pointer, */ /* without doing what GDB does (ie read the */ /* DWARF stack unwind info). 
So, for now, */ /* you get some false positives in the */ /* output - but we try to be conservative. */ /***********************************************/ while (sp >= bos && sp < spend && validate_ptr(sp)) { /*printk(" %p %d: %p %d\n", sp, validate_ptr(sp), sp[0], validate_ptr(sp[0]));*/ if (validate_ptr((void *) sp[0])) { uintptr_t p = sp[-1]; /***********************************************/ /* Try and avoid false positives in stack */ /* entries - we want this to be an */ /* executable instruction. */ /***********************************************/ if (((unsigned long *) sp[0] < bos || (unsigned long *) sp[0] > spend) && (vma = find_vma(current->mm, sp[0])) != NULL && vma->vm_flags & VM_EXEC) { *pcstack++ = sp[0]; if (pcstack >= pcstack_end) break; } if (((int) p & ALIGN_MASK) == 0 && p > (uintptr_t) sp && p < (uintptr_t) spend) sp = (unsigned long *) p; } sp++; } } /***********************************************/ /* Erase anything else in the buffer to */ /* avoid confusion. */ /***********************************************/ while (pcstack < pcstack_end) *pcstack++ = (pc_t) NULL; }
static int get_stat(int pid, char * buffer) { struct task_struct ** p = get_task(pid); unsigned long sigignore=0, sigcatch=0, bit=1, wchan; int i,tty_pgrp; char state; if (!p || !*p) return 0; if ((*p)->state < 0 || (*p)->state > 5) state = '.'; else state = "RSDZTD"[(*p)->state]; wchan = get_wchan(*p); for(i=0; i<32; ++i) { switch((int) (*p)->sigaction[i].sa_handler) { case 1: sigignore |= bit; break; case 0: break; default: sigcatch |= bit; } bit <<= 1; } tty_pgrp = (*p)->tty; if (tty_pgrp > 0 && tty_table[tty_pgrp]) tty_pgrp = tty_table[tty_pgrp]->pgrp; else tty_pgrp = -1; return sprintf(buffer,"%d (%s) %c %d %d %d %d %d %u %u \ %u %u %u %d %d %d %d %d %d %u %u %d %u %u %u %u %u %u %u %u %d \ %d %d %d %u\n", pid, (*p)->comm, state, (*p)->p_pptr->pid, (*p)->pgrp, (*p)->session, (*p)->tty, tty_pgrp, (*p)->flags, (*p)->min_flt, (*p)->cmin_flt, (*p)->maj_flt, (*p)->cmaj_flt, (*p)->utime, (*p)->stime, (*p)->cutime, (*p)->cstime, (*p)->counter, /* this is the kernel priority --- subtract 30 in your user-level program. */ (*p)->priority, /* this is the nice value --- subtract 15 in your user-level program. */ (*p)->timeout, (*p)->it_real_value, (*p)->start_time, VSIZE((*p),(*p)->kernel_stack_page), (*p)->rss, /* you might want to shift this left 3 */ (*p)->rlim[RLIMIT_RSS].rlim_cur, (*p)->start_code, (*p)->end_code, (*p)->start_stack, KSTK_ESP((*p)->kernel_stack_page), KSTK_EIP((*p)->kernel_stack_page), (*p)->signal, (*p)->blocked, sigignore, sigcatch, wchan); }
void seccomp_filter_log_failure(int syscall) { pr_info("%s[%d]: system call %d (%s) blocked at 0x%lx\n", current->comm, task_pid_nr(current), syscall, syscall_nr_to_name(syscall), KSTK_EIP(current)); }
static int get_stat(int pid, char * buffer) { struct task_struct ** p = get_task(pid), *tsk; unsigned long sigignore=0, sigcatch=0, wchan; unsigned long vsize, eip, esp; long priority, nice; int i,tty_pgrp; char state; if (!p || (tsk = *p) == NULL) return 0; if (tsk->state < 0 || tsk->state > 5) state = '.'; else state = "RSDZTW"[tsk->state]; vsize = eip = esp = 0; if (tsk->mm && tsk->mm != &init_mm) { struct vm_area_struct *vma = tsk->mm->mmap; while (vma) { vsize += vma->vm_end - vma->vm_start; vma = vma->vm_next; } if (tsk->kernel_stack_page) { eip = KSTK_EIP(tsk); esp = KSTK_ESP(tsk); } } wchan = get_wchan(tsk); if (tsk->sig) { unsigned long bit = 1; for(i=0; i<32; ++i) { switch((unsigned long) tsk->sig->action[i].sa_handler) { case 0: break; case 1: sigignore |= bit; break; default: sigcatch |= bit; } bit <<= 1; } } if (tsk->tty) tty_pgrp = tsk->tty->pgrp; else tty_pgrp = -1; /* scale priority and nice values from timeslices to -20..20 */ /* to make it look like a "normal" unix priority/nice value */ priority = tsk->counter; priority = 20 - (priority * 10 + DEF_PRIORITY / 2) / DEF_PRIORITY; nice = tsk->priority; nice = 20 - (nice * 20 + DEF_PRIORITY / 2) / DEF_PRIORITY; return sprintf(buffer,"%d (%s) %c %d %d %d %d %d %lu %lu \ %lu %lu %lu %lu %lu %ld %ld %ld %ld %ld %ld %lu %lu %ld %lu %lu %lu %lu %lu \ %lu %lu %lu %lu %lu %lu %lu %lu\n", pid, tsk->comm, state, tsk->p_pptr->pid, tsk->pgrp, tsk->session, tsk->tty ? kdev_t_to_nr(tsk->tty->device) : 0, tty_pgrp, tsk->flags, tsk->min_flt, tsk->cmin_flt, tsk->maj_flt, tsk->cmaj_flt, tsk->utime, tsk->stime, tsk->cutime, tsk->cstime, priority, nice, tsk->timeout, tsk->it_real_value, tsk->start_time, vsize, tsk->mm ? tsk->mm->rss : 0, /* you might want to shift this left 3 */ tsk->rlim ? tsk->rlim[RLIMIT_RSS].rlim_cur : 0, tsk->mm ? tsk->mm->start_code : 0, tsk->mm ? tsk->mm->end_code : 0, tsk->mm ? tsk->mm->start_stack : 0, esp, eip, tsk->signal, tsk->blocked, sigignore, sigcatch, wchan, tsk->nswap, tsk->cnswap); }