static void prof_syscall_enter(struct pt_regs *regs, long id) { struct syscall_trace_enter *rec; struct syscall_metadata *sys_data; int syscall_nr; int size; syscall_nr = syscall_get_nr(current, regs); if (!test_bit(syscall_nr, enabled_prof_enter_syscalls)) return; sys_data = syscall_nr_to_meta(syscall_nr); if (!sys_data) return; /* get the size after alignment with the u32 buffer size field */ size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec); size = ALIGN(size + sizeof(u32), sizeof(u64)); size -= sizeof(u32); do { char raw_data[size]; /* zero the dead bytes from align to not leak stack to user */ *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; rec = (struct syscall_trace_enter *) raw_data; tracing_generic_entry_update(&rec->ent, 0, 0); rec->ent.type = sys_data->enter_id; rec->nr = syscall_nr; syscall_get_arguments(current, regs, 0, sys_data->nb_args, (unsigned long *)&rec->args); perf_tpcounter_event(sys_data->enter_id, 0, 1, rec, size); } while(0); }
/* ftrace_syscall_enter_state - build state for filter matching
 *
 * @buf: buffer to populate with current task state for matching
 * @available: size available for use in the buffer.
 * @entry: optional pointer to the trace_entry member of the state.
 *
 * Returns 0 on success and non-zero otherwise.
 * If @entry is NULL, it will be ignored.
 */
static int ftrace_syscall_enter_state(u8 *buf, size_t available,
				      struct trace_entry **entry)
{
	struct syscall_trace_enter *sys_enter;
	struct syscall_metadata *sys_data;
	int size;
	int syscall_nr;
	struct pt_regs *regs = task_pt_regs(current);

	syscall_nr = syscall_get_nr(current, regs);
	/* Negative nr means the task is not inside a syscall right now. */
	if (syscall_nr < 0)
		return -EINVAL;
	sys_data = syscall_nr_to_meta(syscall_nr);
	if (!sys_data)
		return -EINVAL;

	/* Determine the actual size needed. */
	size = ftrace_syscall_enter_state_size(sys_data->nb_args);
	/* Caller is responsible for sizing the buffer; overflow is a bug. */
	BUG_ON(size > available);
	sys_enter = (struct syscall_trace_enter *)buf;

	/* Populating the struct trace_sys_enter is left to the caller, but
	 * a pointer is returned to encourage opacity.
	 */
	if (entry)
		*entry = &sys_enter->ent;

	sys_enter->nr = syscall_nr;
	syscall_get_arguments(current, regs, 0, sys_data->nb_args,
			      sys_enter->args);
	return 0;
}
/*
 * ftrace_syscall_exit - ftrace hook fired on syscall exit.
 *
 * Reserves a ring-buffer event, fills in the syscall number and return
 * value, then commits it unless the event filter discards it.
 */
void ftrace_syscall_exit(struct pt_regs *regs, long ret)
{
	struct syscall_trace_exit *entry;
	struct syscall_metadata *sys_data;
	struct ring_buffer_event *event;
	struct ring_buffer *buffer;
	int syscall_nr;

	syscall_nr = syscall_get_nr(current, regs);
	/* -1 means the task is not in a syscall; nothing to record. */
	if (syscall_nr < 0)
		return;
	if (!test_bit(syscall_nr, enabled_exit_syscalls))
		return;

	sys_data = syscall_nr_to_meta(syscall_nr);
	if (!sys_data)
		return;

	event = trace_current_buffer_lock_reserve(&buffer, sys_data->exit_id,
			sizeof(*entry), 0, 0);
	if (!event)
		return;

	entry = ring_buffer_event_data(event);
	entry->nr = syscall_nr;
	entry->ret = syscall_get_return_value(current, regs);

	/* Commit only if the per-event filter does not discard it. */
	if (!filter_current_check_discard(buffer, sys_data->exit_event,
					  entry, event))
		trace_current_buffer_unlock_commit(buffer, event, 0, 0);
}
/*
 * collect_syscall - snapshot @target's current syscall state into @info.
 *
 * Pins the task's stack while reading its registers. Returns 0 on success
 * (with info->data.nr == -1 if the task is not in a syscall) or -EAGAIN if
 * the registers are unavailable.
 */
static int collect_syscall(struct task_struct *target, struct syscall_info *info)
{
	struct pt_regs *regs;

	if (!try_get_task_stack(target)) {
		/* Task has no stack, so the task isn't in a syscall. */
		memset(info, 0, sizeof(*info));
		info->data.nr = -1;
		return 0;
	}

	regs = task_pt_regs(target);
	if (unlikely(!regs)) {
		put_task_stack(target);
		return -EAGAIN;
	}

	info->sp = user_stack_pointer(regs);
	info->data.instruction_pointer = instruction_pointer(regs);

	info->data.nr = syscall_get_nr(target, regs);
	/* Only fetch arguments when the task is actually in a syscall. */
	if (info->data.nr != -1L)
		syscall_get_arguments(target, regs,
				      (unsigned long *)&info->data.args[0]);

	put_task_stack(target);
	return 0;
}
/*
 * ftrace_syscall_enter - ftrace hook fired on syscall entry.
 *
 * Reserves a variable-size ring-buffer event (header + nb_args words),
 * records the syscall number and arguments, and commits it unless the
 * event filter discards it.
 */
void ftrace_syscall_enter(struct pt_regs *regs, long id)
{
	struct syscall_trace_enter *entry;
	struct syscall_metadata *sys_data;
	struct ring_buffer_event *event;
	struct ring_buffer *buffer;
	int size;
	int syscall_nr;

	syscall_nr = syscall_get_nr(current, regs);
	/* -1 means the task is not in a syscall; nothing to record. */
	if (syscall_nr < 0)
		return;
	if (!test_bit(syscall_nr, enabled_enter_syscalls))
		return;

	sys_data = syscall_nr_to_meta(syscall_nr);
	if (!sys_data)
		return;

	/* Event size depends on how many arguments this syscall takes. */
	size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;

	event = trace_current_buffer_lock_reserve(&buffer, sys_data->enter_id,
						  size, 0, 0);
	if (!event)
		return;

	entry = ring_buffer_event_data(event);
	entry->nr = syscall_nr;
	syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args);

	if (!filter_current_check_discard(buffer, sys_data->enter_event,
					  entry, event))
		trace_current_buffer_unlock_commit(buffer, event, 0, 0);
}
static void prof_syscall_exit(struct pt_regs *regs, long ret) { struct syscall_metadata *sys_data; struct syscall_trace_exit *rec; unsigned long flags; int syscall_nr; char *raw_data; int size; int cpu; syscall_nr = syscall_get_nr(current, regs); if (!test_bit(syscall_nr, enabled_prof_exit_syscalls)) return; sys_data = syscall_nr_to_meta(syscall_nr); if (!sys_data) return; /* We can probably do that at build time */ size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64)); size -= sizeof(u32); /* * Impossible, but be paranoid with the future * How to put this check outside runtime? */ if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, "exit event has grown above profile buffer size")) return; /* Protect the per cpu buffer, begin the rcu read side */ local_irq_save(flags); cpu = smp_processor_id(); if (in_nmi()) raw_data = rcu_dereference(trace_profile_buf_nmi); else raw_data = rcu_dereference(trace_profile_buf); if (!raw_data) goto end; raw_data = per_cpu_ptr(raw_data, cpu); /* zero the dead bytes from align to not leak stack to user */ *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; rec = (struct syscall_trace_exit *)raw_data; tracing_generic_entry_update(&rec->ent, 0, 0); rec->ent.type = sys_data->exit_id; rec->nr = syscall_nr; rec->ret = syscall_get_return_value(current, regs); perf_tp_event(sys_data->exit_id, 0, 1, rec, size); end: local_irq_restore(flags); }
/*
 * Some architectures that allow for 32bit applications
 * to run on a 64bit kernel, do not map the syscalls for
 * the 32bit tasks the same as they do for 64bit tasks.
 *
 * *cough*x86*cough*
 *
 * In such a case, instead of reporting the wrong syscalls,
 * simply ignore them.
 *
 * For an arch to ignore the compat syscalls it needs to
 * define ARCH_TRACE_IGNORE_COMPAT_SYSCALLS as well as
 * define the function arch_trace_is_compat_syscall() to let
 * the tracing system know that it should ignore it.
 */
static int trace_get_syscall_nr(struct task_struct *task, struct pt_regs *regs)
{
	/* Compat syscalls are deliberately reported as "not a syscall". */
	return unlikely(arch_trace_is_compat_syscall(regs))
		? -1
		: syscall_get_nr(task, regs);
}
/*
 * _fm_syscall_get_nr - fetch @task's syscall number, normalized to the
 * 64-bit numbering when an ia32 task runs on an x86-64 kernel.
 *
 * Returns the (possibly remapped) syscall number, or -1 if it cannot be
 * determined or falls outside the ia32 mapping table.
 */
static long _fm_syscall_get_nr(struct task_struct *task, struct pt_regs *regs)
{
	long ret = -1;
#ifdef CONFIG_X86_64
	/* Check if this is a 32 bit process running on 64 bit kernel */
	int ia32 = test_tsk_thread_flag(task, TIF_IA32);

	if (ia32) {
		long ia32_id = syscall_get_nr(task, regs);

		/* Translate ia32 numbering via the mapping table. */
		if (ia32_id >= 0 && ia32_id < FILEMON_SYSCALLS_IA32_SIZE)
			ret = _ia32_to_syscall_map[ia32_id];
	} else {
		ret = syscall_get_nr(task, regs);
	}
#else
	ret = syscall_get_nr(task, regs);
#endif
	return ret;
}
static void prof_syscall_enter(struct pt_regs *regs, long id) { struct syscall_metadata *sys_data; struct syscall_trace_enter *rec; unsigned long flags; char *raw_data; int syscall_nr; int size; int cpu; syscall_nr = syscall_get_nr(current, regs); if (!test_bit(syscall_nr, enabled_prof_enter_syscalls)) return; sys_data = syscall_nr_to_meta(syscall_nr); if (!sys_data) return; /* get the size after alignment with the u32 buffer size field */ size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec); size = ALIGN(size + sizeof(u32), sizeof(u64)); size -= sizeof(u32); if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, "profile buffer not large enough")) return; /* Protect the per cpu buffer, begin the rcu read side */ local_irq_save(flags); cpu = smp_processor_id(); if (in_nmi()) raw_data = rcu_dereference(trace_profile_buf_nmi); else raw_data = rcu_dereference(trace_profile_buf); if (!raw_data) goto end; raw_data = per_cpu_ptr(raw_data, cpu); /* zero the dead bytes from align to not leak stack to user */ *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; rec = (struct syscall_trace_enter *) raw_data; tracing_generic_entry_update(&rec->ent, 0, 0); rec->ent.type = sys_data->enter_id; rec->nr = syscall_nr; syscall_get_arguments(current, regs, 0, sys_data->nb_args, (unsigned long *)&rec->args); perf_tp_event(sys_data->enter_id, 0, 1, rec, size); end: local_irq_restore(flags); }
/*
 * syscall_trace_enter - arch entry hook run before dispatching a syscall.
 *
 * Notifies the ptrace/tracehook layer (which may cancel the syscall by
 * setting its number to -1), fires the sys_enter tracepoint, and records
 * the call for audit.
 */
asmlinkage void syscall_trace_enter(struct pt_regs *regs)
{
	if (test_thread_flag(TIF_SYSCALL_TRACE))
		/* Tracer asked to abort: mark the syscall as cancelled. */
		if (tracehook_report_syscall_entry(regs))
			syscall_set_nr(current, regs, -1);

	if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
		trace_sys_enter(regs, syscall_get_nr(current, regs));

	/* a0-a3 are the first four syscall arguments on this arch. */
	audit_syscall_entry(regs_syscallid(regs), regs->a0, regs->a1,
			    regs->a2, regs->a3);
}
/*
 * collect_syscall - snapshot @target's syscall state into caller buffers.
 *
 * @callno receives -1 if the task is not in a syscall; up to @maxargs
 * arguments are copied into @args only when it is. Returns 0 on success
 * or -EAGAIN if the task's registers are unavailable.
 */
static int collect_syscall(struct task_struct *target, long *callno,
			   unsigned long args[6], unsigned int maxargs,
			   unsigned long *sp, unsigned long *pc)
{
	struct pt_regs *regs = task_pt_regs(target);
	if (unlikely(!regs))
		return -EAGAIN;

	*sp = user_stack_pointer(regs);
	*pc = instruction_pointer(regs);

	*callno = syscall_get_nr(target, regs);
	/* Arguments are only meaningful while inside a syscall. */
	if (*callno != -1L && maxargs > 0)
		syscall_get_arguments(target, regs, 0, maxargs, args);

	return 0;
}
/*
 * Endianness is explicitly ignored and left for BPF program authors to manage
 * as per the specific architecture.
 */
static void populate_seccomp_data(struct seccomp_data *sd)
{
	struct task_struct *task = current;
	struct pt_regs *regs = task_pt_regs(task);
	unsigned long args[6];
	int i;

	sd->nr = syscall_get_nr(task, regs);
	sd->arch = syscall_get_arch();

	/* Fetch all six argument words, then copy into the BPF view. */
	syscall_get_arguments(task, regs, 0, 6, args);
	for (i = 0; i < 6; i++)
		sd->args[i] = args[i];

	sd->instruction_pointer = KSTK_EIP(task);
}
/*
 * Endianness is explicitly ignored and left for BPF program authors to manage
 * as per the specific architecture.
 */
static void populate_seccomp_data(struct seccomp_data *sd)
{
	struct task_struct *task = current;
	struct pt_regs *regs = task_pt_regs(task);

	sd->nr = syscall_get_nr(task, regs);
	sd->arch = syscall_get_arch(task, regs);

	/* Unroll syscall_get_args to help gcc on arm. */
	syscall_get_arguments(task, regs, 0, 1, (unsigned long *) &sd->args[0]);
	syscall_get_arguments(task, regs, 1, 1, (unsigned long *) &sd->args[1]);
	syscall_get_arguments(task, regs, 2, 1, (unsigned long *) &sd->args[2]);
	syscall_get_arguments(task, regs, 3, 1, (unsigned long *) &sd->args[3]);
	syscall_get_arguments(task, regs, 4, 1, (unsigned long *) &sd->args[4]);
	syscall_get_arguments(task, regs, 5, 1, (unsigned long *) &sd->args[5]);

	sd->instruction_pointer = KSTK_EIP(task);
}
/**
 * bpf_load: checks and returns a pointer to the requested offset
 * @off: offset into struct seccomp_data to load from
 *
 * Returns the requested 32-bits of data.
 * seccomp_chk_filter() should assure that @off is 32-bit aligned
 * and not out of bounds.  Failure to do so is a BUG.
 */
u32 seccomp_bpf_load(int off)
{
	struct pt_regs *regs = task_pt_regs(current);

	if (off == BPF_DATA(nr))
		return syscall_get_nr(current, regs);
	if (off == BPF_DATA(arch))
		return syscall_get_arch(current, regs);
	if (off >= BPF_DATA(args[0]) && off < BPF_DATA(args[6])) {
		unsigned long value;
		/* Each u64 argument is exposed as two 32-bit halves. */
		int arg = (off - BPF_DATA(args[0])) / sizeof(u64);
		int index = !!(off % sizeof(u64));

		syscall_get_arguments(current, regs, arg, 1, &value);
		return get_u32(value, index);
	}
	if (off == BPF_DATA(instruction_pointer))
		return get_u32(KSTK_EIP(current), 0);
	if (off == BPF_DATA(instruction_pointer) + sizeof(u32))
		return get_u32(KSTK_EIP(current), 1);
	/* seccomp_chk_filter should make this impossible. */
	BUG();
}
static void prof_syscall_exit(struct pt_regs *regs, long ret) { struct syscall_metadata *sys_data; struct syscall_trace_exit rec; int syscall_nr; syscall_nr = syscall_get_nr(current, regs); if (!test_bit(syscall_nr, enabled_prof_exit_syscalls)) return; sys_data = syscall_nr_to_meta(syscall_nr); if (!sys_data) return; tracing_generic_entry_update(&rec.ent, 0, 0); rec.ent.type = sys_data->exit_id; rec.nr = syscall_nr; rec.ret = syscall_get_return_value(current, regs); perf_tpcounter_event(sys_data->exit_id, 0, 1, &rec, sizeof(rec)); }
static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) { struct syscall_metadata *sys_data; struct syscall_trace_exit *rec; struct hlist_head *head; int syscall_nr; int rctx; int size; syscall_nr = syscall_get_nr(current, regs); if (!test_bit(syscall_nr, enabled_perf_exit_syscalls)) return; sys_data = syscall_nr_to_meta(syscall_nr); if (!sys_data) return; /* We can probably do that at build time */ size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64)); size -= sizeof(u32); /* * Impossible, but be paranoid with the future * How to put this check outside runtime? */ if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, "exit event has grown above perf buffer size")) return; rec = (struct syscall_trace_exit *)perf_trace_buf_prepare(size, sys_data->exit_event->event.type, regs, &rctx); if (!rec) return; rec->nr = syscall_nr; rec->ret = syscall_get_return_value(current, regs); head = this_cpu_ptr(sys_data->exit_event->perf_events); perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head); }
/*
 * ftrace_syscall_enter - ftrace hook fired on syscall entry.
 *
 * Reserves a variable-size ring-buffer event (header + nb_args words),
 * records the syscall number and arguments with the current irq flags and
 * preempt count, and commits it unless the event filter discards it.
 */
void ftrace_syscall_enter(void *ignore, struct pt_regs *regs, long id)
{
	struct syscall_trace_enter *entry;
	struct syscall_metadata *sys_data;
	struct ring_buffer_event *event;
	struct ring_buffer *buffer;
	unsigned long irq_flags;
	int pc;
	int size;
	int syscall_nr;

	syscall_nr = syscall_get_nr(current, regs);
	/* -1 means the task is not in a syscall; nothing to record. */
	if (syscall_nr < 0)
		return;
	if (!test_bit(syscall_nr, enabled_enter_syscalls))
		return;

	sys_data = syscall_nr_to_meta(syscall_nr);
	if (!sys_data)
		return;

	/* Event size depends on how many arguments this syscall takes. */
	size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;

	local_save_flags(irq_flags);
	pc = preempt_count();

	event = trace_current_buffer_lock_reserve(&buffer,
			sys_data->enter_event->event.type, size, irq_flags, pc);
	if (!event)
		return;

	entry = ring_buffer_event_data(event);
	entry->nr = syscall_nr;
	syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args);

	if (!filter_current_check_discard(buffer, sys_data->enter_event,
					  entry, event))
		trace_current_buffer_unlock_commit(buffer, event,
						   irq_flags, pc);
}
static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) { struct syscall_metadata *sys_data; struct syscall_trace_enter *rec; struct hlist_head *head; int syscall_nr; int rctx; int size; syscall_nr = syscall_get_nr(current, regs); if (!test_bit(syscall_nr, enabled_perf_enter_syscalls)) return; sys_data = syscall_nr_to_meta(syscall_nr); if (!sys_data) return; /* get the size after alignment with the u32 buffer size field */ size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec); size = ALIGN(size + sizeof(u32), sizeof(u64)); size -= sizeof(u32); if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, "perf buffer not large enough")) return; rec = (struct syscall_trace_enter *)perf_trace_buf_prepare(size, sys_data->enter_event->event.type, regs, &rctx); if (!rec) return; rec->nr = syscall_nr; syscall_get_arguments(current, regs, 0, sys_data->nb_args, (unsigned long *)&rec->args); head = this_cpu_ptr(sys_data->enter_event->perf_events); perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head); }
/* * Note that 'init' is a special process: it doesn't get signals it doesn't * want to handle. Thus you cannot kill init even with a SIGKILL even by * mistake. */ static void do_signal(struct pt_regs *regs) { struct k_sigaction ka; siginfo_t info; int signr; sigset_t *oldset; /* * We want the common case to go fast, which is why we may in certain * cases get here from kernel mode. Just return without doing anything * if so. * X86_32: vm86 regs switched out by assembly code before reaching * here, so testing against kernel CS suffices. */ if (!user_mode_novm(regs)) return; if (current_thread_info()->status & TS_RESTORE_SIGMASK) oldset = ¤t->saved_sigmask; else oldset = ¤t->blocked; signr = get_signal_to_deliver(&info, &ka, regs, NULL); if (signr > 0) { /* * Re-enable any watchpoints before delivering the * signal to user space. The processor register will * have been cleared if the watchpoint triggered * inside the kernel. */ if (current->thread.debugreg7) set_debugreg(current->thread.debugreg7, 7); /* Whee! Actually deliver the signal. */ if (handle_signal(signr, &info, &ka, oldset, regs) == 0) { /* * A signal was successfully delivered; the saved * sigmask will have been stored in the signal frame, * and will be restored by sigreturn, so we can simply * clear the TS_RESTORE_SIGMASK flag. */ current_thread_info()->status &= ~TS_RESTORE_SIGMASK; } return; } /* Did we come from a system call? */ if (syscall_get_nr(current, regs) >= 0) { /* Restart the system call - no handlers present */ switch (syscall_get_error(current, regs)) { case -ERESTARTNOHAND: case -ERESTARTSYS: case -ERESTARTNOINTR: regs->ax = regs->orig_ax; regs->ip -= 2; break; case -ERESTART_RESTARTBLOCK: regs->ax = NR_restart_syscall; regs->ip -= 2; break; } } /* * If there's no signal to deliver, we just put the saved sigmask * back. */ if (current_thread_info()->status & TS_RESTORE_SIGMASK) { current_thread_info()->status &= ~TS_RESTORE_SIGMASK; sigprocmask(SIG_SETMASK, ¤t->saved_sigmask, NULL); } }
/*
 * trace_get_syscall_nr - default (non-compat) helper: just forward to
 * syscall_get_nr(). Arches without compat-syscall filtering use this.
 */
static inline int trace_get_syscall_nr(struct task_struct *task,
				       struct pt_regs *regs)
{
	return syscall_get_nr(task, regs);
}
/*
 * __secure_computing - enforce the task's seccomp policy on @this_syscall.
 *
 * Mode STRICT allows only a fixed whitelist; mode FILTER runs the attached
 * BPF filters and acts on the returned SECCOMP_RET_* action. Returns 0 when
 * the syscall may proceed, -1 when it must be skipped; may not return at
 * all (do_exit) for a kill verdict.
 */
int __secure_computing(int this_syscall)
{
	int mode = current->seccomp.mode;
	int exit_sig = 0;
	int *syscall;
	u32 ret;

	switch (mode) {
	case SECCOMP_MODE_STRICT:
		syscall = mode1_syscalls;
#ifdef CONFIG_COMPAT
		/* Compat tasks compare against the 32-bit whitelist. */
		if (is_compat_task())
			syscall = mode1_syscalls_32;
#endif
		do {
			if (*syscall == this_syscall)
				return 0;
		} while (*++syscall);
		exit_sig = SIGKILL;
		ret = SECCOMP_RET_KILL;
		break;
#ifdef CONFIG_SECCOMP_FILTER
	case SECCOMP_MODE_FILTER: {
		int data;
		struct pt_regs *regs = task_pt_regs(current);
		ret = seccomp_run_filters(this_syscall);
		/* Split verdict into action (high bits) and 16-bit data. */
		data = ret & SECCOMP_RET_DATA;
		ret &= SECCOMP_RET_ACTION;
		switch (ret) {
		case SECCOMP_RET_ERRNO:
			/* Set the low-order 16-bits as a errno. */
			syscall_set_return_value(current, regs,
						 -data, 0);
			goto skip;
		case SECCOMP_RET_TRAP:
			/* Show the handler the original registers. */
			syscall_rollback(current, regs);
			/* Let the filter pass back 16 bits of data. */
			seccomp_send_sigsys(this_syscall, data);
			goto skip;
		case SECCOMP_RET_TRACE:
			/* Skip these calls if there is no tracer. */
			if (!ptrace_event_enabled(current,
						  PTRACE_EVENT_SECCOMP)) {
				syscall_set_return_value(current, regs,
							 -ENOSYS, 0);
				goto skip;
			}
			/* Allow the BPF to provide the event message */
			ptrace_event(PTRACE_EVENT_SECCOMP, data);
			/*
			 * The delivery of a fatal signal during event
			 * notification may silently skip tracer notification.
			 * Terminating the task now avoids executing a system
			 * call that may not be intended.
			 */
			if (fatal_signal_pending(current))
				break;
			/* Tracer may have rewritten the nr to skip it. */
			if (syscall_get_nr(current, regs) < 0)
				goto skip;  /* Explicit request to skip. */
			return 0;
		case SECCOMP_RET_ALLOW:
			return 0;
		case SECCOMP_RET_KILL:
		default:
			break;
		}
		exit_sig = SIGSYS;
		break;
	}
#endif
	default:
		BUG();
	}

#ifdef SECCOMP_DEBUG
	dump_stack();
#endif
	/* Kill verdict: audit, then terminate the task. */
	audit_seccomp(this_syscall, exit_sig, ret);
	do_exit(exit_sig);
#ifdef CONFIG_SECCOMP_FILTER
skip:
	audit_seccomp(this_syscall, exit_sig, ret);
#endif
	return -1;
}
/*
 * handle_signal - set up a user-space signal frame for @sig.
 *
 * Handles syscall-restart bookkeeping, builds the rt frame, resets flags
 * per the ABI, and blocks the signal (plus sa_mask) while the handler runs.
 * Returns 0 on success or the non-zero result of setup_rt_frame().
 */
static int handle_signal(unsigned long sig, siginfo_t *info,
			 struct k_sigaction *ka, sigset_t *oldset,
			 struct pt_regs *regs)
{
	int ret;

	/* Are we from a system call? */
	if (syscall_get_nr(current, regs) >= 0) {
		/* If so, check system call restarting.. */
		switch (syscall_get_error(current, regs)) {
		case -ERESTART_RESTARTBLOCK:
		case -ERESTARTNOHAND:
			regs->ax = -EINTR;
			break;

		case -ERESTARTSYS:
			if (!(ka->sa.sa_flags & SA_RESTART)) {
				regs->ax = -EINTR;
				break;
			}
			/* fallthrough */
		case -ERESTARTNOINTR:
			regs->ax = regs->orig_ax;
			regs->ip -= 2;
			break;
		}
	}

	/*
	 * If TF is set due to a debugger (TIF_FORCED_TF), clear the TF
	 * flag so that register information in the sigcontext is correct.
	 */
	if (unlikely(regs->flags & X86_EFLAGS_TF) &&
	    likely(test_and_clear_thread_flag(TIF_FORCED_TF)))
		regs->flags &= ~X86_EFLAGS_TF;

	ret = setup_rt_frame(sig, ka, info, oldset, regs);

	if (ret)
		return ret;

#ifdef CONFIG_X86_64
	/*
	 * This has nothing to do with segment registers,
	 * despite the name.  This magic affects uaccess.h
	 * macros' behavior.  Reset it to the normal setting.
	 */
	set_fs(USER_DS);
#endif

	/*
	 * Clear the direction flag as per the ABI for function entry.
	 */
	regs->flags &= ~X86_EFLAGS_DF;

	/*
	 * Clear TF when entering the signal handler, but
	 * notify any tracer that was single-stepping it.
	 * The tracer may want to single-step inside the
	 * handler too.
	 */
	regs->flags &= ~X86_EFLAGS_TF;

	/*
	 * NOTE(review): the original text had "&current" mangled into the
	 * "¤" glyph (HTML-entity corruption); restored throughout.
	 */
	spin_lock_irq(&current->sighand->siglock);
	sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
	if (!(ka->sa.sa_flags & SA_NODEFER))
		sigaddset(&current->blocked, sig);
	recalc_sigpending();
	spin_unlock_irq(&current->sighand->siglock);

	tracehook_signal_handler(sig, info, ka, regs,
				 test_thread_flag(TIF_SINGLESTEP));

	return 0;
}