static void test_getcpu(void) { printf("[RUN]\tTesting getcpu...\n"); for (int cpu = 0; ; cpu++) { cpu_set_t cpuset; CPU_ZERO(&cpuset); CPU_SET(cpu, &cpuset); if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) return; unsigned cpu_sys, cpu_vdso, cpu_vsys, node_sys, node_vdso, node_vsys; long ret_sys, ret_vdso = 1, ret_vsys = 1; unsigned node; ret_sys = sys_getcpu(&cpu_sys, &node_sys, 0); if (vdso_getcpu) ret_vdso = vdso_getcpu(&cpu_vdso, &node_vdso, 0); if (vgetcpu) ret_vsys = vgetcpu(&cpu_vsys, &node_vsys, 0); if (!ret_sys) node = node_sys; else if (!ret_vdso) node = node_vdso; else if (!ret_vsys) node = node_vsys; bool ok = true; if (!ret_sys && (cpu_sys != cpu || node_sys != node)) ok = false; if (!ret_vdso && (cpu_vdso != cpu || node_vdso != node)) ok = false; if (!ret_vsys && (cpu_vsys != cpu || node_vsys != node)) ok = false; printf("[%s]\tCPU %u:", ok ? "OK" : "FAIL", cpu); if (!ret_sys) printf(" syscall: cpu %u, node %u", cpu_sys, node_sys); if (!ret_vdso) printf(" vdso: cpu %u, node %u", cpu_vdso, node_vdso); if (!ret_vsys) printf(" vsyscall: cpu %u, node %u", cpu_vsys, node_vsys); printf("\n"); if (!ok) nerrs++; } }
bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) { struct task_struct *tsk; unsigned long caller; int vsyscall_nr, syscall_nr, tmp; int prev_sig_on_uaccess_error; long ret; /* * No point in checking CS -- the only way to get here is a user mode * trap to a high address, which means that we're in 64-bit user code. */ WARN_ON_ONCE(address != regs->ip); if (vsyscall_mode == NONE) { warn_bad_vsyscall(KERN_INFO, regs, "vsyscall attempted with vsyscall=none"); return false; } vsyscall_nr = addr_to_vsyscall_nr(address); trace_emulate_vsyscall(vsyscall_nr); if (vsyscall_nr < 0) { warn_bad_vsyscall(KERN_WARNING, regs, "misaligned vsyscall (exploit attempt or buggy program) -- look up the vsyscall kernel parameter if you need a workaround"); goto sigsegv; } if (get_user(caller, (unsigned long __user *)regs->sp) != 0) { warn_bad_vsyscall(KERN_WARNING, regs, "vsyscall with bad stack (exploit attempt?)"); goto sigsegv; } tsk = current; /* * Check for access_ok violations and find the syscall nr. * * NULL is a valid user pointer (in the access_ok sense) on 32-bit and * 64-bit, so we don't need to special-case it here. For all the * vsyscalls, NULL means "don't write anything" not "write it at * address 0". */ switch (vsyscall_nr) { case 0: if (!write_ok_or_segv(regs->di, sizeof(struct timeval)) || !write_ok_or_segv(regs->si, sizeof(struct timezone))) { ret = -EFAULT; goto check_fault; } syscall_nr = __NR_gettimeofday; break; case 1: if (!write_ok_or_segv(regs->di, sizeof(time_t))) { ret = -EFAULT; goto check_fault; } syscall_nr = __NR_time; break; case 2: if (!write_ok_or_segv(regs->di, sizeof(unsigned)) || !write_ok_or_segv(regs->si, sizeof(unsigned))) { ret = -EFAULT; goto check_fault; } syscall_nr = __NR_getcpu; break; } /* * Handle seccomp. regs->ip must be the original value. * See seccomp_send_sigsys and Documentation/prctl/seccomp_filter.txt. * * We could optimize the seccomp disabled case, but performance * here doesn't matter. */ regs->orig_ax = syscall_nr; regs->ax = -ENOSYS; tmp = secure_computing(syscall_nr); if ((!tmp && regs->orig_ax != syscall_nr) || regs->ip != address) { warn_bad_vsyscall(KERN_DEBUG, regs, "seccomp tried to change syscall nr or ip"); do_exit(SIGSYS); } if (tmp) goto do_ret; /* skip requested */ /* * With a real vsyscall, page faults cause SIGSEGV. We want to * preserve that behavior to make writing exploits harder. */ prev_sig_on_uaccess_error = current_thread_info()->sig_on_uaccess_error; current_thread_info()->sig_on_uaccess_error = 1; ret = -EFAULT; switch (vsyscall_nr) { case 0: ret = sys_gettimeofday( (struct timeval __user *)regs->di, (struct timezone __user *)regs->si); break; case 1: ret = sys_time((time_t __user *)regs->di); break; case 2: ret = sys_getcpu((unsigned __user *)regs->di, (unsigned __user *)regs->si, NULL); break; } current_thread_info()->sig_on_uaccess_error = prev_sig_on_uaccess_error; check_fault: if (ret == -EFAULT) { /* Bad news -- userspace fed a bad pointer to a vsyscall. */ warn_bad_vsyscall(KERN_INFO, regs, "vsyscall fault (exploit attempt?)"); /* * If we failed to generate a signal for any reason, * generate one here. (This should be impossible.) */ if (WARN_ON_ONCE(!sigismember(&tsk->pending.signal, SIGBUS) && !sigismember(&tsk->pending.signal, SIGSEGV))) goto sigsegv; return true; /* Don't emulate the ret. */ } regs->ax = ret; do_ret: /* Emulate a ret instruction. */ regs->ip = caller; regs->sp += 8; return true; sigsegv: force_sig(SIGSEGV, current); return true; }
bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) { struct task_struct *tsk; unsigned long caller; int vsyscall_nr; long ret; /* * No point in checking CS -- the only way to get here is a user mode * trap to a high address, which means that we're in 64-bit user code. */ WARN_ON_ONCE(address != regs->ip); if (vsyscall_mode == NONE) { warn_bad_vsyscall(KERN_INFO, regs, "vsyscall attempted with vsyscall=none"); return false; } vsyscall_nr = addr_to_vsyscall_nr(address); trace_emulate_vsyscall(vsyscall_nr); if (vsyscall_nr < 0) { warn_bad_vsyscall(KERN_WARNING, regs, "misaligned vsyscall (exploit attempt or buggy program) -- look up the vsyscall kernel parameter if you need a workaround"); goto sigsegv; } if (get_user(caller, (unsigned long __user *)regs->sp) != 0) { warn_bad_vsyscall(KERN_WARNING, regs, "vsyscall with bad stack (exploit attempt?)"); goto sigsegv; } tsk = current; if (seccomp_mode(&tsk->seccomp)) do_exit(SIGKILL); switch (vsyscall_nr) { case 0: ret = sys_gettimeofday( (struct timeval __user *)regs->di, (struct timezone __user *)regs->si); break; case 1: ret = sys_time((time_t __user *)regs->di); break; case 2: ret = sys_getcpu((unsigned __user *)regs->di, (unsigned __user *)regs->si, 0); break; } if (ret == -EFAULT) { /* * Bad news -- userspace fed a bad pointer to a vsyscall. * * With a real vsyscall, that would have caused SIGSEGV. * To make writing reliable exploits using the emulated * vsyscalls harder, generate SIGSEGV here as well. */ warn_bad_vsyscall(KERN_INFO, regs, "vsyscall fault (exploit attempt?)"); goto sigsegv; } regs->ax = ret; /* Emulate a ret instruction. */ regs->ip = caller; regs->sp += 8; return true; sigsegv: force_sig(SIGSEGV, current); return true; }
bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) { struct task_struct *tsk; unsigned long caller; int vsyscall_nr; int prev_sig_on_uaccess_error; long ret; /* * No point in checking CS -- the only way to get here is a user mode * trap to a high address, which means that we're in 64-bit user code. */ WARN_ON_ONCE(address != regs->ip); if (vsyscall_mode == NONE) { warn_bad_vsyscall(KERN_INFO, regs, "vsyscall attempted with vsyscall=none"); return false; } vsyscall_nr = addr_to_vsyscall_nr(address); trace_emulate_vsyscall(vsyscall_nr); if (vsyscall_nr < 0) { warn_bad_vsyscall(KERN_WARNING, regs, "misaligned vsyscall (exploit attempt or buggy program) -- look up the vsyscall kernel parameter if you need a workaround"); goto sigsegv; } if (get_user(caller, (unsigned long __user *)regs->sp) != 0) { warn_bad_vsyscall(KERN_WARNING, regs, "vsyscall with bad stack (exploit attempt?)"); goto sigsegv; } tsk = current; if (seccomp_mode(&tsk->seccomp)) do_exit(SIGKILL); /* * With a real vsyscall, page faults cause SIGSEGV. We want to * preserve that behavior to make writing exploits harder. */ prev_sig_on_uaccess_error = current_thread_info()->sig_on_uaccess_error; current_thread_info()->sig_on_uaccess_error = 1; /* * NULL is a valid user pointer (in the access_ok sense) on 32-bit and * 64-bit, so we don't need to special-case it here. For all the * vsyscalls, NULL means "don't write anything" not "write it at * address 0". */ ret = -EFAULT; switch (vsyscall_nr) { case 0: if (!write_ok_or_segv(regs->di, sizeof(struct timeval)) || !write_ok_or_segv(regs->si, sizeof(struct timezone))) break; ret = sys_gettimeofday( (struct timeval __user *)regs->di, (struct timezone __user *)regs->si); break; case 1: if (!write_ok_or_segv(regs->di, sizeof(time_t))) break; ret = sys_time((time_t __user *)regs->di); break; case 2: if (!write_ok_or_segv(regs->di, sizeof(unsigned)) || !write_ok_or_segv(regs->si, sizeof(unsigned))) break; ret = sys_getcpu((unsigned __user *)regs->di, (unsigned __user *)regs->si, NULL); break; } current_thread_info()->sig_on_uaccess_error = prev_sig_on_uaccess_error; if (ret == -EFAULT) { /* Bad news -- userspace fed a bad pointer to a vsyscall. */ warn_bad_vsyscall(KERN_INFO, regs, "vsyscall fault (exploit attempt?)"); /* * If we failed to generate a signal for any reason, * generate one here. (This should be impossible.) */ if (WARN_ON_ONCE(!sigismember(&tsk->pending.signal, SIGBUS) && !sigismember(&tsk->pending.signal, SIGSEGV))) goto sigsegv; return true; /* Don't emulate the ret. */ } regs->ax = ret; /* Emulate a ret instruction. */ regs->ip = caller; regs->sp += 8; return true; sigsegv: force_sig(SIGSEGV, current); return true; }