Example #1
0
/**
 * seccomp_bpf_load - fetch one 32-bit word of the current task's seccomp data
 * @off: byte offset into struct seccomp_data to load from
 *
 * Returns the 32 bits of data located at @off: the syscall number, the
 * syscall architecture, one half of a 64-bit argument slot, or one half
 * of the instruction pointer.
 * seccomp_chk_filter() is expected to guarantee that @off is 32-bit aligned
 * and in bounds; an offset that matches none of the fields hits BUG().
 */
u32 seccomp_bpf_load(int off)
{
	struct pt_regs *regs = task_pt_regs(current);

	if (off == BPF_DATA(nr))
		return syscall_get_nr(current, regs);

	if (off == BPF_DATA(arch))
		return syscall_get_arch(current, regs);

	/* args[6] is one past the last argument slot, i.e. the end bound. */
	if (off >= BPF_DATA(args[0]) && off < BPF_DATA(args[6])) {
		/* Which of the argument slots the offset falls into. */
		int arg_nr = (off - BPF_DATA(args[0])) / sizeof(u64);
		/* 0 when @off starts a 64-bit slot, 1 when it is inside it. */
		int half = (off % sizeof(u64)) ? 1 : 0;
		unsigned long arg_val;

		syscall_get_arguments(current, regs, arg_nr, 1, &arg_val);
		return get_u32(arg_val, half);
	}

	if (off == BPF_DATA(instruction_pointer))
		return get_u32(KSTK_EIP(current), 0);

	if (off == BPF_DATA(instruction_pointer) + sizeof(u32))
		return get_u32(KSTK_EIP(current), 1);

	/* seccomp_chk_filter should make this impossible. */
	BUG();
}
Example #2
0
/**
 * seccomp_send_sigsys - signals the task to allow in-process syscall emulation
 * @syscall: syscall number to send to userland
 * @reason: filter-supplied reason code to send to userland (via si_errno)
 *
 * Forces a SIGSYS with a code of SYS_SECCOMP and related sigsys info.
 */
static void seccomp_send_sigsys(int syscall, int reason)
{
	struct siginfo info;
	memset(&info, 0, sizeof(info));
	info.si_signo = SIGSYS;
	info.si_code = SYS_SECCOMP;
	info.si_call_addr = (void __user *)KSTK_EIP(current);
	info.si_errno = reason;
	info.si_arch = syscall_get_arch(current, task_pt_regs(current));
	info.si_syscall = syscall;
	force_sig_info(SIGSYS, &info, current);
}
Example #3
0
notrace void probe_memory_handle_fault_entry(struct mm_struct *mm,
	struct vm_area_struct *vma, unsigned long address, int write_access)
{
	struct marker *marker;
	struct serialize_long_long_int data;

	data.f1 = address;
	data.f2 = KSTK_EIP(current);
	data.f3 = write_access;

	marker = &GET_MARKER(mm_handle_fault_entry);
	ltt_specialized_trace(marker->single.probe_private,
		&data, serialize_sizeof(data), sizeof(long));
}
Example #4
0
/*
 * Fill @sd with the current task's syscall number, syscall architecture,
 * six syscall arguments and instruction pointer.
 *
 * Endianness is explicitly ignored and left for BPF program authors to manage
 * as per the specific architecture.
 */
static void populate_seccomp_data(struct seccomp_data *sd)
{
	struct task_struct *tsk = current;
	struct pt_regs *regs = task_pt_regs(tsk);
	unsigned long raw[6];
	int i;

	sd->nr = syscall_get_nr(tsk, regs);
	sd->arch = syscall_get_arch();

	/* Fetch all six arguments at once, then copy them into sd->args[]. */
	syscall_get_arguments(tsk, regs, 0, 6, raw);
	for (i = 0; i < 6; i++)
		sd->args[i] = raw[i];

	sd->instruction_pointer = KSTK_EIP(tsk);
}
Example #5
0
/*
 * Notification of system call entry/exit
 * - triggered by current->work.syscall_trace
 *
 * @regs:    register state of the task entering the syscall
 * @syscall: syscall number as seen on entry
 *
 * Returns the syscall number to execute (re-read from thread_info after the
 * tracehook and seccomp steps), or -1 when the tracehook reports a non-zero
 * result or __secure_computing() returns -1.
 */
asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall)
{
	user_exit();

	/* Publish the syscall number in thread_info; it is re-read below. */
	current_thread_info()->syscall = syscall;

	if (test_thread_flag(TIF_SYSCALL_TRACE)) {
		if (tracehook_report_syscall_entry(regs))
			return -1;
		/* presumably the tracer may have changed the number - TODO confirm */
		syscall = current_thread_info()->syscall;
	}

#ifdef CONFIG_SECCOMP
	if (unlikely(test_thread_flag(TIF_SECCOMP))) {
		int ret, i;
		struct seccomp_data sd;
		unsigned long args[6];

		/* Build the seccomp_data record handed to __secure_computing(). */
		sd.nr = syscall;
		sd.arch = syscall_get_arch();
		syscall_get_arguments(current, regs, 0, 6, args);
		for (i = 0; i < 6; i++)
			sd.args[i] = args[i];
		sd.instruction_pointer = KSTK_EIP(current);

		ret = __secure_computing(&sd);
		if (ret == -1)
			return ret;
		/* Pick up the syscall number again after the seccomp step. */
		syscall = current_thread_info()->syscall;
	}
#endif

	/*
	 * NOTE(review): the tracepoint is given regs->regs[2] rather than the
	 * local 'syscall', which may have been re-read above - confirm this
	 * is intended.
	 */
	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
		trace_sys_enter(regs, regs->regs[2]);

	/* Audit record: syscall number plus regs[4]..regs[7]. */
	audit_syscall_entry(syscall, regs->regs[4], regs->regs[5],
			    regs->regs[6], regs->regs[7]);

	/*
	 * Negative syscall numbers are mistaken for rejected syscalls, but
	 * won't have had the return value set appropriately, so we do so now.
	 */
	if (syscall < 0)
		syscall_set_return_value(current, regs, -ENOSYS, 0);
	return syscall;
}
Example #6
0
/*
 * Fill @sd with the current task's syscall number, syscall architecture,
 * six syscall arguments and instruction pointer.
 *
 * Endianness is explicitly ignored and left for BPF program authors to manage
 * as per the specific architecture.
 */
static void populate_seccomp_data(struct seccomp_data *sd)
{
	struct task_struct *task = current;
	struct pt_regs *regs = task_pt_regs(task);

	sd->nr = syscall_get_nr(task, regs);
	sd->arch = syscall_get_arch(task, regs);

	/*
	 * Unroll syscall_get_args to help gcc on arm.
	 *
	 * Each call fetches exactly one argument and stores it straight into
	 * the matching sd->args[] slot through an unsigned long pointer.
	 * NOTE(review): if an sd->args[] element is wider than unsigned long,
	 * only part of the slot is written here - confirm that the caller
	 * zeroes @sd beforehand.
	 */
	syscall_get_arguments(task, regs, 0, 1, (unsigned long *) &sd->args[0]);
	syscall_get_arguments(task, regs, 1, 1, (unsigned long *) &sd->args[1]);
	syscall_get_arguments(task, regs, 2, 1, (unsigned long *) &sd->args[2]);
	syscall_get_arguments(task, regs, 3, 1, (unsigned long *) &sd->args[3]);
	syscall_get_arguments(task, regs, 4, 1, (unsigned long *) &sd->args[4]);
	syscall_get_arguments(task, regs, 5, 1, (unsigned long *) &sd->args[5]);

	sd->instruction_pointer = KSTK_EIP(task);
}
Example #7
0
/*
 * dtrace_getupcstack - collect a heuristic user-mode call stack for the
 * current task.
 *
 * pcstack:       output buffer of pcstack_limit 64-bit slots
 * pcstack_limit: number of slots available in pcstack
 *
 * Slot 0 receives current->pid. The following slots receive stack words
 * that look like code addresses: values that pass validate_ptr(), lie
 * outside the scanned stack window [bos, spend] and fall inside a mapping
 * with VM_EXEC set. Any slots left over are zero-filled.
 *
 * Returns without touching the buffer when CPU_DTRACE_FAULT is already set
 * for this CPU or when pcstack_limit is not positive.
 */
void
dtrace_getupcstack(uint64_t *pcstack, int pcstack_limit)
{   uint64_t *pcstack_end = pcstack + pcstack_limit;
    volatile uint8_t *flags =
        (volatile uint8_t *)&cpu_core[cpu_get_id()].cpuc_dtrace_flags;
    unsigned long *sp;
    unsigned long *bos;

    if (*flags & CPU_DTRACE_FAULT)
        return;

    if (pcstack_limit <= 0)
        return;

    /* First entry is always the pid of the current task. */
    *pcstack++ = (uint64_t)current->pid;

    if (pcstack >= pcstack_end)
        return;

    /***********************************************/
    /*   Linux provides a built in function which  */
    /*   is  good  because  stack walking is arch  */
    /*   dependent.            (save_stack_trace)  */
    /*                                             */
    /*   Unfortunately  this is options dependent  */
    /*   (CONFIG_STACKTRACE) so we cannot use it.  */
    /*   And  its  GPL  anyhow, so we cannot copy  */
    /*   it.                                       */
    /*                                             */
    /*   Whats worse is that we might be compiled  */
    /*   with a frame pointer (only on x86-32) so  */
    /*   we have three scenarios to handle.        */
    /***********************************************/

    /***********************************************/
    /*   Ye  gods! The world is an awful place to  */
    /*   live. The target process, may or may not  */
    /*   have   frame  pointers.  In  fact,  some  */
    /*   frames  may have it and some may not (eg  */
    /*   different   libraries  may  be  compiled  */
    /*   differently).                             */
    /*                                             */
    /*   Looks like distro owners dont care about  */
    /*   debuggability,  and  give  us  no  frame  */
    /*   pointers.                                 */
    /*                                             */
    /*   This  function  is  really important and  */
    /*   useful.  On  modern  Linux  systems, gdb  */
    /*   (and  pstack) contain all the smarts. In  */
    /*   fact,  pstack  is often a wrapper around  */
    /*   gdb  -  i.e. its so complex we cannot do  */
    /*   this.                                     */
    /***********************************************/

    /***********************************************/
    /*   Bear  in  mind  that  user stacks can be  */
    /*   megabytes  in  size,  vs  kernel  stacks  */
    /*   which  are  limited  to a few K (4 or 8K  */
    /*   typically).                               */
    /***********************************************/

    /*
     * Pick the starting stack pointer (sp) and remember it as the bottom
     * of the scan window (bos). ALIGN_MASK is the low-bit mask used below
     * to test whether a word is aligned like a pointer.
     */
//	sp = current->thread.rsp;
# if defined(__i386)
    bos = sp = KSTK_ESP(current);
#	define	ALIGN_MASK	3
# else
    /***********************************************/
    /*   KSTK_ESP()  doesnt exist for x86_64 (its  */
    /*   set to -1).                               */
    /***********************************************/
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 25)
#  if defined(KSTK_EIP)
    /***********************************************/
    /*   Handle  ARM and more kernel independent,  */
    /*   but might not exist.                      */
    /***********************************************/
    /*
     * NOTE(review): this seeds the stack walk from KSTK_EIP(current). By
     * its name KSTK_EIP looks like an instruction-pointer accessor, not
     * a stack-pointer one - confirm this is really intended.
     */
    bos = sp = (unsigned long *) KSTK_EIP(current);
#  else
    bos = sp = (unsigned long *) task_pt_regs(current)->sp;
#  endif
#else
    bos = sp = task_pt_regs(current)->rsp;
#endif
#	define	ALIGN_MASK	7
#endif

    /***********************************************/
    /*   Walk  the  stack.  We  cannot  rely on a  */
    /*   frame  pointer  at  each  level,  and we  */
    /*   really  want to avoid probing every word  */
    /*   in  the  stack  - a large stack will eat  */
    /*   cpu  looking at thousands of entries. So  */
    /*   try  and  heuristically see if we have a  */
    /*   likely  frame  pointer  to jump over the  */
    /*   frame,  but, if not, just go one word at  */
    /*   a time.                                   */
    /*                                             */
    /*   Try  and be careful we dont walk outside  */
    /*   the  stack  or  walk  backwards  in  the  */
    /*   stack, too.                               */
    /***********************************************/
    /*
     * spend is the upper bound of the scan: 1024 words above sp by
     * default, or just below the end of the mapping that find_vma()
     * returns for sp when there is one.
     * NOTE(review): no mm locking is visible around find_vma() here -
     * confirm the calling context makes this safe.
     */
    {   uintptr_t *spend = sp + 1024;
        struct vm_area_struct *vma = find_vma(current->mm, (unsigned long) sp);
        if (vma)
            spend = (uintptr_t *) (vma->vm_end - sizeof(int *));

        /*printk("xbos=%p %p\n", bos, spend);*/

        /***********************************************/
        /*   Have  you ever looked at the output from  */
        /*   GCC in amd64 mode? Things like:           */
        /*                                             */
        /*   push %r12                                 */
        /*   push %rbp                                 */
        /*                                             */
        /*   will make you come out in a cold sweat -  */
        /*   no   way  to  find  the  frame  pointer,  */
        /*   without doing what GDB does (ie read the  */
        /*   DWARF  stack  unwind info). So, for now,  */
        /*   you  get  some  false  positives  in the  */
        /*   output - but we try to be conservative.   */
        /***********************************************/
        while (sp >= bos && sp < spend && validate_ptr(sp)) {
            /*printk("  %p %d: %p %d\n", sp, validate_ptr(sp), sp[0], validate_ptr(sp[0]));*/
            if (validate_ptr((void *) sp[0])) {
                /*
                 * Word just below the candidate; treated further down as
                 * a possible saved frame pointer.
                 * NOTE(review): on the first iteration sp == bos, so
                 * sp[-1] lies below the window and is not covered by the
                 * validate_ptr(sp) check - confirm this read is safe.
                 */
                uintptr_t p = sp[-1];
                /***********************************************/
                /*   Try  and  avoid false positives in stack  */
                /*   entries   -   we  want  this  to  be  an  */
                /*   executable instruction.                   */
                /***********************************************/
                if (((unsigned long *) sp[0] < bos || (unsigned long *) sp[0] > spend) &&
                        (vma = find_vma(current->mm, sp[0])) != NULL &&
                        vma->vm_flags & VM_EXEC) {
                    *pcstack++ = sp[0];
                    if (pcstack >= pcstack_end)
                        break;
                }
                /*
                 * If p is pointer-aligned and points further up inside
                 * the window, jump to it instead of stepping word by word.
                 */
                if (((int) p & ALIGN_MASK) == 0 && p > (uintptr_t) sp && p < (uintptr_t) spend)
                    sp = (unsigned long *) p;
            }
            sp++;
        }
    }

    /***********************************************/
    /*   Erase  anything  else  in  the buffer to  */
    /*   avoid confusion.                          */
    /***********************************************/
    while (pcstack < pcstack_end)
        *pcstack++ = (pc_t) NULL;
}
Example #8
0
/*
 * get_stat - format the one-line status record of process @pid.
 *
 * Looks the task up with get_task(), derives a state letter, the masks of
 * ignored and caught signals, the controlling tty's process group and the
 * wait channel, then prints everything into @buffer with sprintf().
 *
 * Returns the number of characters written, or 0 when no task is found.
 */
static int get_stat(int pid, char * buffer)
{
	struct task_struct ** p = get_task(pid);
	struct task_struct *tsk;
	unsigned long sigignore = 0, sigcatch = 0, wchan;
	int sig, tty, tty_pgrp;
	char state;

	if (!p || !*p)
		return 0;
	tsk = *p;

	/* States outside 0..5 are shown as '.'. */
	state = (tsk->state < 0 || tsk->state > 5) ? '.' : "RSDZTD"[tsk->state];

	/* Handler value 1 means ignored, 0 means default, anything else caught. */
	for (sig = 0; sig < 32; sig++) {
		int handler = (int) tsk->sigaction[sig].sa_handler;

		if (handler == 1)
			sigignore |= 1UL << sig;
		else if (handler != 0)
			sigcatch |= 1UL << sig;
	}

	wchan = get_wchan(tsk);

	/* Process group of the controlling tty, or -1 when there is none. */
	tty = tsk->tty;
	tty_pgrp = -1;
	if (tty > 0 && tty_table[tty])
		tty_pgrp = tty_table[tty]->pgrp;

	return sprintf(buffer,
		"%d (%s) %c %d %d %d %d %d %u %u "
		"%u %u %u %d %d %d %d %d %d %u %u %d %u %u %u %u %u %u %u %u %d "
		"%d %d %d %u\n",
		pid,
		tsk->comm,
		state,
		tsk->p_pptr->pid,
		tsk->pgrp,
		tsk->session,
		tsk->tty,
		tty_pgrp,
		tsk->flags,
		tsk->min_flt,
		tsk->cmin_flt,
		tsk->maj_flt,
		tsk->cmaj_flt,
		tsk->utime,
		tsk->stime,
		tsk->cutime,
		tsk->cstime,
		tsk->counter,  /* this is the kernel priority ---
				   subtract 30 in your user-level program. */
		tsk->priority, /* this is the nice value ---
				   subtract 15 in your user-level program. */
		tsk->timeout,
		tsk->it_real_value,
		tsk->start_time,
		VSIZE(tsk, tsk->kernel_stack_page),
		tsk->rss, /* you might want to shift this left 3 */
		tsk->rlim[RLIMIT_RSS].rlim_cur,
		tsk->start_code,
		tsk->end_code,
		tsk->start_stack,
		KSTK_ESP(tsk->kernel_stack_page),
		KSTK_EIP(tsk->kernel_stack_page),
		tsk->signal,
		tsk->blocked,
		sigignore,
		sigcatch,
		wchan);
}
Example #9
0
void seccomp_filter_log_failure(int syscall)
{
	pr_info("%s[%d]: system call %d (%s) blocked at 0x%lx\n",
		current->comm, task_pid_nr(current), syscall,
		syscall_nr_to_name(syscall), KSTK_EIP(current));
}
Example #10
0
static int get_stat(int pid, char * buffer)
{
    struct task_struct ** p = get_task(pid), *tsk;
    unsigned long sigignore=0, sigcatch=0, wchan;
    unsigned long vsize, eip, esp;
    long priority, nice;
    int i,tty_pgrp;
    char state;

    if (!p || (tsk = *p) == NULL)
        return 0;
    if (tsk->state < 0 || tsk->state > 5)
        state = '.';
    else
        state = "RSDZTW"[tsk->state];
    vsize = eip = esp = 0;
    if (tsk->mm && tsk->mm != &init_mm) {
        struct vm_area_struct *vma = tsk->mm->mmap;
        while (vma) {
            vsize += vma->vm_end - vma->vm_start;
            vma = vma->vm_next;
        }
        if (tsk->kernel_stack_page) {
            eip = KSTK_EIP(tsk);
            esp = KSTK_ESP(tsk);
        }
    }
    wchan = get_wchan(tsk);
    if (tsk->sig) {
        unsigned long bit = 1;
        for(i=0; i<32; ++i) {
            switch((unsigned long) tsk->sig->action[i].sa_handler) {
            case 0:
                break;
            case 1:
                sigignore |= bit;
                break;
            default:
                sigcatch |= bit;
            }
            bit <<= 1;
        }
    }
    if (tsk->tty)
        tty_pgrp = tsk->tty->pgrp;
    else
        tty_pgrp = -1;

    /* scale priority and nice values from timeslices to -20..20 */
    /* to make it look like a "normal" unix priority/nice value  */
    priority = tsk->counter;
    priority = 20 - (priority * 10 + DEF_PRIORITY / 2) / DEF_PRIORITY;
    nice = tsk->priority;
    nice = 20 - (nice * 20 + DEF_PRIORITY / 2) / DEF_PRIORITY;

    return sprintf(buffer,"%d (%s) %c %d %d %d %d %d %lu %lu \
%lu %lu %lu %lu %lu %ld %ld %ld %ld %ld %ld %lu %lu %ld %lu %lu %lu %lu %lu \
%lu %lu %lu %lu %lu %lu %lu %lu\n",
                   pid,
                   tsk->comm,
                   state,
                   tsk->p_pptr->pid,
                   tsk->pgrp,
                   tsk->session,
                   tsk->tty ? kdev_t_to_nr(tsk->tty->device) : 0,
                   tty_pgrp,
                   tsk->flags,
                   tsk->min_flt,
                   tsk->cmin_flt,
                   tsk->maj_flt,
                   tsk->cmaj_flt,
                   tsk->utime,
                   tsk->stime,
                   tsk->cutime,
                   tsk->cstime,
                   priority,
                   nice,
                   tsk->timeout,
                   tsk->it_real_value,
                   tsk->start_time,
                   vsize,
                   tsk->mm ? tsk->mm->rss : 0, /* you might want to shift this left 3 */
                   tsk->rlim ? tsk->rlim[RLIMIT_RSS].rlim_cur : 0,
                   tsk->mm ? tsk->mm->start_code : 0,
                   tsk->mm ? tsk->mm->end_code : 0,
                   tsk->mm ? tsk->mm->start_stack : 0,
                   esp,
                   eip,
                   tsk->signal,
                   tsk->blocked,
                   sigignore,
                   sigcatch,
                   wchan,
                   tsk->nswap,
                   tsk->cnswap);
}