/*
 * Install a seccomp BPF program with the following policy:
 *   - architecture other than AUDIT_ARCH_X86_64 -> SECCOMP_RET_KILL
 *   - system call number == __NR_open           -> SECCOMP_RET_KILL
 *   - anything else                             -> SECCOMP_RET_ALLOW
 *
 * A return value of -1 from seccomp() is reported through errExit().
 */
static void
install_filter(void)
{
    struct sock_filter insns[] = {
        /* A <- seccomp_data.arch */
        BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
                 (offsetof(struct seccomp_data, arch))),

        /* Expected architecture: jump over the kill instruction */
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, AUDIT_ARCH_X86_64, 1, 0),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL),

        /* A <- seccomp_data.nr (system call number) */
        BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
                 (offsetof(struct seccomp_data, nr))),

        /* open(): jump over the allow instruction, landing on the kill */
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_open, 1, 0),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),

        /* Reached only for open() */
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL)
    };

    struct sock_fprog prog = {
        .len = (unsigned short) (sizeof insns / sizeof insns[0]),
        .filter = insns,
    };

    /*
     * On Linux 3.16 and earlier the filter must instead be installed with
     * prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog).
     */
    if (seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog) == -1) {
        errExit("seccomp");
    }
}

/*
 * Set PR_SET_NO_NEW_PRIVS, install the filter, then call open(). The
 * message printed after open() is only expected to appear if the filter
 * did not terminate the process.
 */
int
main(int argc, char **argv)
{
    if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) != 0) {
        errExit("prctl");
    }

    install_filter();

    int fd = open("/tmp/a", O_RDONLY);
    if (fd == -1) {
        errExit("open");
    }

    printf("We shouldn't see this message\n");
    exit(EXIT_SUCCESS);
}
int sc_apply_seccomp_bpf(const char *filter_profile) { debug("loading bpf program for security tag %s", filter_profile); char profile_path[PATH_MAX] = { 0 }; sc_must_snprintf(profile_path, sizeof(profile_path), "%s/%s.bin", filter_profile_dir, filter_profile); // Wait some time for the security profile to show up. When // the system boots snapd will created security profiles, but // a service snap (e.g. network-manager) starts in parallel with // snapd so for such snaps, the profiles may not be generated // yet long max_wait = 120; const char *MAX_PROFILE_WAIT = getenv("SNAP_CONFINE_MAX_PROFILE_WAIT"); if (MAX_PROFILE_WAIT != NULL) { char *endptr = NULL; errno = 0; long env_max_wait = strtol(MAX_PROFILE_WAIT, &endptr, 10); if (errno != 0 || MAX_PROFILE_WAIT == endptr || *endptr != '\0' || env_max_wait <= 0) { die("SNAP_CONFINE_MAX_PROFILE_WAIT invalid"); } max_wait = env_max_wait > 0 ? env_max_wait : max_wait; } if (max_wait > 3600) { max_wait = 3600; } for (long i = 0; i < max_wait; ++i) { if (access(profile_path, F_OK) == 0) { break; } sleep(1); } // validate '/' down to profile_path are root-owned and not // 'other' writable to avoid possibility of privilege // escalation via bpf program load when paths are incorrectly // set on the system. 
validate_bpfpath_is_safe(profile_path); // load bpf char bpf[MAX_BPF_SIZE + 1] = { 0 }; // account for EOF FILE *fp = fopen(profile_path, "rb"); if (fp == NULL) { die("cannot read %s", profile_path); } // set 'size' to 1 to get bytes transferred size_t num_read = fread(bpf, 1, sizeof(bpf), fp); if (ferror(fp) != 0) { die("cannot read seccomp profile %s", profile_path); } else if (feof(fp) == 0) { die("seccomp profile %s exceeds %zu bytes", profile_path, sizeof(bpf)); } fclose(fp); debug("read %zu bytes from %s", num_read, profile_path); if (sc_streq(bpf, "@unrestricted\n")) { return 0; } uid_t real_uid, effective_uid, saved_uid; if (getresuid(&real_uid, &effective_uid, &saved_uid) < 0) { die("cannot call getresuid"); } // If we can, raise privileges so that we can load the BPF into the // kernel via 'prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, ...)'. debug("raising privileges to load seccomp profile"); if (effective_uid != 0 && saved_uid == 0) { if (seteuid(0) != 0) { die("seteuid failed"); } if (geteuid() != 0) { die("raising privs before seccomp_load did not work"); } } // Load filter into the kernel. Importantly we are // intentionally *not* setting NO_NEW_PRIVS because it // interferes with exec transitions in AppArmor with certain // snappy interfaces. Not setting NO_NEW_PRIVS does mean that // applications can adjust their sandbox if they have // CAP_SYS_ADMIN or, if running on < 4.8 kernels, break out of // the seccomp via ptrace. Both CAP_SYS_ADMIN and 'ptrace // (trace)' are blocked by AppArmor with typical snappy // interfaces. 
struct sock_fprog prog = { .len = num_read / sizeof(struct sock_filter), .filter = (struct sock_filter *)bpf, }; if (seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG, &prog) != 0) { if (errno == ENOSYS) { debug("kernel doesn't support the seccomp(2) syscall"); } else if (errno == EINVAL) { debug ("kernel may not support the SECCOMP_FILTER_FLAG_LOG flag"); } debug ("falling back to prctl(2) syscall to load seccomp filter"); if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog) != 0) { die("cannot apply seccomp profile"); } } // drop privileges again debug("dropping privileges after loading seccomp profile"); if (geteuid() == 0) { unsigned real_uid = getuid(); if (seteuid(real_uid) != 0) { die("seteuid failed"); } if (real_uid != 0 && geteuid() == 0) { die("dropping privs after seccomp_load did not work"); } } return 0; }
int main(int argc, char **argv) { unsigned pid = 0; int i; // handle CTRL-C signal (SIGINT, my_handler); signal (SIGTERM, my_handler); for (i = 1; i < argc; i++) { // default options if (strcmp(argv[i], "--help") == 0 || strcmp(argv[i], "-?") == 0) { usage(); return 0; } else if (strcmp(argv[i], "--version") == 0) { printf("firemon version %s\n\n", VERSION); return 0; } // options without a pid argument else if (strcmp(argv[i], "--top") == 0) { top(); // never to return } else if (strcmp(argv[i], "--list") == 0) { list(); return 0; } else if (strcmp(argv[i], "--netstats") == 0) { struct stat s; if (getuid() != 0 && stat("/proc/sys/kernel/grsecurity", &s) == 0) { fprintf(stderr, "Error: this feature is not available on Grsecurity systems\n"); exit(1); } netstats(); return 0; } // cumulative options with or without a pid argument else if (strcmp(argv[i], "--x11") == 0) { arg_x11 = 1; } else if (strcmp(argv[i], "--cgroup") == 0) { arg_cgroup = 1; } else if (strcmp(argv[i], "--cpu") == 0) { arg_cpu = 1; } else if (strcmp(argv[i], "--seccomp") == 0) { arg_seccomp = 1; } else if (strcmp(argv[i], "--caps") == 0) { arg_caps = 1; } else if (strcmp(argv[i], "--tree") == 0) { arg_tree = 1; } else if (strcmp(argv[i], "--interface") == 0) { arg_interface = 1; } else if (strcmp(argv[i], "--route") == 0) { arg_route = 1; } else if (strcmp(argv[i], "--arp") == 0) { arg_arp = 1; } else if (strncmp(argv[i], "--name=", 7) == 0) { char *name = argv[i] + 7; if (name2pid(name, (pid_t *) &pid)) { fprintf(stderr, "Error: cannot find sandbox %s\n", name); return 1; } } // etc else if (strcmp(argv[i], "--nowrap") == 0) arg_nowrap = 1; // invalid option else if (*argv[i] == '-') { fprintf(stderr, "Error: invalid option\n"); return 1; } // PID argument else { // this should be a pid number char *ptr = argv[i]; while (*ptr != '\0') { if (!isdigit(*ptr)) { fprintf(stderr, "Error: not a valid PID number\n"); exit(1); } ptr++; } sscanf(argv[i], "%u", &pid); break; } } if (arg_tree) tree((pid_t) 
pid); if (arg_interface) interface((pid_t) pid); if (arg_route) route((pid_t) pid); if (arg_arp) arp((pid_t) pid); if (arg_seccomp) seccomp((pid_t) pid); if (arg_caps) caps((pid_t) pid); if (arg_cpu) cpu((pid_t) pid); if (arg_cgroup) cgroup((pid_t) pid); if (arg_x11) x11((pid_t) pid); if (!arg_route && !arg_arp && !arg_interface && !arg_tree && !arg_caps && !arg_seccomp && !arg_x11) procevent((pid_t) pid); // never to return return 0; }
/*
 * Install a seccomp BPF program with the following policy:
 *   - architecture other than AUDIT_ARCH_X86_64 -> SECCOMP_RET_KILL
 *   - system call number == __NR_open           -> SECCOMP_RET_KILL
 *   - anything else                             -> SECCOMP_RET_ALLOW
 *
 * A return value of -1 from seccomp() is reported through errExit().
 */
static void
install_filter(void)
{
    struct sock_filter filter[] = {
        /* Load architecture */
        BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
                 (offsetof(struct seccomp_data, arch))),

        /* Kill process if the architecture is not what we expect */
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, AUDIT_ARCH_X86_64, 1, 0),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL),

        /* Load system call number */
        BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
                 (offsetof(struct seccomp_data, nr))),

        /* Allow system calls other than open() */
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_open, 1, 0),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),

        /* Kill process on open() */
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL)
    };

    struct sock_fprog prog = {
        .len = (unsigned short) (sizeof(filter) / sizeof(filter[0])),
        .filter = filter,
    };

    if (seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog) == -1)
        errExit("seccomp");
}

/*
 * Call getppid() <num-loops> times, optionally with the BPF filter
 * installed (any second command-line argument enables it).
 *
 * argv[1]: loop count, a non-negative decimal integer.
 * argv[2]: optional; its presence installs the filter first.
 *
 * Exits with EXIT_FAILURE on a usage error or an invalid loop count.
 */
int
main(int argc, char *argv[])
{
    if (argc < 2) {
        fprintf(stderr, "Usage: %s <num-loops> [x]\n", argv[0]);
        fprintf(stderr, "       (use 'x' to run with BPF filter applied)\n");
        exit(EXIT_FAILURE);
    }

    /*
     * Validate the loop count up front; atoi() would silently turn
     * garbage into 0 and has undefined behavior on overflow.
     */
    char *end = NULL;
    long nloops = strtol(argv[1], &end, 10);
    if (end == argv[1] || *end != '\0' || nloops < 0) {
        fprintf(stderr, "Invalid <num-loops>: %s\n", argv[1]);
        exit(EXIT_FAILURE);
    }

    if (argc > 2) {
        printf("Applying BPF filter\n");

        if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0))
            errExit("prctl");

        install_filter();
    }

    for (long j = 0; j < nloops; j++)
        getppid();

    exit(EXIT_SUCCESS);
}
/*
 * Install a seccomp BPF program that restricts the 'offset' argument of
 * lseek():
 *   - architecture other than AUDIT_ARCH_X86_64 -> SECCOMP_RET_KILL_PROCESS
 *   - system call other than lseek()            -> SECCOMP_RET_ALLOW
 *   - upper 32 bits of offset nonzero           -> fail with errno 2
 *   - lower 32 bits of offset > 1000            -> fail with errno 1
 *   - otherwise                                 -> SECCOMP_RET_ALLOW
 *
 * A return value of -1 from seccomp() is reported through errExit().
 */
static void
install_filter(void)
{
    struct sock_filter filter[] = {
        /* Load architecture */
        BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
                 (offsetof(struct seccomp_data, arch))),

        /* Kill process if the architecture is not what we expect */
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, AUDIT_ARCH_X86_64, 1, 0),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL_PROCESS),

        /* Load system call number */
        BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
                 (offsetof(struct seccomp_data, nr))),

        /* Allow system calls other than lseek() */
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_lseek, 1, 0),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),

        /* Load top 4 bytes of 'offset' argument; fail with errno==2 if
           these bytes are nonzero. The code here assumes a little-endian
           architecture (i.e., that the first 4 bytes are the least
           significant bytes of the 64-bit argument and the following
           4 bytes are the most significant bytes). There are some macros
           in the kernel source file samples/seccomp/bpf-helper.h that
           show how endianness differences can be abstracted away when
           dealing with 64-bit arguments. */
        BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
                 (offsetof(struct seccomp_data, args[1]) + sizeof(__u32))),
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 1, 0),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ERRNO | 2),

        /* Load bottom 4 bytes of 'offset' argument; fail with errno==1
           if the value is > 1000; otherwise allow the system call */
        BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
                 (offsetof(struct seccomp_data, args[1]))),
        BPF_JUMP(BPF_JMP | BPF_JGT | BPF_K, 1000, 0, 1),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ERRNO | 1),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
    };

    struct sock_fprog prog = {
        .len = (unsigned short) (sizeof(filter) / sizeof(filter[0])),
        .filter = filter,
    };

    /*
     * On Linux 3.16 and earlier the filter must instead be installed with
     * prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog).
     */
    if (seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog) == -1)
        errExit("seccomp");
}

/*
 * Seek 'fd' to absolute position 'offset' and print whether the call
 * succeeded, or the errno value it failed with.
 *
 * lseek() returns the resulting offset on success and (off_t) -1 on
 * failure, so success must be detected by comparing against -1; testing
 * for 0 would misreport every successful seek to a nonzero offset.
 */
static void
seek_test(int fd, off_t offset)
{
    off_t ret;

    printf("Seek to byte %lld: ", (long long) offset);

    ret = lseek(fd, offset, SEEK_SET);
    if (ret != (off_t) -1)
        printf("succeeded\n");
    else
        printf("failed with errno = %d\n", errno);
}
/*
 * Install a seccomp BPF program that inspects the flags of open():
 *   - architecture other than AUDIT_ARCH_X86_64 -> SECCOMP_RET_KILL
 *   - system call other than open()             -> SECCOMP_RET_ALLOW
 *   - open() with O_CREAT in flags              -> SECCOMP_RET_KILL
 *   - open() with O_WRONLY or O_RDWR in flags   -> fail with ENOTSUP
 *   - any other open()                          -> SECCOMP_RET_ALLOW
 *
 * A nonzero return from seccomp() is reported through errExit().
 */
static void
install_filter(void)
{
    struct sock_filter insns[] = {
        /* A <- seccomp_data.arch */
        BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
                 (offsetof(struct seccomp_data, arch))),

        /* Expected architecture: jump over the kill instruction */
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, AUDIT_ARCH_X86_64, 1, 0),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL),

        /* A <- seccomp_data.nr (system call number) */
        BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
                 (offsetof(struct seccomp_data, nr))),

        /* Anything that is not open() is allowed outright */
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_open, 1, 0),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),

        /* A <- second argument of open(), i.e. the flags */
        BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
                 (offsetof(struct seccomp_data, args[1]))),

        /* O_CREAT set: fall through to the kill; otherwise skip it */
        BPF_JUMP(BPF_JMP | BPF_JSET | BPF_K, O_CREAT, 0, 1),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL),

        /* Any write access bit set: fail with ENOTSUP. This depends on
           O_RDWR and O_WRONLY being single, nonoverlapping bits. */
        BPF_JUMP(BPF_JMP | BPF_JSET | BPF_K, O_WRONLY | O_RDWR, 0, 1),
        BPF_STMT(BPF_RET | BPF_K,
                 SECCOMP_RET_ERRNO | (ENOTSUP & SECCOMP_RET_DATA)),

        /* Read-only open() without O_CREAT */
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW)
    };

    struct sock_fprog prog = {
        .len = (unsigned short) (sizeof insns / sizeof insns[0]),
        .filter = insns,
    };

    /*
     * On Linux 3.16 and earlier the filter must instead be installed with
     * prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog).
     */
    if (seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog) != 0) {
        errExit("seccomp");
    }
}

/*
 * Set PR_SET_NO_NEW_PRIVS, install the filter, then try open() on
 * "/tmp/a" with four different flag combinations, reporting each
 * failure with perror().
 */
int
main(int argc, char **argv)
{
    if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) != 0) {
        errExit("prctl");
    }

    install_filter();

    /* Read-only */
    if (open("/tmp/a", O_RDONLY) == -1) {
        perror("open1");
    }

    /* Write-only */
    if (open("/tmp/a", O_WRONLY) == -1) {
        perror("open2");
    }

    /* Read-write */
    if (open("/tmp/a", O_RDWR) == -1) {
        perror("open3");
    }

    /* Read-write with O_CREAT */
    if (open("/tmp/a", O_CREAT | O_RDWR, 0600) == -1) {
        perror("open4");
    }

    exit(EXIT_SUCCESS);
}