/**
 * Set up the connection to the audit subsystem.
 *
 * Does nothing unless libaudit support is compiled in. The audit fd is
 * only opened when the process has CAP_AUDIT_WRITE in its effective set.
 * A failure to open it is logged through the bus context, except for the
 * errno values that indicate the kernel has no audit support.
 *
 * @param context the bus context, used for logging the failure
 */
void
bus_audit_init (BusContext *context)
{
#ifdef HAVE_LIBAUDIT
  int fd;
  int saved_errno;

  capng_get_caps_process ();

  /* libcap-ng older than 0.7.7 leaks a file descriptor that is not
   * close-on-exec. Assume it is one of the first few fds and mark
   * that whole range close-on-exec. */
  fd = 3;
  while (fd < 42)
    {
      _dbus_fd_set_close_on_exec (fd);
      fd++;
    }

  /* Without CAP_AUDIT_WRITE there is no point in opening the fd. */
  if (!capng_have_capability (CAPNG_EFFECTIVE, CAP_AUDIT_WRITE))
    return;

  audit_fd = audit_open ();
  if (audit_fd >= 0)
    return;

  /* Capture errno before anything else can overwrite it. */
  saved_errno = errno;

  /* A kernel without audit support is not worth a warning. */
  if (saved_errno == EINVAL ||
      saved_errno == EPROTONOSUPPORT ||
      saved_errno == EAFNOSUPPORT)
    return;

  bus_context_log (context, DBUS_SYSTEM_LOG_WARNING,
                   "Failed to open connection to the audit subsystem: %s",
                   _dbus_strerror (saved_errno));
#endif /* HAVE_LIBAUDIT */
}
/**
 * printf-style logging callback.
 *
 * When libaudit support is compiled in, the audit fd is open and the
 * process has CAP_AUDIT_WRITE in its effective set, the formatted message
 * is sent to the audit subsystem as an AUDIT_USER_AVC record. Otherwise it
 * is sent to syslog at LOG_USER | LOG_INFO.
 *
 * @param fmt printf-style format string
 * @param ... arguments consumed by fmt
 */
static void
log_callback (const char *fmt, ...)
{
  va_list ap;

  va_start (ap, fmt);

#ifdef HAVE_LIBAUDIT
  if (audit_fd >= 0)
    {
      /* Re-read the capabilities on every call before testing them. */
      capng_get_caps_process ();
      if (capng_have_capability (CAPNG_EFFECTIVE, CAP_AUDIT_WRITE))
        {
          char buf[PATH_MAX*2];

          /* FIXME: need to change this to show real user */
          vsnprintf (buf, sizeof (buf), fmt, ap);
          audit_log_user_avc_message (audit_fd, AUDIT_USER_AVC, buf,
                                      NULL, NULL, NULL, getuid ());

          /* Every va_start must be paired with va_end before the
           * function returns, including on this early-return path. */
          va_end (ap);
          return;
        }
    }
#endif /* HAVE_LIBAUDIT */

  vsyslog (LOG_USER | LOG_INFO, fmt, ap);
  va_end (ap);
}
/**
 * Validate the requested scheduler type, downgrading it when necessary.
 *
 * @param scheduler in/out: the requested scheduler; overwritten with
 *                  SCHED_OTHER whenever the request is rejected.
 * @return 0 when the requested scheduler is accepted unchanged, -1 when
 *         it was reset to SCHED_OTHER.
 */
INTERNAL_QUAL int rtos_task_check_scheduler(int* scheduler)
{
#ifdef ORO_OS_LINUX_CAP_NG
    // Load the process capabilities first; they are queried further down.
    if (capng_get_caps_process() != 0) {
        log(Error) << "Failed to retrieve capabilities (lowering to SCHED_OTHER)." <<endlog();
        *scheduler = SCHED_OTHER;
        return -1;
    }
#endif

    // A real-time request from a non-root user (and, with cap-ng, one
    // without CAP_SYS_NICE) needs the extra rlimit check below.
    bool unprivileged_rt = (*scheduler != SCHED_OTHER) && (geteuid() != 0);
#ifdef ORO_OS_LINUX_CAP_NG
    unprivileged_rt = unprivileged_rt
        && (capng_have_capability(CAPNG_EFFECTIVE, CAP_SYS_NICE) == 0);
#endif

    if (unprivileged_rt) {
        // Not root but asking for a real-time priority: this _might_ still
        // be fine when pam_limits grants an rtprio ulimit
        // (see "/etc/security/limits.conf" and "ulimit -a").
        struct rlimit rtprio_limit;
        const bool limit_unavailable =
            (getrlimit(RLIMIT_RTPRIO, &rtprio_limit) != 0)
            || (rtprio_limit.rlim_cur == 0);

        if (limit_unavailable) {
            log(Warning) << "Lowering scheduler type to SCHED_OTHER for non-privileged users.." <<endlog();
            *scheduler = SCHED_OTHER;
            return -1;
        }
    }

    const bool known_scheduler = (*scheduler == SCHED_OTHER)
                              || (*scheduler == SCHED_FIFO)
                              || (*scheduler == SCHED_RR);
    if (known_scheduler)
        return 0;

    log(Error) << "Unknown scheduler type." <<endlog();
    *scheduler = SCHED_OTHER;
    return -1;
}
void secure_capng_get_caps_process() { auto ret = int { capng_get_caps_process() }; /// does syscall to actually read state bool fail = (ret!=0); bool badval = false; if (fail||badval) { std::ostringstream oss; oss<<"Error: " << (fail ? "FAILED":"") << " " << (badval ? "BAD-VALUE":"") << " (ret="<<ret<<") in " << __func__ << "." ; throw capmodpp_error(oss.str()); } }
/* Linux-only backend of daemon_become_new_user(), built on libcap-ng.
 *
 * Rebuilds the capability state from scratch with CAP_IPC_LOCK and
 * CAP_NET_BIND_SERVICE, plus CAP_NET_ADMIN, CAP_NET_RAW and
 * CAP_NET_BROADCAST when 'access_datapath' is true, then switches to
 * 'uid':'gid'.  Any failure is fatal.  Compiles to an empty function when
 * not on Linux or when libcap-ng is unavailable. */
static void
daemon_become_new_user_linux(bool access_datapath OVS_UNUSED)
{
#if defined __linux__ && HAVE_LIBCAPNG
    int error = capng_get_caps_process();

    if (error == 0) {
        if (capng_have_capabilities(CAPNG_SELECT_CAPS) <= CAPNG_NONE) {
            /* Query failed or there are no capabilities at all. */
            error = -1;
        } else {
            const capng_type_t sets = CAPNG_EFFECTIVE|CAPNG_PERMITTED;

            /* Start from an empty state, then add back what is needed. */
            capng_clear(CAPNG_SELECT_BOTH);

            error = capng_update(CAPNG_ADD, sets, CAP_IPC_LOCK)
                    || capng_update(CAPNG_ADD, sets, CAP_NET_BIND_SERVICE);

            if (access_datapath && !error) {
                error = capng_update(CAPNG_ADD, sets, CAP_NET_ADMIN)
                        || capng_update(CAPNG_ADD, sets, CAP_NET_RAW)
                        || capng_update(CAPNG_ADD, sets, CAP_NET_BROADCAST);
            }
        }
    }

    if (error == 0) {
        /* CAPNG_INIT_SUPP_GRP would be preferable to CAPNG_DROP_SUPP_GRP,
         * but that enum value only exists in libcap-ng newer than 0.7.4,
         * which is not yet widely available on Linux distributions.  The
         * conservative choice keeps OVS behaving consistently.
         *
         * XXX We may change this for future OVS releases. */
        error = capng_change_id(uid, gid,
                                CAPNG_DROP_SUPP_GRP | CAPNG_CLEAR_BOUNDING);
    }

    if (error != 0) {
        VLOG_FATAL("%s: libcap-ng fail to switch to user and group "
                   "%d:%d, aborting", pidfile, uid, gid);
    }
#endif
}
/*
 * Thread entry point: after a one second delay, load the process
 * capabilities and exit the program with status 1 unless none are
 * reported.  On success the capng state is cleared and NULL is returned.
 * The 'arg' parameter is not used.
 */
void *thread2_main(void *arg)
{
	sleep(1);

#ifdef DEBUG
	printf("thread2 getting capabilities\n");
#endif

	capng_get_caps_process();

	if (capng_have_capabilities(CAPNG_SELECT_CAPS) == CAPNG_NONE) {
		/* Expected outcome: nothing reported. */
		capng_clear(CAPNG_SELECT_BOTH);
#ifdef DEBUG
		printf("SUCCESS: No capabilities reported\n");
#endif
		return NULL;
	}

	printf("Detected capabilities when they should not be any\n");
	exit(1);
}
int main(int argc, char **argv) { const char *atsec = ""; /* * Be careful just in case a setgid or setcapped copy of this * helper gets out. */ if (argc != 5) ksft_exit_fail_msg("wrong argc\n"); #ifdef HAVE_GETAUXVAL if (getauxval(AT_SECURE)) atsec = " (AT_SECURE is set)"; else atsec = " (AT_SECURE is not set)"; #endif capng_get_caps_process(); if (capng_have_capability(CAPNG_EFFECTIVE, CAP_NET_BIND_SERVICE) != bool_arg(argv, 1)) { ksft_print_msg("Wrong effective state%s\n", atsec); return 1; } if (capng_have_capability(CAPNG_PERMITTED, CAP_NET_BIND_SERVICE) != bool_arg(argv, 2)) { ksft_print_msg("Wrong permitted state%s\n", atsec); return 1; } if (capng_have_capability(CAPNG_INHERITABLE, CAP_NET_BIND_SERVICE) != bool_arg(argv, 3)) { ksft_print_msg("Wrong inheritable state%s\n", atsec); return 1; } if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_NET_BIND_SERVICE, 0, 0, 0) != bool_arg(argv, 4)) { ksft_print_msg("Wrong ambient state%s\n", atsec); return 1; } ksft_print_msg("%s: Capabilities after execve were correct\n", "validate_cap:"); return 0; }
// Update the capabilities of the running process to include the given // capability in the Ambient set. static void set_ambient_cap(cap_value_t cap) { capng_get_caps_process(); if (capng_update(CAPNG_ADD, CAPNG_INHERITABLE, (unsigned long) cap)) { perror("cannot raise the capability into the Inheritable set\n"); exit(1); } capng_apply(CAPNG_SELECT_CAPS); if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, (unsigned long) cap, 0, 0)) { perror("cannot raise the capability into the Ambient set\n"); exit(1); } }
/*
 * Runs as the 'init' process inside the container.
 *
 * Drops capabilities that could be dangerous to the host system because
 * they are not currently "containerized". They are removed from the
 * effective, permitted, inheritable and bounding sets.
 *
 * Returns 0 on success (or when libcap-ng support is not compiled in,
 * after emitting a warning), -1 if updating or applying the capability
 * state fails.
 */
static int lxcContainerDropCapabilities(void)
{
#if HAVE_CAPNG
    int rc;

    capng_get_caps_process();

    rc = capng_updatev(CAPNG_DROP,
                       CAPNG_EFFECTIVE | CAPNG_PERMITTED |
                       CAPNG_INHERITABLE | CAPNG_BOUNDING_SET,
                       CAP_SYS_BOOT,      /* No use of reboot */
                       CAP_SYS_MODULE,    /* No kernel module loading */
                       CAP_SYS_TIME,      /* No changing the clock */
                       CAP_AUDIT_CONTROL, /* No messing with auditing status */
                       CAP_MAC_ADMIN,     /* No messing with LSM config */
                       -1                 /* sentinel */);
    if (rc < 0) {
        lxcError(VIR_ERR_INTERNAL_ERROR,
                 _("Failed to remove capabilities: %d"), rc);
        return -1;
    }

    rc = capng_apply(CAPNG_SELECT_BOTH);
    if (rc < 0) {
        lxcError(VIR_ERR_INTERNAL_ERROR,
                 _("Failed to apply capabilities: %d"), rc);
        return -1;
    }

    /* capng_lock() is not needed here. The bounding set restriction
     * already prevents reacquiring sys_boot/module/time etc., which is
     * all that matters for the container. Inside the container it is
     * fine for SECURE_NOROOT / SECURE_NO_SETUID_FIXUP to stay unmasked,
     * since nothing can escape the bounding set. */
#else
    VIR_WARN0("libcap-ng support not compiled in, unable to clear capabilities");
#endif
    return 0;
}
int main(int argc, char *argv[]) { char *unikernel; enum { QEMU, KVM, UKVM, UNIX } hypervisor; if (argc < 3) { fprintf(stderr, "usage: runner HYPERVISOR UNIKERNEL [ ARGS... ]\n"); fprintf(stderr, "HYPERVISOR: qemu | kvm | ukvm | unix\n"); return 1; } if (strcmp(argv[1], "qemu") == 0) hypervisor = QEMU; else if (strcmp(argv[1], "kvm") == 0) hypervisor = KVM; else if (strcmp(argv[1], "ukvm") == 0) hypervisor = UKVM; else if (strcmp(argv[1], "unix") == 0) hypervisor = UNIX; else { warnx("error: Invalid hypervisor: %s", argv[1]); return 1; } unikernel = argv[2]; /* * Remaining arguments are to be passed on to the unikernel. */ argv += 3; argc -= 3; /* * Check we have CAP_NET_ADMIN. */ if (capng_get_caps_process() != 0) { warnx("error: capng_get_caps_process() failed"); return 1; } if (!capng_have_capability(CAPNG_EFFECTIVE, CAP_NET_ADMIN)) { warnx("error: CAP_NET_ADMIN is required"); return 1; } /* * Connect to netlink, load link cache from kernel. */ struct nl_sock *sk; struct nl_cache *link_cache; int err; sk = nl_socket_alloc(); assert(sk); err = nl_connect(sk, NETLINK_ROUTE); if (err < 0) { warnx("nl_connect() failed: %s", nl_geterror(err)); return 1; } err = rtnl_link_alloc_cache(sk, AF_UNSPEC, &link_cache); if (err < 0) { warnx("rtnl_link_alloc_cache() failed: %s", nl_geterror(err)); return 1; } /* * Retrieve container network configuration -- IP address and * default gateway. */ struct rtnl_link *l_veth; l_veth = rtnl_link_get_by_name(link_cache, VETH_LINK_NAME); if (l_veth == NULL) { warnx("error: Could not get link information for %s", VETH_LINK_NAME); return 1; } struct nl_addr *veth_addr; err = get_link_inet_addr(sk, l_veth, &veth_addr); if (err) { warnx("error: Unable to determine IP address of %s", VETH_LINK_NAME); return 1; } struct nl_addr *gw_addr; err = get_default_gw_inet_addr(sk, &gw_addr); if (err) { warnx("error: get_deGfault_gw_inet_addr() failed"); return 1; } if (gw_addr == NULL) { warnx("error: No default gateway found. 
This is currently " "not supported"); return 1; } /* * Create bridge and tap interface, enslave veth and tap interfaces to * bridge. */ err = create_bridge_link(sk, BRIDGE_LINK_NAME); if (err < 0) { warnx("create_bridge_link(%s) failed: %s", BRIDGE_LINK_NAME, nl_geterror(err)); return 1; } int tap_fd; if (hypervisor == UKVM) err = create_tap_link(TAP_LINK_NAME, &tap_fd); else err = create_tap_link(TAP_LINK_NAME, NULL); if (err != 0) { warnx("create_tap_link(%s) failed: %s", TAP_LINK_NAME, strerror(err)); return 1; } /* Refill link cache with newly-created interfaces */ nl_cache_refill(sk, link_cache); struct rtnl_link *l_bridge; l_bridge = rtnl_link_get_by_name(link_cache, BRIDGE_LINK_NAME); if (l_bridge == NULL) { warnx("error: Could not get link information for %s", BRIDGE_LINK_NAME); return 1; } struct rtnl_link *l_tap; l_tap = rtnl_link_get_by_name(link_cache, TAP_LINK_NAME); if (l_tap == NULL) { warnx("error: Could not get link information for %s", TAP_LINK_NAME); return 1; } err = rtnl_link_enslave(sk, l_bridge, l_veth); if (err < 0) { warnx("error: Unable to enslave %s to %s: %s", VETH_LINK_NAME, BRIDGE_LINK_NAME, nl_geterror(err)); return 1; } err = rtnl_link_enslave(sk, l_bridge, l_tap); if (err < 0) { warnx("error: Unable to enslave %s to %s: %s", TAP_LINK_NAME, BRIDGE_LINK_NAME, nl_geterror(err)); return 1; } /* * Flush all IPv4 addresses from the veth interface. This is now safe * as we are good to commit and have retrieved the existing configuration. */ struct rtnl_addr *flush_addr; flush_addr = rtnl_addr_alloc(); assert(flush_addr); rtnl_addr_set_ifindex(flush_addr, rtnl_link_get_ifindex(l_veth)); rtnl_addr_set_family(flush_addr, AF_INET); rtnl_addr_set_local(flush_addr, veth_addr); err = rtnl_addr_delete(sk, flush_addr, 0); if (err < 0) { warnx("error: Could not flush addresses on %s: %s", VETH_LINK_NAME, nl_geterror(err)); return 1; } rtnl_addr_put(flush_addr); /* * Bring up the tap and bridge interfaces. 
*/ struct rtnl_link *l_up; l_up = rtnl_link_alloc(); assert(l_up); /* You'd think set_operstate was the thing to do here. It's not. */ rtnl_link_set_flags(l_up, IFF_UP); err = rtnl_link_change(sk, l_tap, l_up, 0); if (err < 0) { warnx("error: rtnl_link_change(%s, UP) failed: %s", TAP_LINK_NAME, nl_geterror(err)); return 1; } err = rtnl_link_change(sk, l_bridge, l_up, 0); if (err < 0) { warnx("error: rtnl_link_change(%s, UP) failed: %s", BRIDGE_LINK_NAME, nl_geterror(err)); return 1; } rtnl_link_put(l_up); /* * Collect network configuration data. */ char ip[AF_INET_BUFSIZE]; if (inet_ntop(AF_INET, nl_addr_get_binary_addr(veth_addr), ip, sizeof ip) == NULL) { perror("inet_ntop()"); return 1; } char uarg_ip[AF_INET_BUFSIZE]; unsigned int prefixlen = nl_addr_get_prefixlen(veth_addr); snprintf(uarg_ip, sizeof uarg_ip, "%s/%u", ip, prefixlen); char uarg_gw[AF_INET_BUFSIZE]; if (inet_ntop(AF_INET, nl_addr_get_binary_addr(gw_addr), uarg_gw, sizeof uarg_gw) == NULL) { perror("inet_ntop()"); return 1; } /* * Build unikernel and hypervisor arguments. */ ptrvec* uargpv = pvnew(); char *uarg_buf; /* * QEMU/KVM: * /usr/bin/qemu-system-x86_64 <qemu args> -kernel <unikernel> -append "<unikernel args>" */ if (hypervisor == QEMU || hypervisor == KVM) { pvadd(uargpv, "/usr/bin/qemu-system-x86_64"); pvadd(uargpv, "-nodefaults"); pvadd(uargpv, "-no-acpi"); pvadd(uargpv, "-display"); pvadd(uargpv, "none"); pvadd(uargpv, "-serial"); pvadd(uargpv, "stdio"); pvadd(uargpv, "-m"); pvadd(uargpv, "512"); if (hypervisor == KVM) { pvadd(uargpv, "-enable-kvm"); pvadd(uargpv, "-cpu"); pvadd(uargpv, "host"); } else { /* * Required for AESNI use in Mirage. 
*/ pvadd(uargpv, "-cpu"); pvadd(uargpv, "Westmere"); } pvadd(uargpv, "-device"); char *guest_mac = generate_mac(); assert(guest_mac); err = asprintf(&uarg_buf, "virtio-net-pci,netdev=n0,mac=%s", guest_mac); assert(err != -1); pvadd(uargpv, uarg_buf); pvadd(uargpv, "-netdev"); err = asprintf(&uarg_buf, "tap,id=n0,ifname=%s,script=no,downscript=no", TAP_LINK_NAME); assert(err != -1); pvadd(uargpv, uarg_buf); pvadd(uargpv, "-kernel"); pvadd(uargpv, unikernel); pvadd(uargpv, "-append"); /* * TODO: Replace any occurences of ',' with ',,' in -append, because * QEMU arguments are insane. */ char cmdline[1024]; char *cmdline_p = cmdline; size_t cmdline_free = sizeof cmdline; for (; *argv; argc--, argv++) { size_t alen = snprintf(cmdline_p, cmdline_free, "%s%s", *argv, (argc > 1) ? " " : ""); if (alen >= cmdline_free) { warnx("error: Command line too long"); return 1; } cmdline_free -= alen; cmdline_p += alen; } size_t alen = snprintf(cmdline_p, cmdline_free, "--ipv4=%s --ipv4-gateway=%s", uarg_ip, uarg_gw); if (alen >= cmdline_free) { warnx("error: Command line too long"); return 1; } pvadd(uargpv, cmdline); } /* * UKVM: * /unikernel/ukvm <ukvm args> <unikernel> -- <unikernel args> */ else if (hypervisor == UKVM) { pvadd(uargpv, "/unikernel/ukvm"); err = asprintf(&uarg_buf, "--net=@%d", tap_fd); assert(err != -1); pvadd(uargpv, uarg_buf); pvadd(uargpv, "--"); pvadd(uargpv, unikernel); for (; *argv; argc--, argv++) { pvadd(uargpv, *argv); } err = asprintf(&uarg_buf, "--ipv4=%s", uarg_ip); assert(err != -1); pvadd(uargpv, uarg_buf); err = asprintf(&uarg_buf, "--ipv4-gateway=%s", uarg_gw); assert(err != -1); pvadd(uargpv, uarg_buf); } /* * UNIX: * <unikernel> <unikernel args> */ else if (hypervisor == UNIX) { pvadd(uargpv, unikernel); err = asprintf(&uarg_buf, "--interface=%s", TAP_LINK_NAME); assert(err != -1); pvadd(uargpv, uarg_buf); for (; *argv; argc--, argv++) { pvadd(uargpv, *argv); } err = asprintf(&uarg_buf, "--ipv4=%s", uarg_ip); assert(err != -1); pvadd(uargpv, 
uarg_buf); err = asprintf(&uarg_buf, "--ipv4-gateway=%s", uarg_gw); assert(err != -1); pvadd(uargpv, uarg_buf); } char **uargv = (char **)pvfinal(uargpv); /* * Done with netlink, free all resources and close socket. */ rtnl_link_put(l_veth); rtnl_link_put(l_bridge); rtnl_link_put(l_tap); nl_addr_put(veth_addr); nl_addr_put(gw_addr); nl_cache_free(link_cache); nl_close(sk); nl_socket_free(sk); /* * Drop all capabilities except CAP_NET_BIND_SERVICE. */ capng_clear(CAPNG_SELECT_BOTH); capng_update(CAPNG_ADD, CAPNG_EFFECTIVE | CAPNG_PERMITTED | CAPNG_INHERITABLE, CAP_NET_BIND_SERVICE); if (capng_apply(CAPNG_SELECT_BOTH) != 0) { warnx("error: Could not drop capabilities"); return 1; } /* * Run the unikernel. */ err = execv(uargv[0], uargv); warn("error: execv() of %s failed", uargv[0]); return 1; }
static void report(void) { int rc, escalated = 0, need_comma = 0; uid_t uid, euid, suid; gid_t gid, egid, sgid; // Refresh what we have for capabilities if (capng_get_caps_process()) { printf("Error getting capabilities\n"); exit(1); } // Check user credentials getresuid(&uid, &euid, &suid); getresgid(&gid, &egid, &sgid); if (no_child) { if ((uid != euid && uid != 0) || capng_have_capability(CAPNG_EFFECTIVE, CAP_SETUID)) { printf("Attempting to regain root..."); setuid(0); getresuid(&uid, &euid, &suid); if (uid == 0) { printf("SUCCESS - PRIVILEGE ESCALATION POSSIBLE\n"); setgid(0); getresgid(&gid, &egid, &sgid); escalated = 1; } else printf("FAILED\n"); } printf("Child "); } printf("User credentials uid:%d euid:%d suid:%d\n", uid, euid, suid); if (no_child) printf("Child "); printf("Group credentials gid:%d egid:%d sgid:%d\n", gid, egid, sgid); if (uid != euid || gid != egid) printf("Note: app has mismatching credentials!!\n"); // Check capabilities if (text) { if (capng_have_capabilities(CAPNG_SELECT_CAPS) == CAPNG_NONE) { if (no_child) printf("Child capabilities: none\n"); else printf("Current capabilities: none\n"); } else { if (no_child) printf("Child "); printf("Effective: "); capng_print_caps_text(CAPNG_PRINT_STDOUT, CAPNG_EFFECTIVE); printf("\n"); if (no_child) printf("Child "); printf("Permitted: "); capng_print_caps_text(CAPNG_PRINT_STDOUT, CAPNG_PERMITTED); printf("\n"); if (no_child) printf("Child "); printf("Inheritable: "); capng_print_caps_text(CAPNG_PRINT_STDOUT, CAPNG_INHERITABLE); printf("\n"); if (no_child) printf("Child "); printf("Bounding Set: "); capng_print_caps_text(CAPNG_PRINT_STDOUT, CAPNG_BOUNDING_SET); printf("\n"); } } else { if (capng_have_capabilities(CAPNG_SELECT_CAPS) == CAPNG_NONE) { if (no_child) printf("Child capabilities: none\n"); else printf("Current capabilities: none\n"); } else { if (no_child) printf("Child capabilities:\n"); capng_print_caps_numeric(CAPNG_PRINT_STDOUT, CAPNG_SELECT_BOTH); } } // Now check securebits flags 
#ifdef PR_SET_SECUREBITS if (no_child) printf("Child "); printf("securebits flags: "); rc = prctl(PR_GET_SECUREBITS, 1 << SECURE_NOROOT); if (rc & (1 << SECURE_NOROOT)) { printf("NOROOT"); need_comma = 1; } rc = prctl(PR_GET_SECUREBITS, 1 << SECURE_NOROOT_LOCKED); if (rc & (1 << SECURE_NOROOT_LOCKED)) { if (need_comma) printf(", "); printf("NOROOT_LOCKED"); need_comma = 1; } rc = prctl(PR_GET_SECUREBITS, 1 << SECURE_NO_SETUID_FIXUP); if (rc & (1 << SECURE_NO_SETUID_FIXUP)) { if (need_comma) printf(", "); printf("NO_SETUID_FIXUP"); need_comma = 1; } rc = prctl(PR_GET_SECUREBITS, 1 << SECURE_NO_SETUID_FIXUP_LOCKED); if (rc & (1 << SECURE_NO_SETUID_FIXUP_LOCKED)) { if (need_comma) printf(", "); printf("NO_SETUID_FIXUP_LOCKED"); need_comma = 1; } if (need_comma == 0) printf("none"); printf("\n"); #endif // Now do child process checks if (no_child == 0 || escalated) { printf("Attempting direct access to shadow..."); if (access("/etc/shadow", R_OK) == 0) printf("SUCCESS\n"); else printf("FAILED (%s)\n", strerror(errno)); } if (no_child == 0) { printf("Attempting to access shadow by child process..."); rc = system("cat /etc/shadow > /dev/null 2>&1"); if (rc == 0) printf("SUCCESS\n"); else printf("FAILED\n"); if (text) system("/usr/bin/captest --no-child --text"); else system("/usr/bin/captest --no-child"); } }