static int ct_clone(void *arg) { int ret = -1; struct ct_clone_arg *ca = arg; struct container *ct = ca->ct; struct process_desc *p = ca->p; close(ca->child_wait_pipe[1]); close(ca->parent_wait_pipe[0]); ret = spawn_wait_and_close(ca->child_wait_pipe); if (ret) goto err_um; if (ct->nsmask & CLONE_NEWUSER) { if (setuid(0) || setgid(0) || setgroups(0, NULL)) goto err; } if (prctl(PR_SET_PDEATHSIG, p->pdeathsig)) goto err; if (!(ct->flags & CT_NOSETSID) && setsid() == -1) goto err; if (ct->tty_fd >= 0 && ioctl(ct->tty_fd, TIOCSCTTY, 0) == -1) goto err; if (p->fds) { p->fds[p->fdn] = ca->parent_wait_pipe[1]; if (setup_fds(p->fds, p->fdn + 1)) goto err; ca->parent_wait_pipe[1] = p->fdn; } if (ct->nsmask & CLONE_NEWNS) { /* * Remount / as slave, so that it doesn't * propagate its changes to our container. */ ret = -LCTERR_CANTMOUNT; if (mount("none", "/", "none", MS_SLAVE|MS_REC, NULL)) goto err; } if (try_mount_cg(ct)) goto err; ret = cgroups_attach(ct); if (ret < 0) goto err_um; if (ct->root_path) { /* * Mount external in child, since it may live * in sub mount namespace. If it doesn't do * it here anyway, just umount by hands in the * fs_umount(). */ ret = fs_mount_ext(ct); if (ret < 0) goto err; ret = set_ct_root(ct); if (ret < 0) goto err_um; ret = fs_create_devnodes(ct); if (ret < 0) goto err; } ret = uname_set(ct); if (ret < 0) goto err_um; ret = try_mount_proc(ct); if (ret < 0) goto err_um; ret = apply_creds(p); if (ret < 0) goto err_um; if (p->lsm_label) ret = lsm_process_label_set(p->lsm_label, false, p->lsm_on_exec); p->lsm_on_exec = 0; if (ret < 0) goto err; if (ca->is_exec) spawn_wake_and_cloexec(ca->parent_wait_pipe, 0); else spawn_wake_and_close(ca->parent_wait_pipe, 0); ret = ca->cb(ca->arg); if (ca->is_exec) goto err; return ret; err_um: if (ct->root_path) fs_umount_ext(ct); err: spawn_wake_and_close(ca->parent_wait_pipe, ret); exit(ret); }
static int attach_child_main(void* data) { struct attach_clone_payload* payload = (struct attach_clone_payload*)data; int ipc_socket = payload->ipc_socket; lxc_attach_options_t* options = payload->options; struct lxc_proc_context_info* init_ctx = payload->init_ctx; #if HAVE_SYS_PERSONALITY_H long new_personality; #endif int ret; int status; int expected; long flags; int fd; uid_t new_uid; gid_t new_gid; /* wait for the initial thread to signal us that it's ready * for us to start initializing */ expected = 0; status = -1; ret = lxc_read_nointr_expect(ipc_socket, &status, sizeof(status), &expected); if (ret <= 0) { ERROR("error using IPC to receive notification from initial process (0)"); shutdown(ipc_socket, SHUT_RDWR); rexit(-1); } /* A description of the purpose of this functionality is * provided in the lxc-attach(1) manual page. We have to * remount here and not in the parent process, otherwise * /proc may not properly reflect the new pid namespace. */ if (!(options->namespaces & CLONE_NEWNS) && (options->attach_flags & LXC_ATTACH_REMOUNT_PROC_SYS)) { ret = lxc_attach_remount_sys_proc(); if (ret < 0) { shutdown(ipc_socket, SHUT_RDWR); rexit(-1); } } /* now perform additional attachments*/ #if HAVE_SYS_PERSONALITY_H if (options->personality < 0) new_personality = init_ctx->personality; else new_personality = options->personality; if (options->attach_flags & LXC_ATTACH_SET_PERSONALITY) { ret = personality(new_personality); if (ret < 0) { SYSERROR("could not ensure correct architecture"); shutdown(ipc_socket, SHUT_RDWR); rexit(-1); } } #endif if (options->attach_flags & LXC_ATTACH_DROP_CAPABILITIES) { ret = lxc_attach_drop_privs(init_ctx); if (ret < 0) { ERROR("could not drop privileges"); shutdown(ipc_socket, SHUT_RDWR); rexit(-1); } } /* always set the environment (specify (LXC_ATTACH_KEEP_ENV, NULL, NULL) if you want this to be a no-op) */ ret = lxc_attach_set_environment(options->env_policy, options->extra_env_vars, options->extra_keep_env); if (ret < 0) { ERROR("could not set initial environment for attached process"); shutdown(ipc_socket, SHUT_RDWR); rexit(-1); } /* set user / group id */ new_uid = 0; new_gid = 0; /* ignore errors, we will fall back to root in that case * (/proc was not mounted etc.) */ if (options->namespaces & CLONE_NEWUSER) lxc_attach_get_init_uidgid(&new_uid, &new_gid); if (options->uid != (uid_t)-1) new_uid = options->uid; if (options->gid != (gid_t)-1) new_gid = options->gid; /* try to set the uid/gid combination */ if ((new_gid != 0 || options->namespaces & CLONE_NEWUSER)) { if (setgid(new_gid) || setgroups(0, NULL)) { SYSERROR("switching to container gid"); shutdown(ipc_socket, SHUT_RDWR); rexit(-1); } } if ((new_uid != 0 || options->namespaces & CLONE_NEWUSER) && setuid(new_uid)) { SYSERROR("switching to container uid"); shutdown(ipc_socket, SHUT_RDWR); rexit(-1); } /* tell initial process it may now put us into the cgroups */ status = 1; ret = lxc_write_nointr(ipc_socket, &status, sizeof(status)); if (ret != sizeof(status)) { ERROR("error using IPC to notify initial process for initialization (1)"); shutdown(ipc_socket, SHUT_RDWR); rexit(-1); } /* wait for the initial thread to signal us that it has done * everything for us when it comes to cgroups etc. */ expected = 2; status = -1; ret = lxc_read_nointr_expect(ipc_socket, &status, sizeof(status), &expected); if (ret <= 0) { ERROR("error using IPC to receive final notification from initial process (2)"); shutdown(ipc_socket, SHUT_RDWR); rexit(-1); } shutdown(ipc_socket, SHUT_RDWR); close(ipc_socket); /* set new apparmor profile/selinux context */ if ((options->namespaces & CLONE_NEWNS) && (options->attach_flags & LXC_ATTACH_LSM)) { int on_exec; on_exec = options->attach_flags & LXC_ATTACH_LSM_EXEC ? 1 : 0; ret = lsm_process_label_set(init_ctx->lsm_label, init_ctx->container->lxc_conf, 0, on_exec); if (ret < 0) { rexit(-1); } } if (init_ctx->container && init_ctx->container->lxc_conf && lxc_seccomp_load(init_ctx->container->lxc_conf) != 0) { ERROR("Loading seccomp policy"); rexit(-1); } lxc_proc_put_context_info(init_ctx); /* The following is done after the communication socket is * shut down. That way, all errors that might (though * unlikely) occur up until this point will have their messages * printed to the original stderr (if logging is so configured) * and not the fd the user supplied, if any. */ /* fd handling for stdin, stdout and stderr; * ignore errors here, user may want to make sure * the fds are closed, for example */ if (options->stdin_fd >= 0 && options->stdin_fd != 0) dup2(options->stdin_fd, 0); if (options->stdout_fd >= 0 && options->stdout_fd != 1) dup2(options->stdout_fd, 1); if (options->stderr_fd >= 0 && options->stderr_fd != 2) dup2(options->stderr_fd, 2); /* close the old fds */ if (options->stdin_fd > 2) close(options->stdin_fd); if (options->stdout_fd > 2) close(options->stdout_fd); if (options->stderr_fd > 2) close(options->stderr_fd); /* try to remove CLOEXEC flag from stdin/stdout/stderr, * but also here, ignore errors */ for (fd = 0; fd <= 2; fd++) { flags = fcntl(fd, F_GETFL); if (flags < 0) continue; if (flags & FD_CLOEXEC) { if (fcntl(fd, F_SETFL, flags & ~FD_CLOEXEC) < 0) { SYSERROR("Unable to clear CLOEXEC from fd"); } } } /* we're done, so we can now do whatever the user intended us to do */ rexit(payload->exec_function(payload->exec_payload)); }