/*
 * Set up logging from the supplied arguments.
 *
 * A file descriptor is received through recv_fd(tsock); on success it is
 * installed as the log fd and args->log_level becomes the active level.
 *
 * Returns 0 on success, or the negative value returned by recv_fd().
 */
static int parasite_cfg_log(struct parasite_log_args *args)
{
	int fd = recv_fd(tsock);

	/* Nothing to configure without a descriptor: hand the error back. */
	if (fd < 0)
		return fd;

	log_set_fd(fd);
	log_set_loglevel(args->log_level);

	return 0;
}
/*
 * Apply the logger settings taken from the LOG_* constants: tag, line
 * length, flush-to-variable flag, level and timestamp flag, in that order.
 */
static VOID log_init(VOID)
{
	EFI_STATUS status = log_set_logtag(LOG_TAG);

	/*
	 * Failing to set the tag is only reported; the remaining settings
	 * are applied regardless.
	 */
	if (EFI_ERROR(status)) {
		warning(L"Could not set log tag: %r\n", status);
	}

	log_set_line_len(LOG_LINE_LEN);
	log_set_flush_to_var(LOG_FLUSH_TO_VARIABLE);
	log_set_loglevel(LOG_LEVEL);
	log_set_logtimestamp(LOG_TIMESTAMP);
}
void log_apply_config(struct emu *emu) { const char *priority; enum log_priority log_priority; int i; priority = emu->config->loglevel; if (!priority) { log_set_loglevel(LOG_PRIORITY_INFO); return; } log_priority = LOG_PRIORITY_INFO; for (i = LOG_PRIORITY_DEBUG; i < LOG_PRIORITY_NUM_PRIORITIES; i++) { if (strcasecmp(priority, priority_info[i].name) == 0) { log_priority = i; break; } } log_set_loglevel(log_priority); }
int main(int argc, char *argv[], char *envp[]) { pid_t pid = 0, tree_id = 0; int ret = -1; bool usage_error = true; bool has_exec_cmd = false; int opt, idx; int log_level = LOG_UNSET; char *imgs_dir = "."; char *work_dir = NULL; static const char short_opts[] = "dSsRf:F:t:p:hcD:o:n:v::x::Vr:jlW:L:M:"; static struct option long_opts[] = { { "tree", required_argument, 0, 't' }, { "pid", required_argument, 0, 'p' }, { "leave-stopped", no_argument, 0, 's' }, { "leave-running", no_argument, 0, 'R' }, { "restore-detached", no_argument, 0, 'd' }, { "restore-sibling", no_argument, 0, 'S' }, { "daemon", no_argument, 0, 'd' }, { "contents", no_argument, 0, 'c' }, { "file", required_argument, 0, 'f' }, { "fields", required_argument, 0, 'F' }, { "images-dir", required_argument, 0, 'D' }, { "work-dir", required_argument, 0, 'W' }, { "log-file", required_argument, 0, 'o' }, { "namespaces", required_argument, 0, 'n' }, { "root", required_argument, 0, 'r' }, { USK_EXT_PARAM, optional_argument, 0, 'x' }, { "help", no_argument, 0, 'h' }, { SK_EST_PARAM, no_argument, 0, 1042 }, { "close", required_argument, 0, 1043 }, { "log-pid", no_argument, 0, 1044 }, { "version", no_argument, 0, 'V' }, { "evasive-devices", no_argument, 0, 1045 }, { "pidfile", required_argument, 0, 1046 }, { "veth-pair", required_argument, 0, 1047 }, { "action-script", required_argument, 0, 1049 }, { LREMAP_PARAM, no_argument, 0, 1041 }, { OPT_SHELL_JOB, no_argument, 0, 'j' }, { OPT_FILE_LOCKS, no_argument, 0, 'l' }, { "page-server", no_argument, 0, 1050 }, { "address", required_argument, 0, 1051 }, { "port", required_argument, 0, 1052 }, { "prev-images-dir", required_argument, 0, 1053 }, { "ms", no_argument, 0, 1054 }, { "track-mem", no_argument, 0, 1055 }, { "auto-dedup", no_argument, 0, 1056 }, { "libdir", required_argument, 0, 'L' }, { "cpu-cap", optional_argument, 0, 1057 }, { "force-irmap", no_argument, 0, 1058 }, { "ext-mount-map", required_argument, 0, 'M' }, { "exec-cmd", no_argument, 0, 1059 }, { 
"manage-cgroups", optional_argument, 0, 1060 }, { "cgroup-root", required_argument, 0, 1061 }, { "inherit-fd", required_argument, 0, 1062 }, { "feature", required_argument, 0, 1063 }, { "skip-mnt", required_argument, 0, 1064 }, { "enable-fs", required_argument, 0, 1065 }, { "enable-external-sharing", no_argument, 0, 1066 }, { "enable-external-masters", no_argument, 0, 1067 }, { "freeze-cgroup", required_argument, 0, 1068 }, { "ghost-limit", required_argument, 0, 1069 }, { "irmap-scan-path", required_argument, 0, 1070 }, { "lsm-profile", required_argument, 0, 1071 }, { "timeout", required_argument, 0, 1072 }, { "external", required_argument, 0, 1073 }, { "empty-ns", required_argument, 0, 1074 }, { "extra", no_argument, 0, 1077 }, { "experimental", no_argument, 0, 1078 }, { "all", no_argument, 0, 1079 }, { }, }; BUILD_BUG_ON(PAGE_SIZE != PAGE_IMAGE_SIZE); if (fault_injection_init()) return 1; cr_pb_init(); setproctitle_init(argc, argv, envp); if (argc < 2) goto usage; init_opts(); if (init_service_fd()) return 1; if (!strcmp(argv[1], "swrk")) { if (argc < 3) goto usage; /* * This is to start criu service worker from libcriu calls. * The usage is "criu swrk <fd>" and is not for CLI/scripts. * The arguments semantics can change at any tyme with the * corresponding lib call change. 
*/ opts.swrk_restore = true; return cr_service_work(atoi(argv[2])); } while (1) { idx = -1; opt = getopt_long(argc, argv, short_opts, long_opts, &idx); if (opt == -1) break; switch (opt) { case 's': opts.final_state = TASK_STOPPED; break; case 'R': opts.final_state = TASK_ALIVE; break; case 'x': if (optarg && unix_sk_ids_parse(optarg) < 0) return 1; opts.ext_unix_sk = true; break; case 'p': pid = atoi(optarg); if (pid <= 0) goto bad_arg; break; case 't': tree_id = atoi(optarg); if (tree_id <= 0) goto bad_arg; break; case 'c': opts.show_pages_content = true; break; case 'f': opts.show_dump_file = optarg; break; case 'F': opts.show_fmt = optarg; break; case 'r': opts.root = optarg; break; case 'd': opts.restore_detach = true; break; case 'S': opts.restore_sibling = true; break; case 'D': imgs_dir = optarg; break; case 'W': work_dir = optarg; break; case 'o': opts.output = optarg; break; case 'n': if (parse_ns_string(optarg)) goto bad_arg; break; case 'v': if (log_level == LOG_UNSET) log_level = 0; if (optarg) { if (optarg[0] == 'v') /* handle -vvvvv */ log_level += strlen(optarg) + 1; else log_level = atoi(optarg); } else log_level++; break; case 1041: pr_info("Will allow link remaps on FS\n"); opts.link_remap_ok = true; break; case 1042: pr_info("Will dump TCP connections\n"); opts.tcp_established_ok = true; break; case 1043: { int fd; fd = atoi(optarg); pr_info("Closing fd %d\n", fd); close(fd); break; } case 1044: opts.log_file_per_pid = 1; break; case 1045: opts.evasive_devices = true; break; case 1046: opts.pidfile = optarg; break; case 1047: { char *aux; aux = strchr(optarg, '='); if (aux == NULL) goto bad_arg; *aux = '\0'; if (veth_pair_add(optarg, aux + 1)) return 1; } break; case 1049: if (add_script(optarg, 0)) return 1; break; case 1050: opts.use_page_server = true; break; case 1051: opts.addr = optarg; break; case 1052: opts.port = htons(atoi(optarg)); if (!opts.port) goto bad_arg; break; case 'j': opts.shell_job = true; break; case 'l': 
opts.handle_file_locks = true; break; case 1053: opts.img_parent = optarg; break; case 1055: opts.track_mem = true; break; case 1056: opts.auto_dedup = true; break; case 1057: if (parse_cpu_cap(&opts, optarg)) goto usage; break; case 1058: opts.force_irmap = true; break; case 1054: pr_err("--ms is deprecated; see \"Check options\" of criu --help\n"); return 1; case 'L': opts.libdir = optarg; break; case 1059: has_exec_cmd = true; break; case 1060: if (parse_manage_cgroups(&opts, optarg)) goto usage; break; case 1061: { char *path, *ctl; path = strchr(optarg, ':'); if (path) { *path = '\0'; path++; ctl = optarg; } else { path = optarg; ctl = NULL; } if (new_cg_root_add(ctl, path)) return -1; } break; case 1062: if (inherit_fd_parse(optarg) < 0) return 1; break; case 1063: ret = check_add_feature(optarg); if (ret < 0) /* invalid kernel feature name */ return 1; if (ret > 0) /* list kernel features and exit */ return 0; break; case 1064: if (!add_skip_mount(optarg)) return 1; break; case 1065: if (!add_fsname_auto(optarg)) return 1; break; case 1066: opts.enable_external_sharing = true; break; case 1067: opts.enable_external_masters = true; break; case 1068: opts.freeze_cgroup = optarg; break; case 1069: opts.ghost_limit = parse_size(optarg); break; case 1070: if (irmap_scan_path_add(optarg)) return -1; break; case 1071: if (parse_lsm_arg(optarg) < 0) return -1; break; case 1072: opts.timeout = atoi(optarg); break; case 'M': { char *aux; if (strcmp(optarg, "auto") == 0) { opts.autodetect_ext_mounts = true; break; } aux = strchr(optarg, ':'); if (aux == NULL) goto bad_arg; *aux = '\0'; if (ext_mount_add(optarg, aux + 1)) return 1; } break; case 1073: if (add_external(optarg)) return 1; break; case 1074: if (!strcmp("net", optarg)) opts.empty_ns |= CLONE_NEWNET; else { pr_err("Unsupported empty namespace: %s", optarg); return 1; } break; case 1077: opts.check_extra_features = true; break; case 1078: opts.check_experimental_features = true; break; case 1079: 
opts.check_extra_features = true; opts.check_experimental_features = true; break; case 'V': pr_msg("Version: %s\n", CRIU_VERSION); if (strcmp(CRIU_GITID, "0")) pr_msg("GitID: %s\n", CRIU_GITID); return 0; case 'h': usage_error = false; goto usage; default: goto usage; } } if (!opts.restore_detach && opts.restore_sibling) { pr_msg("--restore-sibling only makes sense with --restore-detach\n"); return 1; } if (!opts.autodetect_ext_mounts && (opts.enable_external_masters || opts.enable_external_sharing)) { pr_msg("must specify --ext-mount-map auto with --enable-external-{sharing|masters}"); return 1; } if (work_dir == NULL) work_dir = imgs_dir; if (optind >= argc) { pr_msg("Error: command is required\n"); goto usage; } if (has_exec_cmd) { if (argc - optind <= 1) { pr_msg("Error: --exec-cmd requires a command\n"); goto usage; } if (strcmp(argv[optind], "restore")) { pr_msg("Error: --exec-cmd is available for the restore command only\n"); goto usage; } if (opts.restore_detach) { pr_msg("Error: --restore-detached and --exec-cmd cannot be used together\n"); goto usage; } opts.exec_cmd = xmalloc((argc - optind) * sizeof(char *)); if (!opts.exec_cmd) return 1; memcpy(opts.exec_cmd, &argv[optind + 1], (argc - optind - 1) * sizeof(char *)); opts.exec_cmd[argc - optind - 1] = NULL; } /* We must not open imgs dir, if service is called */ if (strcmp(argv[optind], "service")) { ret = open_image_dir(imgs_dir); if (ret < 0) return 1; } if (chdir(work_dir)) { pr_perror("Can't change directory to %s", work_dir); return 1; } log_set_loglevel(log_level); if (log_init(opts.output)) return 1; if (!list_empty(&opts.external) && strcmp(argv[optind], "dump")) { pr_err("--external is dump-only option\n"); return 1; } if (!list_empty(&opts.inherit_fds)) { if (strcmp(argv[optind], "restore")) { pr_err("--inherit-fd is restore-only option\n"); return 1; } /* now that log file is set up, print inherit fd list */ inherit_fd_log(); } if (opts.img_parent) pr_info("Will do snapshot from %s\n", 
opts.img_parent); if (!strcmp(argv[optind], "dump")) { preload_socket_modules(); preload_netfilter_modules(); if (!tree_id) goto opt_pid_missing; return cr_dump_tasks(tree_id); } if (!strcmp(argv[optind], "pre-dump")) { if (!tree_id) goto opt_pid_missing; return cr_pre_dump_tasks(tree_id) != 0; } if (!strcmp(argv[optind], "restore")) { preload_netfilter_modules(); if (tree_id) pr_warn("Using -t with criu restore is obsoleted\n"); ret = cr_restore_tasks(); if (ret == 0 && opts.exec_cmd) { close_pid_proc(); execvp(opts.exec_cmd[0], opts.exec_cmd); pr_perror("Failed to exec command %s", opts.exec_cmd[0]); ret = 1; } return ret != 0; } if (!strcmp(argv[optind], "show")) { pr_msg("The \"show\" action is deprecated by the CRIT utility.\n"); pr_msg("To view an image use the \"crit decode -i $name --pretty\" command.\n"); return -1; } if (!strcmp(argv[optind], "check")) return cr_check() != 0; if (!strcmp(argv[optind], "exec")) { if (!pid) pid = tree_id; /* old usage */ if (!pid) goto opt_pid_missing; return cr_exec(pid, argv + optind + 1) != 0; } if (!strcmp(argv[optind], "page-server")) return cr_page_server(opts.daemon_mode, -1) > 0 ? 
0 : 1; if (!strcmp(argv[optind], "service")) return cr_service(opts.daemon_mode); if (!strcmp(argv[optind], "dedup")) return cr_dedup() != 0; if (!strcmp(argv[optind], "cpuinfo")) { if (!argv[optind + 1]) goto usage; if (!strcmp(argv[optind + 1], "dump")) return cpuinfo_dump(); else if (!strcmp(argv[optind + 1], "check")) return cpuinfo_check(); } pr_msg("Error: unknown command: %s\n", argv[optind]); usage: pr_msg("\n" "Usage:\n" " criu dump|pre-dump -t PID [<options>]\n" " criu restore [<options>]\n" " criu check [--feature FEAT]\n" " criu exec -p PID <syscall-string>\n" " criu page-server\n" " criu service [<options>]\n" " criu dedup\n" "\n" "Commands:\n" " dump checkpoint a process/tree identified by pid\n" " pre-dump pre-dump task(s) minimizing their frozen time\n" " restore restore a process/tree\n" " check checks whether the kernel support is up-to-date\n" " exec execute a system call by other task\n" " page-server launch page server\n" " service launch service\n" " dedup remove duplicates in memory dump\n" " cpuinfo dump writes cpu information into image file\n" " cpuinfo check validates cpu information read from image file\n" ); if (usage_error) { pr_msg("\nTry -h|--help for more info\n"); return 1; } pr_msg("\n" "Dump/Restore options:\n" "\n" "* Generic:\n" " -t|--tree PID checkpoint a process tree identified by PID\n" " -d|--restore-detached detach after restore\n" " -S|--restore-sibling restore root task as sibling\n" " -s|--leave-stopped leave tasks in stopped state after checkpoint\n" " -R|--leave-running leave tasks in running state after checkpoint\n" " -D|--images-dir DIR directory for image files\n" " --pidfile FILE write root task, service or page-server pid to FILE\n" " -W|--work-dir DIR directory to cd and write logs/pidfiles/stats to\n" " (if not specified, value of --images-dir is used)\n" " --cpu-cap [CAP] require certain cpu capability. CAP: may be one of:\n" " 'cpu','fpu','all','ins','none'. 
To disable capability, prefix it with '^'.\n" " --exec-cmd execute the command specified after '--' on successful\n" " restore making it the parent of the restored process\n" " --freeze-cgroup\n" " use cgroup freezer to collect processes\n" "\n" "* Special resources support:\n" " -x|--" USK_EXT_PARAM "inode,.." " allow external unix connections (optionally can be assign socket's inode that allows one-sided dump)\n" " --" SK_EST_PARAM " checkpoint/restore established TCP connections\n" " -r|--root PATH change the root filesystem (when run in mount namespace)\n" " --evasive-devices use any path to a device file if the original one\n" " is inaccessible\n" " --veth-pair IN=OUT map inside veth device name to outside one\n" " can optionally append @<bridge-name> to OUT for moving\n" " the outside veth to the named bridge\n" " --link-remap allow one to link unlinked files back when possible\n" " --ghost-limit size specify maximum size of deleted file contents to be carried inside an image file\n" " --action-script FILE add an external action script\n" " -j|--" OPT_SHELL_JOB " allow one to dump and restore shell jobs\n" " -l|--" OPT_FILE_LOCKS " handle file locks, for safety, only used for container\n" " -L|--libdir path to a plugin directory (by default " CR_PLUGIN_DEFAULT ")\n" " --force-irmap force resolving names for inotify/fsnotify watches\n" " --irmap-scan-path FILE\n" " add a path the irmap hints to scan\n" " -M|--ext-mount-map KEY:VALUE\n" " add external mount mapping\n" " -M|--ext-mount-map auto\n" " attempt to autodetect external mount mapings\n" " --enable-external-sharing\n" " allow autoresolving mounts with external sharing\n" " --enable-external-masters\n" " allow autoresolving mounts with external masters\n" " --manage-cgroups [m] dump or restore cgroups the process is in usig mode:\n" " 'none', 'props', 'soft' (default), 'full' and 'strict'.\n" " --cgroup-root [controller:]/newroot\n" " change the root cgroup the controller will be\n" " installed into. 
No controller means that root is the\n" " default for all controllers not specified.\n" " --skip-mnt PATH ignore this mountpoint when dumping the mount namespace.\n" " --enable-fs FSNAMES a comma separated list of filesystem names or \"all\".\n" " force criu to (try to) dump/restore these filesystem's\n" " mountpoints even if fs is not supported.\n" " --external RES dump objects from this list as external resources:\n" " Formats of RES:\n" " tty[rdev:dev]\n" " file[mnt_id:inode]\n" " --inherit-fd fd[<num>]:<existing>\n" " Inherit file descriptors. This allows to treat file descriptor\n" " <num> as being already opened via <existing> one and instead of\n" " trying to open we inherit it:\n" " tty[rdev:dev]\n" " pipe[inode]\n" " socket[inode]\n" " file[mnt_id:inode]\n" " --empty-ns {net}\n" " Create a namespace, but don't restore its properies.\n" " An user will retore them from action scripts.\n" "Check options:\n" " without any arguments, \"criu check\" checks availability of absolutely required\n" " kernel features; if any of these features is missing dump and restore will fail\n" " --extra also check availability of extra kernel features\n" " --experimental also check availability of experimental kernel features\n" " --all also check availability of extra and experimental kernel features\n" " --feature FEAT only check availability of one of the following kernel features\n" " " ); check_add_feature("list"); pr_msg( "\n" "* Logging:\n" " -o|--log-file FILE log file name\n" " --log-pid enable per-process logging to separate FILE.pid files\n" " -v[NUM] set logging level (higher level means more output):\n" " -v1|-v - only errors and messages\n" " -v2|-vv - also warnings (default level)\n" " -v3|-vvv - also information messages and timestamps\n" " -v4|-vvvv - lots of debug\n" "\n" "* Memory dumping options:\n" " --track-mem turn on memory changes tracker in kernel\n" " --prev-images-dir DIR path to images from previous dump (relative to -D)\n" " --page-server send 
pages to page server (see options below as well)\n" " --auto-dedup when used on dump it will deduplicate \"old\" data in\n" " pages images of previous dump\n" " when used on restore, as soon as page is restored, it\n" " will be punched from the image.\n" "\n" "Page/Service server options:\n" " --address ADDR address of server or service\n" " --port PORT port of page server\n" " -d|--daemon run in the background after creating socket\n" "\n" "Other options:\n" " -h|--help show this text\n" " -V|--version show version\n" ); return 0; opt_pid_missing: pr_msg("Error: pid not specified\n"); return 1; bad_arg: if (idx < 0) /* short option */ pr_msg("Error: invalid argument for -%c: %s\n", opt, optarg); else /* long option */ pr_msg("Error: invalid argument for --%s: %s\n", long_opts[idx].name, optarg); return 1; }
/*
 * Fill the global opts from an RPC request received on socket @sk.
 *
 * The peer's credentials (SO_PEERCRED) are used to reach the images and
 * work directories through /proc/<peer pid>/fd/<fd>. The function opens
 * the images directory, changes into the work directory, initialises the
 * log and then copies every option present in @req into opts. When the
 * request carries no pid, @req is updated with the peer's pid.
 *
 * @sk:  service socket the request arrived on
 * @req: decoded request; strings and arrays are referenced, not copied
 *
 * Returns 0 on success. On any failure cr_errno is set to EBADRQC through
 * set_cr_errno() and -1 is returned.
 */
static int setup_opts_from_req(int sk, CriuOpts *req)
{
	struct ucred ids;
	struct stat st;
	socklen_t ids_len = sizeof(struct ucred);
	char images_dir_path[PATH_MAX];
	char work_dir_path[PATH_MAX];
	ssize_t link_len;
	int i;

	if (getsockopt(sk, SOL_SOCKET, SO_PEERCRED, &ids, &ids_len)) {
		pr_perror("Can't get socket options");
		goto err;
	}

	if (fstat(sk, &st)) {
		pr_perror("Can't get socket stat");
		goto err;
	}

	BUG_ON(st.st_ino == -1);
	service_sk_ino = st.st_ino;

	/* open images_dir */
	sprintf(images_dir_path, "/proc/%d/fd/%d", ids.pid, req->images_dir_fd);

	if (req->parent_img)
		opts.img_parent = req->parent_img;

	if (open_image_dir(images_dir_path) < 0) {
		pr_perror("Can't open images directory");
		goto err;
	}

	/*
	 * get full path to images_dir to use in process title
	 *
	 * readlink() does not NUL-terminate its output, so leave room for
	 * the terminator and add it explicitly.
	 */
	link_len = readlink(images_dir_path, images_dir, PATH_MAX - 1);
	if (link_len == -1) {
		pr_perror("Can't readlink %s", images_dir_path);
		goto err;
	}
	images_dir[link_len] = '\0';

	/* chdir to work dir */
	if (req->has_work_dir_fd)
		sprintf(work_dir_path, "/proc/%d/fd/%d", ids.pid, req->work_dir_fd);
	else
		strcpy(work_dir_path, images_dir_path);

	if (chdir(work_dir_path)) {
		pr_perror("Can't chdir to work_dir");
		goto err;
	}

	/* initiate log file in work dir */
	if (req->log_file) {
		if (strchr(req->log_file, '/')) {
			pr_perror("No subdirs are allowed in log_file name");
			goto err;
		}

		opts.output = req->log_file;
	} else
		opts.output = DEFAULT_LOG_FILENAME;

	log_set_loglevel(req->log_level);
	if (log_init(opts.output) == -1) {
		pr_perror("Can't initiate log");
		goto err;
	}

	/* checking flags from client */
	if (req->has_leave_running && req->leave_running)
		opts.final_state = TASK_ALIVE;

	/* Default the target pid to the requesting peer. */
	if (!req->has_pid) {
		req->has_pid = true;
		req->pid = ids.pid;
	}

	if (req->has_ext_unix_sk) {
		opts.ext_unix_sk = req->ext_unix_sk;
		for (i = 0; i < req->n_unix_sk_ino; i++) {
			if (unix_sk_id_add(req->unix_sk_ino[i]->inode) < 0)
				goto err;
		}
	}

	if (req->root)
		opts.root = req->root;

	if (req->has_rst_sibling) {
		if (!opts.swrk_restore) {
			pr_err("rst_sibling is not allowed in standalone service\n");
			goto err;
		}

		opts.restore_sibling = req->rst_sibling;
	}

	if (req->has_tcp_established)
		opts.tcp_established_ok = req->tcp_established;

	if (req->has_tcp_skip_in_flight)
		opts.tcp_skip_in_flight = req->tcp_skip_in_flight;

	if (req->has_evasive_devices)
		opts.evasive_devices = req->evasive_devices;

	if (req->has_shell_job)
		opts.shell_job = req->shell_job;

	if (req->has_file_locks)
		opts.handle_file_locks = req->file_locks;

	if (req->has_track_mem)
		opts.track_mem = req->track_mem;

	if (req->has_link_remap)
		opts.link_remap_ok = req->link_remap;

	if (req->has_auto_dedup)
		opts.auto_dedup = req->auto_dedup;

	if (req->has_force_irmap)
		opts.force_irmap = req->force_irmap;

	if (req->n_exec_cmd > 0) {
		/* One extra slot for the NULL terminator of the argv-style array. */
		opts.exec_cmd = xmalloc((req->n_exec_cmd + 1) * sizeof(char *));
		if (!opts.exec_cmd)
			goto err;
		memcpy(opts.exec_cmd, req->exec_cmd, req->n_exec_cmd * sizeof(char *));
		opts.exec_cmd[req->n_exec_cmd] = NULL;
	}

	if (req->ps) {
		opts.use_page_server = true;
		opts.addr = req->ps->address;
		opts.port = htons((short)req->ps->port);

		if (req->ps->has_fd) {
			/* A pre-opened page-server socket is accepted in swrk mode only. */
			if (!opts.swrk_restore)
				goto err;

			opts.ps_socket = req->ps->fd;
		}
	}

	if (req->notify_scripts && add_rpc_notify(sk))
		goto err;

	for (i = 0; i < req->n_veths; i++) {
		if (veth_pair_add(req->veths[i]->if_in, req->veths[i]->if_out))
			goto err;
	}

	for (i = 0; i < req->n_ext_mnt; i++) {
		if (ext_mount_add(req->ext_mnt[i]->key, req->ext_mnt[i]->val))
			goto err;
	}

	for (i = 0; i < req->n_join_ns; i++) {
		if (join_ns_add(req->join_ns[i]->ns, req->join_ns[i]->ns_file, req->join_ns[i]->extra_opt))
			goto err;
	}

	if (req->n_inherit_fd && !opts.swrk_restore) {
		pr_err("inherit_fd is not allowed in standalone service\n");
		goto err;
	}

	for (i = 0; i < req->n_inherit_fd; i++) {
		if (inherit_fd_add(req->inherit_fd[i]->fd, req->inherit_fd[i]->key))
			goto err;
	}

	for (i = 0; i < req->n_external; i++)
		if (add_external(req->external[i]))
			goto err;

	for (i = 0; i < req->n_cg_root; i++) {
		if (new_cg_root_add(req->cg_root[i]->ctrl, req->cg_root[i]->path))
			goto err;
	}

	for (i = 0; i < req->n_enable_fs; i++) {
		if (!add_fsname_auto(req->enable_fs[i]))
			goto err;
	}

	for (i = 0; i < req->n_skip_mnt; i++) {
		if (!add_skip_mount(req->skip_mnt[i]))
			goto err;
	}

	if (req->has_cpu_cap)
		opts.cpu_cap = req->cpu_cap;

	/*
	 * FIXME: For backward compatibility we setup
	 * soft mode here, need to enhance to support
	 * other modes as well via separate option
	 * probably.
	 */
	if (req->has_manage_cgroups)
		opts.manage_cgroups = req->manage_cgroups ? CG_MODE_SOFT : CG_MODE_IGNORE;

	/* Override the manage_cgroup if mode is set explicitly */
	if (req->has_manage_cgroups_mode) {
		unsigned int mode;

		switch (req->manage_cgroups_mode) {
		case CRIU_CG_MODE__IGNORE:
			mode = CG_MODE_IGNORE;
			break;
		case CRIU_CG_MODE__NONE:
			mode = CG_MODE_NONE;
			break;
		case CRIU_CG_MODE__PROPS:
			mode = CG_MODE_PROPS;
			break;
		case CRIU_CG_MODE__SOFT:
			mode = CG_MODE_SOFT;
			break;
		case CRIU_CG_MODE__FULL:
			mode = CG_MODE_FULL;
			break;
		case CRIU_CG_MODE__STRICT:
			mode = CG_MODE_STRICT;
			break;
		case CRIU_CG_MODE__DEFAULT:
			mode = CG_MODE_DEFAULT;
			break;
		default:
			goto err;
		}

		opts.manage_cgroups = mode;
	}

	if (req->freeze_cgroup)
		opts.freeze_cgroup = req->freeze_cgroup;

	if (req->has_timeout)
		opts.timeout = req->timeout;

	if (req->cgroup_props)
		opts.cgroup_props = req->cgroup_props;

	if (req->cgroup_props_file)
		opts.cgroup_props_file = req->cgroup_props_file;

	for (i = 0; i < req->n_cgroup_dump_controller; i++) {
		if (!cgp_add_dump_controller(req->cgroup_dump_controller[i]))
			goto err;
	}

	if (req->has_auto_ext_mnt)
		opts.autodetect_ext_mounts = req->auto_ext_mnt;

	if (req->has_ext_sharing)
		opts.enable_external_sharing = req->ext_sharing;

	if (req->has_ext_masters)
		opts.enable_external_masters = req->ext_masters;

	if (req->has_ghost_limit)
		opts.ghost_limit = req->ghost_limit;

	if (req->has_empty_ns) {
		opts.empty_ns = req->empty_ns;
		/* Only CLONE_NEWNET may be requested as an empty namespace. */
		if (req->empty_ns & ~(CLONE_NEWNET))
			goto err;
	}

	if (req->n_irmap_scan_paths) {
		for (i = 0; i < req->n_irmap_scan_paths; i++) {
			if (irmap_scan_path_add(req->irmap_scan_paths[i]))
				goto err;
		}
	}

	if (check_namespace_opts())
		goto err;

	return 0;

err:
	set_cr_errno(EBADRQC);
	return -1;
}
int main(int argc, char *argv[], char *envp[]) { #define BOOL_OPT(OPT_NAME, SAVE_TO) \ {OPT_NAME, no_argument, SAVE_TO, true},\ {"no-" OPT_NAME, no_argument, SAVE_TO, false} pid_t pid = 0, tree_id = 0; int ret = -1; bool usage_error = true; bool has_exec_cmd = false; bool has_sub_command; int opt, idx; int log_level = DEFAULT_LOGLEVEL; char *imgs_dir = "."; static const char short_opts[] = "dSsRf:F:t:p:hcD:o:v::x::Vr:jJ:lW:L:M:"; static struct option long_opts[] = { { "tree", required_argument, 0, 't' }, { "pid", required_argument, 0, 'p' }, { "leave-stopped", no_argument, 0, 's' }, { "leave-running", no_argument, 0, 'R' }, BOOL_OPT("restore-detached", &opts.restore_detach), BOOL_OPT("restore-sibling", &opts.restore_sibling), BOOL_OPT("daemon", &opts.restore_detach), { "contents", no_argument, 0, 'c' }, { "file", required_argument, 0, 'f' }, { "fields", required_argument, 0, 'F' }, { "images-dir", required_argument, 0, 'D' }, { "work-dir", required_argument, 0, 'W' }, { "log-file", required_argument, 0, 'o' }, { "join-ns", required_argument, 0, 'J' }, { "root", required_argument, 0, 'r' }, { USK_EXT_PARAM, optional_argument, 0, 'x' }, { "help", no_argument, 0, 'h' }, BOOL_OPT(SK_EST_PARAM, &opts.tcp_established_ok), { "close", required_argument, 0, 1043 }, BOOL_OPT("log-pid", &opts.log_file_per_pid), { "version", no_argument, 0, 'V' }, BOOL_OPT("evasive-devices", &opts.evasive_devices), { "pidfile", required_argument, 0, 1046 }, { "veth-pair", required_argument, 0, 1047 }, { "action-script", required_argument, 0, 1049 }, BOOL_OPT(LREMAP_PARAM, &opts.link_remap_ok), BOOL_OPT(OPT_SHELL_JOB, &opts.shell_job), BOOL_OPT(OPT_FILE_LOCKS, &opts.handle_file_locks), BOOL_OPT("page-server", &opts.use_page_server), { "address", required_argument, 0, 1051 }, { "port", required_argument, 0, 1052 }, { "prev-images-dir", required_argument, 0, 1053 }, { "ms", no_argument, 0, 1054 }, BOOL_OPT("track-mem", &opts.track_mem), BOOL_OPT("auto-dedup", &opts.auto_dedup), { "libdir", 
required_argument, 0, 'L' }, { "cpu-cap", optional_argument, 0, 1057 }, BOOL_OPT("force-irmap", &opts.force_irmap), { "ext-mount-map", required_argument, 0, 'M' }, { "exec-cmd", no_argument, 0, 1059 }, { "manage-cgroups", optional_argument, 0, 1060 }, { "cgroup-root", required_argument, 0, 1061 }, { "inherit-fd", required_argument, 0, 1062 }, { "feature", required_argument, 0, 1063 }, { "skip-mnt", required_argument, 0, 1064 }, { "enable-fs", required_argument, 0, 1065 }, { "enable-external-sharing", no_argument, &opts.enable_external_sharing, true }, { "enable-external-masters", no_argument, &opts.enable_external_masters, true }, { "freeze-cgroup", required_argument, 0, 1068 }, { "ghost-limit", required_argument, 0, 1069 }, { "irmap-scan-path", required_argument, 0, 1070 }, { "lsm-profile", required_argument, 0, 1071 }, { "timeout", required_argument, 0, 1072 }, { "external", required_argument, 0, 1073 }, { "empty-ns", required_argument, 0, 1074 }, BOOL_OPT("extra", &opts.check_extra_features), BOOL_OPT("experimental", &opts.check_experimental_features), { "all", no_argument, 0, 1079 }, { "cgroup-props", required_argument, 0, 1080 }, { "cgroup-props-file", required_argument, 0, 1081 }, { "cgroup-dump-controller", required_argument, 0, 1082 }, BOOL_OPT(SK_INFLIGHT_PARAM, &opts.tcp_skip_in_flight), BOOL_OPT("deprecated", &opts.deprecated_ok), BOOL_OPT("display-stats", &opts.display_stats), BOOL_OPT("weak-sysctls", &opts.weak_sysctls), { "status-fd", required_argument, 0, 1088 }, { }, }; #undef BOOL_OPT BUILD_BUG_ON(PAGE_SIZE != PAGE_IMAGE_SIZE); BUILD_BUG_ON(CTL_32 != SYSCTL_TYPE__CTL_32); BUILD_BUG_ON(__CTL_STR != SYSCTL_TYPE__CTL_STR); if (fault_injection_init()) return 1; cr_pb_init(); setproctitle_init(argc, argv, envp); if (argc < 2) goto usage; init_opts(); if (init_service_fd()) return 1; if (!strcmp(argv[1], "swrk")) { if (argc < 3) goto usage; /* * This is to start criu service worker from libcriu calls. 
* The usage is "criu swrk <fd>" and is not for CLI/scripts. * The arguments semantics can change at any time with the * corresponding lib call change. */ opts.swrk_restore = true; return cr_service_work(atoi(argv[2])); } while (1) { idx = -1; opt = getopt_long(argc, argv, short_opts, long_opts, &idx); if (opt == -1) break; if (!opt) continue; switch (opt) { case 's': opts.final_state = TASK_STOPPED; break; case 'R': opts.final_state = TASK_ALIVE; break; case 'x': if (optarg && unix_sk_ids_parse(optarg) < 0) return 1; opts.ext_unix_sk = true; break; case 'p': pid = atoi(optarg); if (pid <= 0) goto bad_arg; break; case 't': tree_id = atoi(optarg); if (tree_id <= 0) goto bad_arg; break; case 'c': opts.show_pages_content = true; break; case 'f': opts.show_dump_file = optarg; break; case 'F': opts.show_fmt = optarg; break; case 'r': opts.root = optarg; break; case 'd': opts.restore_detach = true; break; case 'S': opts.restore_sibling = true; break; case 'D': imgs_dir = optarg; break; case 'W': opts.work_dir = optarg; break; case 'o': opts.output = optarg; break; case 'J': if (parse_join_ns(optarg)) goto bad_arg; break; case 'v': if (optarg) { if (optarg[0] == 'v') /* handle -vvvvv */ log_level += strlen(optarg) + 1; else log_level = atoi(optarg); } else log_level++; break; case 1043: { int fd; fd = atoi(optarg); pr_info("Closing fd %d\n", fd); close(fd); break; } case 1046: opts.pidfile = optarg; break; case 1047: { char *aux; aux = strchr(optarg, '='); if (aux == NULL) goto bad_arg; *aux = '\0'; if (veth_pair_add(optarg, aux + 1)) return 1; } break; case 1049: if (add_script(optarg)) return 1; break; case 1051: opts.addr = optarg; break; case 1052: opts.port = htons(atoi(optarg)); if (!opts.port) goto bad_arg; break; case 'j': opts.shell_job = true; break; case 'l': opts.handle_file_locks = true; break; case 1053: opts.img_parent = optarg; break; case 1057: if (parse_cpu_cap(&opts, optarg)) goto usage; break; case 1058: opts.force_irmap = true; break; case 1054: 
pr_err("--ms is deprecated; see \"Check options\" of criu --help\n"); return 1; case 'L': opts.libdir = optarg; break; case 1059: has_exec_cmd = true; break; case 1060: if (parse_manage_cgroups(&opts, optarg)) goto usage; break; case 1061: { char *path, *ctl; path = strchr(optarg, ':'); if (path) { *path = '\0'; path++; ctl = optarg; } else { path = optarg; ctl = NULL; } if (new_cg_root_add(ctl, path)) return -1; } break; case 1062: if (inherit_fd_parse(optarg) < 0) return 1; break; case 1063: ret = check_add_feature(optarg); if (ret < 0) /* invalid kernel feature name */ return 1; if (ret > 0) /* list kernel features and exit */ return 0; break; case 1064: if (!add_skip_mount(optarg)) return 1; break; case 1065: if (!add_fsname_auto(optarg)) return 1; break; case 1068: opts.freeze_cgroup = optarg; break; case 1069: opts.ghost_limit = parse_size(optarg); break; case 1070: if (irmap_scan_path_add(optarg)) return -1; break; case 1071: opts.lsm_profile = optarg; opts.lsm_supplied = true; break; case 1072: opts.timeout = atoi(optarg); break; case 'M': { char *aux; if (strcmp(optarg, "auto") == 0) { opts.autodetect_ext_mounts = true; break; } aux = strchr(optarg, ':'); if (aux == NULL) goto bad_arg; *aux = '\0'; if (ext_mount_add(optarg, aux + 1)) return 1; } break; case 1073: if (add_external(optarg)) return 1; break; case 1074: if (!strcmp("net", optarg)) opts.empty_ns |= CLONE_NEWNET; else { pr_err("Unsupported empty namespace: %s\n", optarg); return 1; } break; case 1079: opts.check_extra_features = true; opts.check_experimental_features = true; break; case 1080: opts.cgroup_props = optarg; break; case 1081: opts.cgroup_props_file = optarg; break; case 1082: if (!cgp_add_dump_controller(optarg)) return 1; break; case 1088: if (sscanf(optarg, "%d", &opts.status_fd) != 1) { pr_err("Unable to parse a value of --status-fd\n"); return 1; } break; case 'V': pr_msg("Version: %s\n", CRIU_VERSION); if (strcmp(CRIU_GITID, "0")) pr_msg("GitID: %s\n", CRIU_GITID); return 0; 
case 'h': usage_error = false; goto usage; default: goto usage; } } if (opts.deprecated_ok) pr_msg("Turn deprecated stuff ON\n"); if (opts.tcp_skip_in_flight) pr_msg("Will skip in-flight TCP connections\n"); if (opts.tcp_established_ok) pr_info("Will dump TCP connections\n"); if (opts.link_remap_ok) pr_info("Will allow link remaps on FS\n"); if (opts.weak_sysctls) pr_msg("Will skip non-existant sysctls on restore\n"); if (getenv("CRIU_DEPRECATED")) { pr_msg("Turn deprecated stuff ON via env\n"); opts.deprecated_ok = true; } if (check_namespace_opts()) { pr_msg("Error: namespace flags conflict\n"); return 1; } if (!opts.restore_detach && opts.restore_sibling) { pr_msg("--restore-sibling only makes sense with --restore-detach\n"); return 1; } if (opts.work_dir == NULL) opts.work_dir = imgs_dir; if (optind >= argc) { pr_msg("Error: command is required\n"); goto usage; } if (!strcmp(argv[optind], "exec")) { pr_msg("The \"exec\" action is deprecated by the Compel library.\n"); return -1; } has_sub_command = (argc - optind) > 1; if (has_exec_cmd) { if (!has_sub_command) { pr_msg("Error: --exec-cmd requires a command\n"); goto usage; } if (strcmp(argv[optind], "restore")) { pr_msg("Error: --exec-cmd is available for the restore command only\n"); goto usage; } if (opts.restore_detach) { pr_msg("Error: --restore-detached and --exec-cmd cannot be used together\n"); goto usage; } opts.exec_cmd = xmalloc((argc - optind) * sizeof(char *)); if (!opts.exec_cmd) return 1; memcpy(opts.exec_cmd, &argv[optind + 1], (argc - optind - 1) * sizeof(char *)); opts.exec_cmd[argc - optind - 1] = NULL; } else { /* No subcommands except for cpuinfo and restore --exec-cmd */ if (strcmp(argv[optind], "cpuinfo") && has_sub_command) { pr_msg("Error: excessive parameter%s for command %s\n", (argc - optind) > 2 ? 
"s" : "", argv[optind]); goto usage; } } /* We must not open imgs dir, if service is called */ if (strcmp(argv[optind], "service")) { ret = open_image_dir(imgs_dir); if (ret < 0) return 1; } /* * When a process group becomes an orphan, * its processes are sent a SIGHUP signal */ if (!strcmp(argv[optind], "restore") && opts.restore_detach && opts.final_state == TASK_STOPPED && opts.shell_job) pr_warn("Stopped and detached shell job will get SIGHUP from OS."); if (chdir(opts.work_dir)) { pr_perror("Can't change directory to %s", opts.work_dir); return 1; } log_set_loglevel(log_level); if (log_init(opts.output)) return 1; libsoccr_set_log(log_level, print_on_level); compel_log_init(vprint_on_level, log_get_loglevel()); pr_debug("Version: %s (gitid %s)\n", CRIU_VERSION, CRIU_GITID); if (opts.deprecated_ok) pr_debug("DEPRECATED ON\n"); if (!list_empty(&opts.inherit_fds)) { if (strcmp(argv[optind], "restore")) { pr_err("--inherit-fd is restore-only option\n"); return 1; } /* now that log file is set up, print inherit fd list */ inherit_fd_log(); } if (opts.img_parent) pr_info("Will do snapshot from %s\n", opts.img_parent); if (!strcmp(argv[optind], "dump")) { if (!tree_id) goto opt_pid_missing; return cr_dump_tasks(tree_id); } if (!strcmp(argv[optind], "pre-dump")) { if (!tree_id) goto opt_pid_missing; return cr_pre_dump_tasks(tree_id) != 0; } if (!strcmp(argv[optind], "restore")) { if (tree_id) pr_warn("Using -t with criu restore is obsoleted\n"); ret = cr_restore_tasks(); if (ret == 0 && opts.exec_cmd) { close_pid_proc(); execvp(opts.exec_cmd[0], opts.exec_cmd); pr_perror("Failed to exec command %s", opts.exec_cmd[0]); ret = 1; } return ret != 0; } if (!strcmp(argv[optind], "show")) { pr_msg("The \"show\" action is deprecated by the CRIT utility.\n"); pr_msg("To view an image use the \"crit decode -i $name --pretty\" command.\n"); return -1; } if (!strcmp(argv[optind], "check")) return cr_check() != 0; if (!strcmp(argv[optind], "page-server")) return 
cr_page_server(opts.daemon_mode, -1) != 0; if (!strcmp(argv[optind], "service")) return cr_service(opts.daemon_mode); if (!strcmp(argv[optind], "dedup")) return cr_dedup() != 0; if (!strcmp(argv[optind], "cpuinfo")) { if (!argv[optind + 1]) { pr_msg("Error: cpuinfo requires an action: dump or check\n"); goto usage; } if (!strcmp(argv[optind + 1], "dump")) return cpuinfo_dump(); else if (!strcmp(argv[optind + 1], "check")) return cpuinfo_check(); } pr_msg("Error: unknown command: %s\n", argv[optind]); usage: pr_msg("\n" "Usage:\n" " criu dump|pre-dump -t PID [<options>]\n" " criu restore [<options>]\n" " criu check [--feature FEAT]\n" " criu page-server\n" " criu service [<options>]\n" " criu dedup\n" "\n" "Commands:\n" " dump checkpoint a process/tree identified by pid\n" " pre-dump pre-dump task(s) minimizing their frozen time\n" " restore restore a process/tree\n" " check checks whether the kernel support is up-to-date\n" " page-server launch page server\n" " service launch service\n" " dedup remove duplicates in memory dump\n" " cpuinfo dump writes cpu information into image file\n" " cpuinfo check validates cpu information read from image file\n" ); if (usage_error) { pr_msg("\nTry -h|--help for more info\n"); return 1; } pr_msg("\n" "Most of the true / false long options (the ones without arguments) can be\n" "prefixed with --no- to negate the option (example: --display-stats and\n" "--no-display-stats).\n" "\n" "Dump/Restore options:\n" "\n" "* Generic:\n" " -t|--tree PID checkpoint a process tree identified by PID\n" " -d|--restore-detached detach after restore\n" " -S|--restore-sibling restore root task as sibling\n" " -s|--leave-stopped leave tasks in stopped state after checkpoint\n" " -R|--leave-running leave tasks in running state after checkpoint\n" " -D|--images-dir DIR directory for image files\n" " --pidfile FILE write root task, service or page-server pid to FILE\n" " -W|--work-dir DIR directory to cd and write logs/pidfiles/stats to\n" " (if not 
specified, value of --images-dir is used)\n" " --cpu-cap [CAP] CPU capabilities to write/check. CAP is comma-separated\n" " list of: cpu, fpu, all, ins, none. To disable\n" " a capability, use ^CAP. Empty argument implies all\n" " --exec-cmd execute the command specified after '--' on successful\n" " restore making it the parent of the restored process\n" " --freeze-cgroup use cgroup freezer to collect processes\n" " --weak-sysctls skip restoring sysctls that are not available\n" "\n" "* External resources support:\n" " --external RES dump objects from this list as external resources:\n" " Formats of RES on dump:\n" " tty[rdev:dev]\n" " file[mnt_id:inode]\n" " dev[major/minor]:NAME\n" " unix[ino]\n" " mnt[MOUNTPOINT]:COOKIE\n" " mnt[]{:AUTO_OPTIONS}\n" " Formats of RES on restore:\n" " dev[NAME]:DEVPATH\n" " veth[IFNAME]:OUTNAME{@BRIDGE}\n" " macvlan[IFNAME]:OUTNAME\n" " mnt[COOKIE]:ROOT\n" "\n" "* Special resources support:\n" " --" SK_EST_PARAM " checkpoint/restore established TCP connections\n" " --" SK_INFLIGHT_PARAM " skip (ignore) in-flight TCP connections\n" " -r|--root PATH change the root filesystem (when run in mount namespace)\n" " --evasive-devices use any path to a device file if the original one\n" " is inaccessible\n" " --link-remap allow one to link unlinked files back when possible\n" " --ghost-limit size limit max size of deleted file contents inside image\n" " --action-script FILE add an external action script\n" " -j|--" OPT_SHELL_JOB " allow one to dump and restore shell jobs\n" " -l|--" OPT_FILE_LOCKS " handle file locks, for safety, only used for container\n" " -L|--libdir path to a plugin directory (by default " CR_PLUGIN_DEFAULT ")\n" " --force-irmap force resolving names for inotify/fsnotify watches\n" " --irmap-scan-path FILE\n" " add a path the irmap hints to scan\n" " --manage-cgroups [m] dump/restore process' cgroups; argument can be one of\n" " 'none', 'props', 'soft' (default), 'full' or 'strict'\n" " --cgroup-root 
[controller:]/newroot\n" " on dump: change the root for the controller that will\n" " be dumped. By default, only the paths with tasks in\n" " them and below will be dumped.\n" " on restore: change the root cgroup the controller will\n" " be installed into. No controller means that root is the\n" " default for all controllers not specified\n" " --cgroup-props STRING\n" " define cgroup controllers and properties\n" " to be checkpointed, which are described\n" " via STRING using simplified YAML format\n" " --cgroup-props-file FILE\n" " same as --cgroup-props, but taking description\n" " from the path specified\n" " --cgroup-dump-controller NAME\n" " define cgroup controller to be dumped\n" " and skip anything else present in system\n" " --skip-mnt PATH ignore this mountpoint when dumping the mount namespace\n" " --enable-fs FSNAMES a comma separated list of filesystem names or \"all\"\n" " force criu to (try to) dump/restore these filesystem's\n" " mountpoints even if fs is not supported\n" " --inherit-fd fd[NUM]:RES\n" " Inherit file descriptors, treating fd NUM as being\n" " already opened via an existing RES, which can be:\n" " tty[rdev:dev]\n" " pipe[inode]\n" " socket[inode]\n" " file[mnt_id:inode]\n" " path/to/file\n" " --empty-ns net Create a namespace, but don't restore its properties\n" " (assuming it will be restored by action scripts)\n" " -J|--join-ns NS:{PID|NS_FILE}[,OPTIONS]\n" " Join existing namespace and restore process in it.\n" " Namespace can be specified as either pid or file path.\n" " OPTIONS can be used to specify parameters for userns:\n" " user:PID,UID,GID\n" "\n" "Check options:\n" " Without options, \"criu check\" checks availability of absolutely required\n" " kernel features, critical for performing dump and restore.\n" " --extra add check for extra kernel features\n" " --experimental add check for experimental kernel features\n" " --all same as --extra --experimental\n" " --feature FEAT only check a particular feature, one of:" ); 
pr_check_features(" ", ", ", 80); pr_msg( "\n" "* Logging:\n" " -o|--log-file FILE log file name\n" " --log-pid enable per-process logging to separate FILE.pid files\n" " -v[v...] increase verbosity (can use multiple v)\n" " -vNUM set verbosity to NUM (higher level means more output):\n" " -v1 - only errors and messages\n" " -v2 - also warnings (default level)\n" " -v3 - also information messages and timestamps\n" " -v4 - lots of debug\n" " --display-stats print out dump/restore stats\n" "\n" "* Memory dumping options:\n" " --track-mem turn on memory changes tracker in kernel\n" " --prev-images-dir DIR path to images from previous dump (relative to -D)\n" " --page-server send pages to page server (see options below as well)\n" " --auto-dedup when used on dump it will deduplicate \"old\" data in\n" " pages images of previous dump\n" " when used on restore, as soon as page is restored, it\n" " will be punched from the image\n" "\n" "Page/Service server options:\n" " --address ADDR address of server or service\n" " --port PORT port of page server\n" " -d|--daemon run in the background after creating socket\n" " --status-fd FD write \\0 to the FD and close it once process is ready\n" " to handle requests\n" "\n" "Other options:\n" " -h|--help show this text\n" " -V|--version show version\n" ); return 0; opt_pid_missing: pr_msg("Error: pid not specified\n"); return 1; bad_arg: if (idx < 0) /* short option */ pr_msg("Error: invalid argument for -%c: %s\n", opt, optarg); else /* long option */ pr_msg("Error: invalid argument for --%s: %s\n", long_opts[idx].name, optarg); return 1; }
/*
 * Fill the global opts from an RPC request that arrived on socket @sk.
 *
 * The peer's credentials are read from the socket and passed to
 * restrict_uid(); the images and work directories are reached through
 * the peer's /proc/<pid>/fd/<fd> links; the log is (re)initialised in
 * the work directory; finally the optional request fields are copied
 * into opts.
 *
 * Returns 0 on success and -1 on any failure.
 */
static int setup_opts_from_req(int sk, CriuOpts *req)
{
	struct ucred ids;
	struct stat st;
	socklen_t ids_len = sizeof(struct ucred);
	char images_dir_path[PATH_MAX];
	char work_dir_path[PATH_MAX];
	ssize_t len;
	int i;

	if (getsockopt(sk, SOL_SOCKET, SO_PEERCRED, &ids, &ids_len)) {
		pr_perror("Can't get socket options");
		return -1;
	}

	if (restrict_uid(ids.uid, ids.gid))
		return -1;

	if (fstat(sk, &st)) {
		pr_perror("Can't get socket stat");
		return -1;
	}

	BUG_ON(st.st_ino == -1);
	service_sk_ino = st.st_ino;

	/* open images_dir */
	sprintf(images_dir_path, "/proc/%d/fd/%d", ids.pid, req->images_dir_fd);

	if (req->parent_img)
		opts.img_parent = req->parent_img;

	if (open_image_dir(images_dir_path) < 0) {
		pr_perror("Can't open images directory");
		return -1;
	}

	/*
	 * Get full path to images_dir to use in process title.
	 * readlink() does not append a terminating NUL, so leave room
	 * for one and add it by hand.
	 */
	len = readlink(images_dir_path, images_dir, PATH_MAX - 1);
	if (len == -1) {
		pr_perror("Can't readlink %s", images_dir_path);
		return -1;
	}
	images_dir[len] = '\0';

	/* chdir to work dir */
	if (req->has_work_dir_fd)
		sprintf(work_dir_path, "/proc/%d/fd/%d", ids.pid, req->work_dir_fd);
	else
		strcpy(work_dir_path, images_dir_path);

	if (chdir(work_dir_path)) {
		pr_perror("Can't chdir to work_dir");
		return -1;
	}

	/* initiate log file in work dir */
	if (req->log_file) {
		if (strchr(req->log_file, '/')) {
			/* Pure validation failure: errno is unrelated here. */
			pr_err("No subdirs are allowed in log_file name\n");
			return -1;
		}

		opts.output = req->log_file;
	} else
		opts.output = DEFAULT_LOG_FILENAME;

	log_set_loglevel(req->log_level);
	if (log_init(opts.output) == -1) {
		pr_perror("Can't initiate log");
		return -1;
	}

	/* checking flags from client */
	if (req->has_leave_running && req->leave_running)
		opts.final_state = TASK_ALIVE;

	/* No explicit pid in the request: fall back to the peer's pid. */
	if (!req->has_pid) {
		req->has_pid = true;
		req->pid = ids.pid;
	}

	if (req->has_ext_unix_sk)
		opts.ext_unix_sk = req->ext_unix_sk;

	if (req->root)
		opts.root = req->root;

	if (req->has_rst_sibling) {
		if (!opts.swrk_restore) {
			pr_err("rst_sibling is not allowed in standalone service\n");
			return -1;
		}

		opts.restore_sibling = req->rst_sibling;
	}

	if (req->has_tcp_established)
		opts.tcp_established_ok = req->tcp_established;

	if (req->has_evasive_devices)
		opts.evasive_devices = req->evasive_devices;

	if (req->has_shell_job)
		opts.shell_job = req->shell_job;

	if (req->has_file_locks)
		opts.handle_file_locks = req->file_locks;

	if (req->has_track_mem)
		opts.track_mem = req->track_mem;

	if (req->has_link_remap)
		opts.link_remap_ok = req->link_remap;

	if (req->has_auto_dedup)
		opts.auto_dedup = req->auto_dedup;

	if (req->has_force_irmap)
		opts.force_irmap = req->force_irmap;

	if (req->n_exec_cmd > 0) {
		/* One extra slot for the NULL terminator of the argv-style array. */
		opts.exec_cmd = xmalloc((req->n_exec_cmd + 1) * sizeof(char *));
		if (!opts.exec_cmd)
			return -1;
		memcpy(opts.exec_cmd, req->exec_cmd, req->n_exec_cmd * sizeof(char *));
		opts.exec_cmd[req->n_exec_cmd] = NULL;
	}

	if (req->ps) {
		opts.use_page_server = true;
		opts.addr = req->ps->address;
		/* Truncate to 16 bits without going through a signed type. */
		opts.ps_port = htons((unsigned short)req->ps->port);

		if (req->ps->has_fd) {
			/* A page-server fd is only accepted in swrk mode. */
			if (!opts.swrk_restore)
				return -1;

			opts.ps_socket = req->ps->fd;
		}
	}

	if (req->notify_scripts && add_script(SCRIPT_RPC_NOTIFY, sk))
		return -1;

	for (i = 0; i < req->n_veths; i++) {
		if (veth_pair_add(req->veths[i]->if_in, req->veths[i]->if_out))
			return -1;
	}

	for (i = 0; i < req->n_ext_mnt; i++) {
		if (ext_mount_add(req->ext_mnt[i]->key, req->ext_mnt[i]->val))
			return -1;
	}

	for (i = 0; i < req->n_cg_root; i++) {
		if (new_cg_root_add(req->cg_root[i]->ctrl, req->cg_root[i]->path))
			return -1;
	}

	if (req->has_cpu_cap)
		opts.cpu_cap = req->cpu_cap;

	if (req->has_manage_cgroups)
		opts.manage_cgroups = req->manage_cgroups;

	return 0;
}
int main(int argc, char *argv[]) { pid_t pid = 0, tree_id = 0; int ret = -1; bool usage_error = true; bool has_exec_cmd = false; int opt, idx; int log_level = LOG_UNSET; char *imgs_dir = "."; char *work_dir = NULL; static const char short_opts[] = "dsRf:F:t:p:hcD:o:n:v::xVr:jlW:L:M:"; static struct option long_opts[] = { { "tree", required_argument, 0, 't' }, { "pid", required_argument, 0, 'p' }, { "leave-stopped", no_argument, 0, 's' }, { "leave-running", no_argument, 0, 'R' }, { "restore-detached", no_argument, 0, 'd' }, { "daemon", no_argument, 0, 'd' }, { "contents", no_argument, 0, 'c' }, { "file", required_argument, 0, 'f' }, { "fields", required_argument, 0, 'F' }, { "images-dir", required_argument, 0, 'D' }, { "work-dir", required_argument, 0, 'W' }, { "log-file", required_argument, 0, 'o' }, { "namespaces", required_argument, 0, 'n' }, { "root", required_argument, 0, 'r' }, { USK_EXT_PARAM, no_argument, 0, 'x' }, { "help", no_argument, 0, 'h' }, { SK_EST_PARAM, no_argument, 0, 1042 }, { "close", required_argument, 0, 1043 }, { "log-pid", no_argument, 0, 1044}, { "version", no_argument, 0, 'V'}, { "evasive-devices", no_argument, 0, 1045}, { "pidfile", required_argument, 0, 1046}, { "veth-pair", required_argument, 0, 1047}, { "action-script", required_argument, 0, 1049}, { LREMAP_PARAM, no_argument, 0, 1041}, { OPT_SHELL_JOB, no_argument, 0, 'j'}, { OPT_FILE_LOCKS, no_argument, 0, 'l'}, { "page-server", no_argument, 0, 1050}, { "address", required_argument, 0, 1051}, { "port", required_argument, 0, 1052}, { "prev-images-dir", required_argument, 0, 1053}, { "ms", no_argument, 0, 1054}, { "track-mem", no_argument, 0, 1055}, { "auto-dedup", no_argument, 0, 1056}, { "libdir", required_argument, 0, 'L'}, { "cpu-cap", required_argument, 0, 1057}, { "force-irmap", no_argument, 0, 1058}, { "ext-mount-map", required_argument, 0, 'M'}, { "exec-cmd", no_argument, 0, 1059}, { "manage-cgroups", no_argument, 0, 1060}, { "cgroup-root", required_argument, 0, 1061}, { }, }; 
BUILD_BUG_ON(PAGE_SIZE != PAGE_IMAGE_SIZE); cr_pb_init(); if (restrict_uid(getuid(), getgid())) return 1; if (argc < 2) goto usage; init_opts(); if (init_service_fd()) return 1; if (!strcmp(argv[1], "swrk")) { /* * This is to start criu service worker from libcriu calls. * The usage is "criu swrk <fd>" and is not for CLI/scripts. * The arguments semantics can change at any tyme with the * corresponding lib call change. */ opts.swrk_restore = true; return cr_service_work(atoi(argv[2])); } while (1) { idx = -1; opt = getopt_long(argc, argv, short_opts, long_opts, &idx); if (opt == -1) break; switch (opt) { case 's': opts.final_state = TASK_STOPPED; break; case 'R': opts.final_state = TASK_ALIVE; break; case 'x': opts.ext_unix_sk = true; break; case 'p': pid = atoi(optarg); if (pid <= 0) goto bad_arg; break; case 't': tree_id = atoi(optarg); if (tree_id <= 0) goto bad_arg; break; case 'c': opts.show_pages_content = true; break; case 'f': opts.show_dump_file = optarg; break; case 'F': opts.show_fmt = optarg; break; case 'r': opts.root = optarg; break; case 'd': opts.restore_detach = true; break; case 'D': imgs_dir = optarg; break; case 'W': work_dir = optarg; break; case 'o': opts.output = optarg; break; case 'n': if (parse_ns_string(optarg)) goto bad_arg; break; case 'v': if (log_level == LOG_UNSET) log_level = 0; if (optarg) { if (optarg[0] == 'v') /* handle -vvvvv */ log_level += strlen(optarg) + 1; else log_level = atoi(optarg); } else log_level++; break; case 1041: pr_info("Will allow link remaps on FS\n"); opts.link_remap_ok = true; break; case 1042: pr_info("Will dump TCP connections\n"); opts.tcp_established_ok = true; break; case 1043: { int fd; fd = atoi(optarg); pr_info("Closing fd %d\n", fd); close(fd); break; } case 1044: opts.log_file_per_pid = 1; break; case 1045: opts.evasive_devices = true; break; case 1046: opts.pidfile = optarg; break; case 1047: { char *aux; aux = strchr(optarg, '='); if (aux == NULL) goto bad_arg; *aux = '\0'; if 
(veth_pair_add(optarg, aux + 1)) return 1; } break; case 1049: { struct script *script; script = xmalloc(sizeof(struct script)); if (script == NULL) return 1; script->path = optarg; list_add(&script->node, &opts.scripts); } break; case 1050: opts.use_page_server = true; break; case 1051: opts.addr = optarg; break; case 1052: opts.ps_port = htons(atoi(optarg)); if (!opts.ps_port) goto bad_arg; break; case 'j': opts.shell_job = true; break; case 'l': opts.handle_file_locks = true; break; case 1053: opts.img_parent = optarg; break; case 1055: opts.track_mem = true; break; case 1056: opts.auto_dedup = true; break; case 1057: if (parse_cpu_cap(&opts, optarg)) goto usage; break; case 1058: opts.force_irmap = true; break; case 1054: opts.check_ms_kernel = true; break; case 'L': opts.libdir = optarg; break; case 1059: has_exec_cmd = true; break; case 1060: opts.manage_cgroups = true; break; case 1061: { char *path, *ctl; path = strchr(optarg, ':'); if (path) { *path = '\0'; path++; ctl = optarg; } else { path = optarg; ctl = NULL; } if (new_cg_root_add(ctl, path)) return -1; } break; case 'M': { char *aux; aux = strchr(optarg, ':'); if (aux == NULL) goto bad_arg; *aux = '\0'; if (ext_mount_add(optarg, aux + 1)) return 1; } break; case 'V': pr_msg("Version: %s\n", CRIU_VERSION); if (strcmp(CRIU_GITID, "0")) pr_msg("GitID: %s\n", CRIU_GITID); return 0; case 'h': usage_error = false; goto usage; default: goto usage; } } if (work_dir == NULL) work_dir = imgs_dir; if (optind >= argc) { pr_msg("Error: command is required\n"); goto usage; } if (has_exec_cmd) { if (argc - optind <= 1) { pr_msg("Error: --exec-cmd requires a command\n"); goto usage; } if (strcmp(argv[optind], "restore")) { pr_msg("Error: --exec-cmd is available for the restore command only\n"); goto usage; } if (opts.restore_detach) { pr_msg("Error: --restore-detached and --exec-cmd cannot be used together\n"); goto usage; } opts.exec_cmd = xmalloc((argc - optind) * sizeof(char *)); memcpy(opts.exec_cmd, 
&argv[optind + 1], (argc - optind - 1) * sizeof(char *)); opts.exec_cmd[argc - optind - 1] = NULL; } /* We must not open imgs dir, if service is called */ if (strcmp(argv[optind], "service")) { ret = open_image_dir(imgs_dir); if (ret < 0) return 1; } if (chdir(work_dir)) { pr_perror("Can't change directory to %s", work_dir); return 1; } log_set_loglevel(log_level); if (log_init(opts.output)) return 1; if (opts.img_parent) pr_info("Will do snapshot from %s\n", opts.img_parent); if (!strcmp(argv[optind], "dump")) { if (!tree_id) goto opt_pid_missing; return cr_dump_tasks(tree_id); } if (!strcmp(argv[optind], "pre-dump")) { if (!tree_id) goto opt_pid_missing; return cr_pre_dump_tasks(tree_id) != 0; } if (!strcmp(argv[optind], "restore")) { if (tree_id) pr_warn("Using -t with criu restore is obsoleted\n"); ret = cr_restore_tasks(); if (ret == 0 && opts.exec_cmd) { close_pid_proc(); execvp(opts.exec_cmd[0], opts.exec_cmd); pr_perror("Failed to exec command %s", opts.exec_cmd[0]); ret = 1; } return ret != 0; } if (!strcmp(argv[optind], "show")) return cr_show(pid) != 0; if (!strcmp(argv[optind], "check")) return cr_check() != 0; if (!strcmp(argv[optind], "exec")) { if (!pid) pid = tree_id; /* old usage */ if (!pid) goto opt_pid_missing; return cr_exec(pid, argv + optind + 1) != 0; } if (!strcmp(argv[optind], "page-server")) return cr_page_server(opts.restore_detach) > 0 ? 
0 : 1; if (!strcmp(argv[optind], "service")) return cr_service(opts.restore_detach); if (!strcmp(argv[optind], "dedup")) return cr_dedup() != 0; pr_msg("Error: unknown command: %s\n", argv[optind]); usage: pr_msg("\n" "Usage:\n" " criu dump|pre-dump -t PID [<options>]\n" " criu restore [<options>]\n" " criu show (-D DIR)|(-f FILE) [<options>]\n" " criu check [--ms]\n" " criu exec -p PID <syscall-string>\n" " criu page-server\n" " criu service [<options>]\n" " criu dedup\n" "\n" "Commands:\n" " dump checkpoint a process/tree identified by pid\n" " pre-dump pre-dump task(s) minimizing their frozen time\n" " restore restore a process/tree\n" " show show dump file(s) contents\n" " check checks whether the kernel support is up-to-date\n" " exec execute a system call by other task\n" " page-server launch page server\n" " service launch service\n" " dedup remove duplicates in memory dump\n" ); if (usage_error) { pr_msg("\nTry -h|--help for more info\n"); return 1; } pr_msg("\n" "Dump/Restore options:\n" "\n" "* Generic:\n" " -t|--tree PID checkpoint a process tree identified by PID\n" " -d|--restore-detached detach after restore\n" " -s|--leave-stopped leave tasks in stopped state after checkpoint\n" " -R|--leave-running leave tasks in running state after checkpoint\n" " -D|--images-dir DIR directory for image files\n" " --pidfile FILE write root task, service or page-server pid to FILE\n" " -W|--work-dir DIR directory to cd and write logs/pidfiles/stats to\n" " (if not specified, value of --images-dir is used)\n" " --cpu-cap CAP require certain cpu capability. CAP: may be one of:\n" " 'fpu','all'. 
To disable capability, prefix it with '^'.\n" " --exec-cmd execute the command specified after '--' on successful\n" " restore making it the parent of the restored process\n" "\n" "* Special resources support:\n" " -x|--" USK_EXT_PARAM " allow external unix connections\n" " --" SK_EST_PARAM " checkpoint/restore established TCP connections\n" " -r|--root PATH change the root filesystem (when run in mount namespace)\n" " --evasive-devices use any path to a device file if the original one\n" " is inaccessible\n" " --veth-pair IN=OUT map inside veth device name to outside one\n" " --link-remap allow to link unlinked files back when possible\n" " --action-script FILE add an external action script\n" " -j|--" OPT_SHELL_JOB " allow to dump and restore shell jobs\n" " -l|--" OPT_FILE_LOCKS " handle file locks, for safety, only used for container\n" " -L|--libdir path to a plugin directory (by default " CR_PLUGIN_DEFAULT ")\n" " --force-irmap force resolving names for inotify/fsnotify watches\n" " -M|--ext-mount-map KEY:VALUE\n" " add external mount mapping\n" " --manage-cgroups dump or restore cgroups the process is in\n" " --cgroup-root [controller:]/newroot\n" " change the root cgroup the controller will be\n" " installed into. 
No controller means that root is the\n" " default for all controllers not specified.\n" "\n" "* Logging:\n" " -o|--log-file FILE log file name\n" " --log-pid enable per-process logging to separate FILE.pid files\n" " -v[NUM] set logging level (higher level means more output):\n" " -v1|-v - only errors and messages\n" " -v2|-vv - also warnings (default level)\n" " -v3|-vvv - also information messages and timestamps\n" " -v4|-vvvv - lots of debug\n" "\n" "* Memory dumping options:\n" " --track-mem turn on memory changes tracker in kernel\n" " --prev-images-dir DIR path to images from previous dump (relative to -D)\n" " --page-server send pages to page server (see options below as well)\n" " --auto-dedup when used on dump it will deduplicate \"old\" data in\n" " pages images of previous dump\n" " when used on restore, as soon as page is restored, it\n" " will be punched from the image.\n" "\n" "Page/Service server options:\n" " --address ADDR address of server or service\n" " --port PORT port of page server\n" " -d|--daemon run in the background after creating socket\n" "\n" "Show options:\n" " -f|--file FILE show contents of a checkpoint file\n" " -F|--fields FIELDS show specified fields (comma separated)\n" " -D|--images-dir DIR directory where to get images from\n" " -c|--contents show contents of pages dumped in hexdump format\n" " -p|--pid PID show files relevant to PID (filter -D flood)\n" "\n" "Other options:\n" " -h|--help show this text\n" " -V|--version show version\n" " --ms don't check not yet merged kernel features\n" ); return 0; opt_pid_missing: pr_msg("Error: pid not specified\n"); return 1; bad_arg: if (idx < 0) /* short option */ pr_msg("Error: invalid argument for -%c: %s\n", opt, optarg); else /* long option */ pr_msg("Error: invalid argument for --%s: %s\n", long_opts[idx].name, optarg); return 1; }
int main(int argc, char* argv[]) { if (argc <= 2) { printf("Usage: %s ip_address port_number\n", argv[0]); return 0; } log_globals_init(&g_log); log_init(&g_log, "jhttpserver.log", NULL); log_set_loglevel(&g_log, LOG_DEBUG); const char* ip = argv[1]; int port = atoi(argv[2]); INFO(&g_log, "jhttpserver", "%s : %d", ip, port); add_signal(SIGPIPE, SIG_IGN, TRUE); thread_pool* pool = create_thread_pool(8 ,10000); if (pool == NULL) { printf("create thread pool is failed."); ERROR(&g_log, "jhttpserver", "create thread pool is failed."); return 1; } http_conn* users = (http_conn*)malloc(sizeof(http_conn) * MAX_FD); assert(users); int listen_fd = socket(PF_INET, SOCK_STREAM, 0); struct linger tmp = {1, 0}; setsockopt(listen_fd, SOL_SOCKET, SO_LINGER, &tmp, sizeof(tmp)); int ret = 0; struct sockaddr_in address; bzero(&address, sizeof(address)); address.sin_family = AF_INET; address.sin_port = htons(port); inet_pton(AF_INET, ip, &address.sin_addr); ret = bind(listen_fd, (struct sockaddr *)&address, sizeof(address)); assert(ret >= 0); ret = listen(listen_fd, 5); assert(ret >= 0); struct epoll_event events[MAX_EVENT_NUMBER]; epollfd = epoll_create(5); assert(epollfd != -1); add_fd(epollfd, listen_fd, false); while (true) { int number = epoll_wait(epollfd, events, MAX_EVENT_NUMBER, -1); if ((number < 0) && (errno != EINTR)) { printf("epoll failure\n"); break; } int i=0; for (; i<number; i++) { int sockfd = events[i].data.fd; if (sockfd == listen_fd) { struct sockaddr_in client_address; socklen_t client_addr_len = sizeof(client_address); int conn_fd = accept(listen_fd, (struct sockaddr*)&client_address, &client_addr_len); if (conn_fd < 0) { printf("errno is : %d\n", errno); continue; } if (user_count > MAX_FD) { show_error(conn_fd, "Internal server busy"); continue; } init_new_connect(&users[conn_fd], conn_fd, &client_address); } else if (events[i].events & (EPOLLRDHUP | EPOLLHUP | EPOLLERR)) { close_connect(&users[sockfd]); } else if (events[i].events & EPOLLIN) { if 
(http_conn_read(&users[sockfd])) { add_conn(pool, users + sockfd); } else { close_connect(&users[sockfd]); } } else if (events[i].events & EPOLLOUT) { if (!http_conn_write(&users[sockfd])) { close_connect(&users[sockfd]); } } } } close(epollfd); close(listen_fd); free(users); destroy_thread_pool(pool); return 0; }
/* * The main routine to restore task via sigreturn. * This one is very special, we never return there * but use sigreturn facility to restore core registers * and jump execution to some predefined ip read from * core file. */ long __export_restore_task(struct task_restore_args *args) { long ret = -1; int i; VmaEntry *vma_entry; unsigned long va; struct rt_sigframe *rt_sigframe; unsigned long new_sp; k_rtsigset_t to_block; pid_t my_pid = sys_getpid(); rt_sigaction_t act; bootstrap_start = args->bootstrap_start; bootstrap_len = args->bootstrap_len; #ifdef CONFIG_VDSO vdso_rt_size = args->vdso_rt_size; #endif task_entries = args->task_entries; helpers = args->helpers; n_helpers = args->n_helpers; *args->breakpoint = rst_sigreturn; ksigfillset(&act.rt_sa_mask); act.rt_sa_handler = sigchld_handler; act.rt_sa_flags = SA_SIGINFO | SA_RESTORER | SA_RESTART; act.rt_sa_restorer = cr_restore_rt; sys_sigaction(SIGCHLD, &act, NULL, sizeof(k_rtsigset_t)); log_set_fd(args->logfd); log_set_loglevel(args->loglevel); cap_last_cap = args->cap_last_cap; pr_info("Switched to the restorer %d\n", my_pid); #ifdef CONFIG_VDSO if (vdso_do_park(&args->vdso_sym_rt, args->vdso_rt_parked_at, vdso_rt_size)) goto core_restore_end; #endif if (unmap_old_vmas((void *)args->premmapped_addr, args->premmapped_len, bootstrap_start, bootstrap_len)) goto core_restore_end; /* Shift private vma-s to the left */ for (i = 0; i < args->nr_vmas; i++) { vma_entry = args->tgt_vmas + i; if (!vma_entry_is(vma_entry, VMA_AREA_REGULAR)) continue; if (!vma_priv(vma_entry)) continue; if (vma_entry->end >= TASK_SIZE) continue; if (vma_entry->start > vma_entry->shmid) break; if (vma_remap(vma_premmaped_start(vma_entry), vma_entry->start, vma_entry_len(vma_entry))) goto core_restore_end; } /* Shift private vma-s to the right */ for (i = args->nr_vmas - 1; i >= 0; i--) { vma_entry = args->tgt_vmas + i; if (!vma_entry_is(vma_entry, VMA_AREA_REGULAR)) continue; if (!vma_priv(vma_entry)) continue; if (vma_entry->start > 
TASK_SIZE) continue; if (vma_entry->start < vma_entry->shmid) break; if (vma_remap(vma_premmaped_start(vma_entry), vma_entry->start, vma_entry_len(vma_entry))) goto core_restore_end; } /* * OK, lets try to map new one. */ for (i = 0; i < args->nr_vmas; i++) { vma_entry = args->tgt_vmas + i; if (!vma_entry_is(vma_entry, VMA_AREA_REGULAR)) continue; if (vma_priv(vma_entry)) continue; va = restore_mapping(vma_entry); if (va != vma_entry->start) { pr_err("Can't restore %"PRIx64" mapping with %lx\n", vma_entry->start, va); goto core_restore_end; } } #ifdef CONFIG_VDSO /* * Proxify vDSO. */ for (i = 0; i < args->nr_vmas; i++) { if (vma_entry_is(&args->tgt_vmas[i], VMA_AREA_VDSO) || vma_entry_is(&args->tgt_vmas[i], VMA_AREA_VVAR)) { if (vdso_proxify("dumpee", &args->vdso_sym_rt, args->vdso_rt_parked_at, i, args->tgt_vmas, args->nr_vmas)) goto core_restore_end; break; } } #endif /* * Walk though all VMAs again to drop PROT_WRITE * if it was not there. */ for (i = 0; i < args->nr_vmas; i++) { vma_entry = args->tgt_vmas + i; if (!(vma_entry_is(vma_entry, VMA_AREA_REGULAR))) continue; if (vma_entry_is(vma_entry, VMA_ANON_SHARED)) { struct shmem_info *entry; entry = find_shmem(args->shmems, args->nr_shmems, vma_entry->shmid); if (entry && entry->pid == my_pid && entry->start == vma_entry->start) futex_set_and_wake(&entry->lock, 1); } if (vma_entry->prot & PROT_WRITE) continue; sys_mprotect(decode_pointer(vma_entry->start), vma_entry_len(vma_entry), vma_entry->prot); } /* * Finally restore madivse() bits */ for (i = 0; i < args->nr_vmas; i++) { unsigned long m; vma_entry = args->tgt_vmas + i; if (!vma_entry->has_madv || !vma_entry->madv) continue; for (m = 0; m < sizeof(vma_entry->madv) * 8; m++) { if (vma_entry->madv & (1ul << m)) { ret = sys_madvise(vma_entry->start, vma_entry_len(vma_entry), m); if (ret) { pr_err("madvise(%"PRIx64", %"PRIu64", %ld) " "failed with %ld\n", vma_entry->start, vma_entry_len(vma_entry), m, ret); goto core_restore_end; } } } } ret = 0; /* * Tune up 
the task fields. */ ret |= sys_prctl_safe(PR_SET_NAME, (long)args->comm, 0, 0); ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_START_CODE, (long)args->mm.mm_start_code, 0); ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_END_CODE, (long)args->mm.mm_end_code, 0); ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_START_DATA, (long)args->mm.mm_start_data, 0); ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_END_DATA, (long)args->mm.mm_end_data, 0); ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_START_STACK, (long)args->mm.mm_start_stack, 0); ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_START_BRK, (long)args->mm.mm_start_brk, 0); ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_BRK, (long)args->mm.mm_brk, 0); ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_ARG_START, (long)args->mm.mm_arg_start, 0); ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_ARG_END, (long)args->mm.mm_arg_end, 0); ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_ENV_START, (long)args->mm.mm_env_start, 0); ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_ENV_END, (long)args->mm.mm_env_end, 0); ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_AUXV, (long)args->mm_saved_auxv, args->mm_saved_auxv_size); if (ret) goto core_restore_end; /* * Because of requirements applied from kernel side * we need to restore /proc/pid/exe symlink late, * after old existing VMAs are superseded with * new ones from image file. */ ret = restore_self_exe_late(args); if (ret) goto core_restore_end; /* * We need to prepare a valid sigframe here, so * after sigreturn the kernel will pick up the * registers from the frame, set them up and * finally pass execution to the new IP. */ rt_sigframe = (void *)args->t->mem_zone.rt_sigframe; if (restore_thread_common(rt_sigframe, args->t)) goto core_restore_end; /* * Threads restoration. This requires some more comments. This * restorer routine and thread restorer routine has the following * memory map, prepared by a caller code. 
* * | <-- low addresses high addresses --> | * +-------------------------------------------------------+-----------------------+ * | this proc body | own stack | rt_sigframe space | thread restore zone | * +-------------------------------------------------------+-----------------------+ * * where each thread restore zone is the following * * | <-- low addresses high addresses --> | * +--------------------------------------------------------------------------+ * | thread restore proc | thread1 stack | thread1 rt_sigframe | * +--------------------------------------------------------------------------+ */ if (args->nr_threads > 1) { struct thread_restore_args *thread_args = args->thread_args; long clone_flags = CLONE_VM | CLONE_FILES | CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM; long last_pid_len; long parent_tid; int i, fd; fd = args->fd_last_pid; ret = sys_flock(fd, LOCK_EX); if (ret) { pr_err("Can't lock last_pid %d\n", fd); goto core_restore_end; } for (i = 0; i < args->nr_threads; i++) { char last_pid_buf[16], *s; /* skip self */ if (thread_args[i].pid == args->t->pid) continue; new_sp = restorer_stack(thread_args + i); last_pid_len = vprint_num(last_pid_buf, sizeof(last_pid_buf), thread_args[i].pid - 1, &s); sys_lseek(fd, 0, SEEK_SET); ret = sys_write(fd, s, last_pid_len); if (ret < 0) { pr_err("Can't set last_pid %ld/%s\n", ret, last_pid_buf); goto core_restore_end; } /* * To achieve functionality like libc's clone() * we need a pure assembly here, because clone()'ed * thread will run with own stack and we must not * have any additional instructions... oh, dear... 
*/ RUN_CLONE_RESTORE_FN(ret, clone_flags, new_sp, parent_tid, thread_args, args->clone_restore_fn); } ret = sys_flock(fd, LOCK_UN); if (ret) { pr_err("Can't unlock last_pid %ld\n", ret); goto core_restore_end; } } sys_close(args->fd_last_pid); restore_rlims(args); ret = create_posix_timers(args); if (ret < 0) { pr_err("Can't restore posix timers %ld\n", ret); goto core_restore_end; } ret = timerfd_arm(args); if (ret < 0) { pr_err("Can't restore timerfd %ld\n", ret); goto core_restore_end; } pr_info("%ld: Restored\n", sys_getpid()); futex_set(&zombies_inprogress, args->nr_zombies); restore_finish_stage(CR_STATE_RESTORE); futex_wait_while_gt(&zombies_inprogress, 0); if (wait_helpers(args) < 0) goto core_restore_end; ksigfillset(&to_block); ret = sys_sigprocmask(SIG_SETMASK, &to_block, NULL, sizeof(k_rtsigset_t)); if (ret) { pr_err("Unable to block signals %ld", ret); goto core_restore_end; } sys_sigaction(SIGCHLD, &args->sigchld_act, NULL, sizeof(k_rtsigset_t)); ret = restore_signals(args->siginfo, args->siginfo_nr, true); if (ret) goto core_restore_end; ret = restore_signals(args->t->siginfo, args->t->siginfo_nr, false); if (ret) goto core_restore_end; restore_finish_stage(CR_STATE_RESTORE_SIGCHLD); rst_tcp_socks_all(args); /* * Writing to last-pid is CAP_SYS_ADMIN protected, * turning off TCP repair is CAP_SYS_NED_ADMIN protected, * thus restore* creds _after_ all of the above. */ ret = restore_creds(&args->creds); ret = ret || restore_dumpable_flag(&args->mm); ret = ret || restore_pdeath_sig(args->t); futex_set_and_wake(&thread_inprogress, args->nr_threads); restore_finish_stage(CR_STATE_RESTORE_CREDS); if (ret) BUG(); /* Wait until children stop to use args->task_entries */ futex_wait_while_gt(&thread_inprogress, 1); log_set_fd(-1); /* * The code that prepared the itimers makes shure the * code below doesn't fail due to bad timing values. 
*/ #define itimer_armed(args, i) \ (args->itimers[i].it_interval.tv_sec || \ args->itimers[i].it_interval.tv_usec) if (itimer_armed(args, 0)) sys_setitimer(ITIMER_REAL, &args->itimers[0], NULL); if (itimer_armed(args, 1)) sys_setitimer(ITIMER_VIRTUAL, &args->itimers[1], NULL); if (itimer_armed(args, 2)) sys_setitimer(ITIMER_PROF, &args->itimers[2], NULL); restore_posix_timers(args); sys_munmap(args->rst_mem, args->rst_mem_size); /* * Sigframe stack. */ new_sp = (long)rt_sigframe + SIGFRAME_OFFSET; /* * Prepare the stack and call for sigreturn, * pure assembly since we don't need any additional * code insns from gcc. */ rst_sigreturn(new_sp); core_restore_end: futex_abort_and_wake(&task_entries->nr_in_progress); pr_err("Restorer fail %ld\n", sys_getpid()); sys_exit_group(1); return -1; }
/*
 * Fill the global opts from an RPC request received on socket @sk.
 *
 * @sk:  connected service socket; its peer credentials (SO_PEERCRED) are
 *       used to build /proc/<pid>/fd/<n> paths for descriptors named in
 *       the request, and its inode is stored in service_sk_ino.
 * @req: decoded request. req->has_pid / req->pid are filled in from the
 *       peer credentials when the client did not supply a pid.
 *
 * Order of precedence, as implemented below: values from the optional RPC
 * configuration file (req->config_file) win over values sent via RPC,
 * which in turn win over values already present in opts.
 *
 * Returns 0 on success, 1 if kerndat_init() fails, and -1 (after
 * set_cr_errno(EBADRQC)) on any other failure.
 */
static int setup_opts_from_req(int sk, CriuOpts *req)
{
	struct ucred ids;
	struct stat st;
	socklen_t ids_len = sizeof(struct ucred);
	char images_dir_path[PATH_MAX];
	char work_dir_path[PATH_MAX];
	char status_fd[PATH_MAX];
	bool output_changed_by_rpc_conf = false;
	bool work_changed_by_rpc_conf = false;
	bool imgs_changed_by_rpc_conf = false;
	ssize_t link_len;
	int i;
	bool dummy = false;

	if (getsockopt(sk, SOL_SOCKET, SO_PEERCRED, &ids, &ids_len)) {
		pr_perror("Can't get socket options");
		goto err;
	}

	if (fstat(sk, &st)) {
		pr_perror("Can't get socket stat");
		goto err;
	}

	BUG_ON(st.st_ino == -1);
	service_sk_ino = st.st_ino;

	/*
	 * Evaluate an additional configuration file if specified.
	 * This needs to happen twice, because it is needed early to detect
	 * things like work_dir, imgs_dir and logfile. The second parsing
	 * of the optional RPC configuration file happens at the end and
	 * overwrites all options set via RPC.
	 */
	if (req->config_file) {
		char *tmp_output = opts.output;
		char *tmp_work = opts.work_dir;
		char *tmp_imgs = opts.imgs_dir;

		/*
		 * Clear the three options so that a non-NULL value after
		 * parsing can only have come from the RPC configuration file.
		 */
		opts.output = NULL;
		opts.work_dir = NULL;
		opts.imgs_dir = NULL;

		rpc_cfg_file = req->config_file;
		i = parse_options(0, NULL, &dummy, &dummy, PARSING_RPC_CONF);
		if (i) {
			xfree(tmp_output);
			xfree(tmp_work);
			xfree(tmp_imgs);
			goto err;
		}
		/* If this is non-NULL, the RPC configuration file had a value, use it. */
		if (opts.output)
			output_changed_by_rpc_conf = true;
		/* If this is NULL, use the old value if it was set. */
		if (!opts.output && tmp_output) {
			opts.output = tmp_output;
			tmp_output = NULL;
		}

		if (opts.work_dir)
			work_changed_by_rpc_conf = true;
		if (!opts.work_dir && tmp_work) {
			opts.work_dir = tmp_work;
			tmp_work = NULL;
		}

		if (opts.imgs_dir)
			imgs_changed_by_rpc_conf = true;
		/*
		 * As the images directory is a required RPC setting, it is not
		 * necessary to use the value from other configuration files.
		 * Either it is set in the RPC configuration file or it is set
		 * via RPC.
		 */
		xfree(tmp_output);
		xfree(tmp_work);
		xfree(tmp_imgs);
	}

	/*
	 * open images_dir - images_dir_fd is a required RPC parameter
	 *
	 * This assumes that if opts.imgs_dir is set we have a value
	 * from the configuration file parser. The test to see that
	 * imgs_changed_by_rpc_conf is true is used to make sure the value
	 * is from the RPC configuration file.
	 * The idea is that only the RPC configuration file is able to
	 * overwrite RPC settings:
	 *  * apply_config(global_conf)
	 *  * apply_config(user_conf)
	 *  * apply_config(environment variable)
	 *  * apply_rpc_options()
	 *  * apply_config(rpc_conf)
	 */
	if (imgs_changed_by_rpc_conf)
		strncpy(images_dir_path, opts.imgs_dir, PATH_MAX - 1);
	else
		sprintf(images_dir_path, "/proc/%d/fd/%d", ids.pid, req->images_dir_fd);
	/*
	 * strncpy() does not terminate the destination when the source is
	 * PATH_MAX - 1 bytes or longer, and the buffer is uninitialised stack
	 * memory, so terminate it explicitly.
	 */
	images_dir_path[PATH_MAX - 1] = '\0';

	if (req->parent_img)
		SET_CHAR_OPTS(img_parent, req->parent_img);

	if (open_image_dir(images_dir_path) < 0) {
		pr_perror("Can't open images directory");
		goto err;
	}

	/*
	 * get full path to images_dir to use in process title
	 *
	 * readlink() never appends a NUL byte, so leave room for one and
	 * add it. The original call already treated images_dir as at
	 * least PATH_MAX bytes long.
	 */
	link_len = readlink(images_dir_path, images_dir, PATH_MAX - 1);
	if (link_len == -1) {
		pr_perror("Can't readlink %s", images_dir_path);
		goto err;
	}
	images_dir[link_len] = '\0';

	/* chdir to work dir */
	if (work_changed_by_rpc_conf)
		/* Use the value from the RPC configuration file first. */
		strncpy(work_dir_path, opts.work_dir, PATH_MAX - 1);
	else if (req->has_work_dir_fd)
		/* Use the value set via RPC. */
		sprintf(work_dir_path, "/proc/%d/fd/%d", ids.pid, req->work_dir_fd);
	else if (opts.work_dir)
		/* Use the value from one of the other configuration files. */
		strncpy(work_dir_path, opts.work_dir, PATH_MAX - 1);
	else
		/* Use the images directory a work directory. */
		strcpy(work_dir_path, images_dir_path);
	/* Same reasoning as for images_dir_path: strncpy() may not terminate. */
	work_dir_path[PATH_MAX - 1] = '\0';

	if (chdir(work_dir_path)) {
		pr_perror("Can't chdir to work_dir");
		goto err;
	}

	/* initiate log file in work dir */
	if (req->log_file && !output_changed_by_rpc_conf) {
		/*
		 * If RPC sets a log file and if there nothing from the
		 * RPC configuration file, use the RPC value.
		 */
		if (strchr(req->log_file, '/')) {
			pr_perror("No subdirs are allowed in log_file name");
			goto err;
		}

		SET_CHAR_OPTS(output, req->log_file);
	} else if (!opts.output) {
		SET_CHAR_OPTS(output, DEFAULT_LOG_FILENAME);
	}

	/* This is needed later to correctly set the log_level */
	opts.log_level = req->log_level;
	log_set_loglevel(req->log_level);
	if (log_init(opts.output) == -1) {
		pr_perror("Can't initiate log");
		goto err;
	}

	if (req->config_file) {
		pr_debug("Overwriting RPC settings with values from %s\n", req->config_file);
	}

	/* Note: this path returns 1 and does not go through the err label. */
	if (kerndat_init())
		return 1;

	if (log_keep_err()) {
		pr_perror("Can't tune log");
		goto err;
	}

	/* checking flags from client */
	if (req->has_leave_running && req->leave_running)
		opts.final_state = TASK_ALIVE;

	/* Default the target pid to the peer that sent the request. */
	if (!req->has_pid) {
		req->has_pid = true;
		req->pid = ids.pid;
	}

	if (req->has_ext_unix_sk) {
		opts.ext_unix_sk = req->ext_unix_sk;
		for (i = 0; i < req->n_unix_sk_ino; i++) {
			if (unix_sk_id_add((unsigned int)req->unix_sk_ino[i]->inode) < 0)
				goto err;
		}
	}

	if (req->root)
		SET_CHAR_OPTS(root, req->root);

	if (req->has_rst_sibling) {
		if (!opts.swrk_restore) {
			pr_err("rst_sibling is not allowed in standalone service\n");
			goto err;
		}

		opts.restore_sibling = req->rst_sibling;
	}

	if (req->has_tcp_established)
		opts.tcp_established_ok = req->tcp_established;

	if (req->has_tcp_skip_in_flight)
		opts.tcp_skip_in_flight = req->tcp_skip_in_flight;

	if (req->has_tcp_close)
		opts.tcp_close = req->tcp_close;

	if (req->has_weak_sysctls)
		opts.weak_sysctls = req->weak_sysctls;

	if (req->has_evasive_devices)
		opts.evasive_devices = req->evasive_devices;

	if (req->has_shell_job)
		opts.shell_job = req->shell_job;

	if (req->has_file_locks)
		opts.handle_file_locks = req->file_locks;

	if (req->has_track_mem)
		opts.track_mem = req->track_mem;

	if (req->has_link_remap)
		opts.link_remap_ok = req->link_remap;

	if (req->has_auto_dedup)
		opts.auto_dedup = req->auto_dedup;

	if (req->has_force_irmap)
		opts.force_irmap = req->force_irmap;

	if (req->n_exec_cmd > 0) {
		opts.exec_cmd = xmalloc((req->n_exec_cmd + 1) * sizeof(char *));
		/*
		 * xmalloc()'s failure behaviour is not visible here; if it
		 * can return NULL the memcpy() below would write through a
		 * NULL pointer, so bail out instead.
		 */
		if (!opts.exec_cmd)
			goto err;
		/* Only the pointers are copied; the strings stay owned by @req. */
		memcpy(opts.exec_cmd, req->exec_cmd, req->n_exec_cmd * sizeof(char *));
		opts.exec_cmd[req->n_exec_cmd] = NULL;
	}

	if (req->has_lazy_pages) {
		opts.lazy_pages = req->lazy_pages;
	}

	if (req->ps) {
		opts.port = (short)req->ps->port;

		if (!opts.lazy_pages) {
			opts.use_page_server = true;
			if (req->ps->address)
				SET_CHAR_OPTS(addr, req->ps->address);
			else
				opts.addr = NULL;

			if (req->ps->has_fd) {
				if (!opts.swrk_restore)
					goto err;

				opts.ps_socket = req->ps->fd;
			}
		}
	}

	if (req->notify_scripts && add_rpc_notify(sk))
		goto err;

	for (i = 0; i < req->n_veths; i++) {
		if (veth_pair_add(req->veths[i]->if_in, req->veths[i]->if_out))
			goto err;
	}

	for (i = 0; i < req->n_ext_mnt; i++) {
		if (ext_mount_add(req->ext_mnt[i]->key, req->ext_mnt[i]->val))
			goto err;
	}

	for (i = 0; i < req->n_join_ns; i++) {
		if (join_ns_add(req->join_ns[i]->ns, req->join_ns[i]->ns_file, req->join_ns[i]->extra_opt))
			goto err;
	}

	if (req->n_inherit_fd && !opts.swrk_restore) {
		pr_err("inherit_fd is not allowed in standalone service\n");
		goto err;
	}
	for (i = 0; i < req->n_inherit_fd; i++) {
		if (inherit_fd_add(req->inherit_fd[i]->fd, req->inherit_fd[i]->key))
			goto err;
	}

	for (i = 0; i < req->n_external; i++)
		if (add_external(req->external[i]))
			goto err;

	for (i = 0; i < req->n_cg_root; i++) {
		if (new_cg_root_add(req->cg_root[i]->ctrl, req->cg_root[i]->path))
			goto err;
	}

	/* These two helpers signal failure with a zero return, unlike the ones above. */
	for (i = 0; i < req->n_enable_fs; i++) {
		if (!add_fsname_auto(req->enable_fs[i]))
			goto err;
	}

	for (i = 0; i < req->n_skip_mnt; i++) {
		if (!add_skip_mount(req->skip_mnt[i]))
			goto err;
	}

	if (req->has_cpu_cap) {
		opts.cpu_cap = req->cpu_cap;
		opts.cpu_cap |= CPU_CAP_IMAGE;
	}

	/*
	 * FIXME: For backward compatibility we setup
	 * soft mode here, need to enhance to support
	 * other modes as well via separate option
	 * probably.
	 */
	if (req->has_manage_cgroups)
		opts.manage_cgroups = req->manage_cgroups ? CG_MODE_SOFT : CG_MODE_IGNORE;

	/* Override the manage_cgroup if mode is set explicitly */
	if (req->has_manage_cgroups_mode) {
		unsigned int mode;

		switch (req->manage_cgroups_mode) {
		case CRIU_CG_MODE__IGNORE:
			mode = CG_MODE_IGNORE;
			break;
		case CRIU_CG_MODE__CG_NONE:
			mode = CG_MODE_NONE;
			break;
		case CRIU_CG_MODE__PROPS:
			mode = CG_MODE_PROPS;
			break;
		case CRIU_CG_MODE__SOFT:
			mode = CG_MODE_SOFT;
			break;
		case CRIU_CG_MODE__FULL:
			mode = CG_MODE_FULL;
			break;
		case CRIU_CG_MODE__STRICT:
			mode = CG_MODE_STRICT;
			break;
		case CRIU_CG_MODE__DEFAULT:
			mode = CG_MODE_DEFAULT;
			break;
		default:
			goto err;
		}

		opts.manage_cgroups = mode;
	}

	if (req->freeze_cgroup)
		SET_CHAR_OPTS(freeze_cgroup, req->freeze_cgroup);

	if (req->lsm_profile) {
		opts.lsm_supplied = true;
		SET_CHAR_OPTS(lsm_profile, req->lsm_profile);
	}

	if (req->has_timeout)
		opts.timeout = req->timeout;

	if (req->cgroup_props)
		SET_CHAR_OPTS(cgroup_props, req->cgroup_props);

	if (req->cgroup_props_file)
		SET_CHAR_OPTS(cgroup_props_file, req->cgroup_props_file);

	for (i = 0; i < req->n_cgroup_dump_controller; i++) {
		if (!cgp_add_dump_controller(req->cgroup_dump_controller[i]))
			goto err;
	}

	if (req->has_auto_ext_mnt)
		opts.autodetect_ext_mounts = req->auto_ext_mnt;

	if (req->has_ext_sharing)
		opts.enable_external_sharing = req->ext_sharing;

	if (req->has_ext_masters)
		opts.enable_external_masters = req->ext_masters;

	if (req->has_ghost_limit)
		opts.ghost_limit = req->ghost_limit;

	if (req->has_empty_ns) {
		opts.empty_ns = req->empty_ns;
		/* Only CLONE_NEWNET is accepted here; any other bit is rejected. */
		if (req->empty_ns & ~(CLONE_NEWNET))
			goto err;
	}

	if (req->n_irmap_scan_paths) {
		for (i = 0; i < req->n_irmap_scan_paths; i++) {
			if (irmap_scan_path_add(req->irmap_scan_paths[i]))
				goto err;
		}
	}

	if (req->has_status_fd) {
		/* Re-open the client's descriptor through its /proc entry. */
		sprintf(status_fd, "/proc/%d/fd/%d", ids.pid, req->status_fd);
		opts.status_fd = open(status_fd, O_WRONLY);
		if (opts.status_fd < 0)
			goto err;
	}

	if (req->orphan_pts_master)
		opts.orphan_pts_master = true;

	/* Evaluate additional configuration file a second time to overwrite
	 * all RPC settings. */
	if (req->config_file) {
		rpc_cfg_file = req->config_file;
		i = parse_options(0, NULL, &dummy, &dummy, PARSING_RPC_CONF);
		if (i)
			goto err;
	}

	log_set_loglevel(opts.log_level);
	if (check_options())
		goto err;

	return 0;

err:
	set_cr_errno(EBADRQC);
	return -1;
}
/*
 * Apply the settings carried by RPC request @req to the global opts.
 *
 * The peer of socket @sk is identified through SO_PEERCRED; its pid is
 * used to reach the images directory descriptor via /proc and as the
 * default target pid when the request carries none.
 *
 * Returns 0 on success and -1 on failure.
 */
static int setup_opts_from_req(int sk, CriuOpts *req)
{
	char proc_fd_path[PATH_MAX];
	socklen_t cred_len = sizeof(struct ucred);
	struct stat sk_stat;
	struct ucred peer;

	if (getsockopt(sk, SOL_SOCKET, SO_PEERCRED, &peer, &cred_len)) {
		pr_perror("Can't get socket options");
		return -1;
	}

	restrict_uid(peer.uid, peer.gid);

	if (fstat(sk, &sk_stat)) {
		pr_perror("Can't get socket stat");
		return -1;
	}

	BUG_ON(sk_stat.st_ino == -1);
	service_sk_ino = sk_stat.st_ino;

	/* Enter the directory the client passed as images_dir_fd */
	sprintf(proc_fd_path, "/proc/%d/fd/%d", peer.pid, req->images_dir_fd);
	if (chdir(proc_fd_path)) {
		pr_perror("Can't chdir to images directory");
		return -1;
	}

	if (open_image_dir(".") < 0)
		return -1;

	/* The log file lives in the images directory entered above */
	opts.output = req->log_file ? req->log_file : DEFAULT_LOG_FILENAME;

	log_set_loglevel(req->log_level);
	if (log_init(opts.output) == -1) {
		pr_perror("Can't initiate log");
		return -1;
	}

	/* Copy over every flag the client explicitly set */
	if (req->has_leave_running) {
		if (req->leave_running)
			opts.final_state = TASK_ALIVE;
	}

	if (!req->has_pid) {
		/* No pid in the request: fall back to the requesting peer */
		req->pid = peer.pid;
		req->has_pid = true;
	}

	if (req->has_ext_unix_sk)
		opts.ext_unix_sk = req->ext_unix_sk;

	if (req->has_tcp_established)
		opts.tcp_established_ok = req->tcp_established;

	if (req->has_evasive_devices)
		opts.evasive_devices = req->evasive_devices;

	if (req->has_shell_job)
		opts.shell_job = req->shell_job;

	if (req->has_file_locks)
		opts.handle_file_locks = req->file_locks;

	return 0;
}
/* * The main routine to restore task via sigreturn. * This one is very special, we never return there * but use sigreturn facility to restore core registers * and jump execution to some predefined ip read from * core file. * * All inputs come from @args. On failure the code jumps either to * core_restore_end (wakes waiters via futex_abort_and_wake(), logs and * calls sys_exit_group(1)) or to core_restore_failed (runs * ARCH_FAIL_CORE_RESTORE and returns ret). */ long __export_restore_task(struct task_restore_core_args *args) { long ret = -1; VmaEntry *vma_entry; u64 va; unsigned long premmapped_end = args->premmapped_addr + args->premmapped_len; struct rt_sigframe *rt_sigframe; unsigned long new_sp; pid_t my_pid = sys_getpid(); rt_sigaction_t act; task_entries = args->task_entries; /* Install a temporary SIGCHLD handler; the saved args->sigchld_act is put back further down */ ksigfillset(&act.rt_sa_mask); act.rt_sa_handler = sigchld_handler; act.rt_sa_flags = SA_SIGINFO | SA_RESTORER | SA_RESTART; act.rt_sa_restorer = cr_restore_rt; sys_sigaction(SIGCHLD, &act, NULL, sizeof(k_rtsigset_t)); log_set_fd(args->logfd); log_set_loglevel(args->loglevel); cap_last_cap = args->cap_last_cap; pr_info("Switched to the restorer %d\n", my_pid); /* * Walk args->self_vmas (terminated by an entry with start == 0) and * unmap the parts of every regular mapping that lie outside * [premmapped_addr, premmapped_end). */ for (vma_entry = args->self_vmas; vma_entry->start != 0; vma_entry++) { unsigned long addr = vma_entry->start; unsigned long len; if (!vma_entry_is(vma_entry, VMA_AREA_REGULAR)) continue; pr_debug("Examine %"PRIx64"-%"PRIx64"\n", vma_entry->start, vma_entry->end); if (addr < args->premmapped_addr) { if (vma_entry->end >= args->premmapped_addr) len = args->premmapped_addr - addr; else len = vma_entry->end - vma_entry->start; if (sys_munmap((void *) addr, len)) { pr_err("munmap fail for %lx - %lx\n", addr, addr + len); goto core_restore_end; } } if (vma_entry->end >= TASK_SIZE) continue; if (vma_entry->end > premmapped_end) { if (vma_entry->start < premmapped_end) addr = premmapped_end; len = vma_entry->end - addr; if (sys_munmap((void *) addr, len)) { pr_err("munmap fail for %lx - %lx\n", addr, addr + len); goto core_restore_end; } } } /* vma_entry now points at the terminating entry; unmap the self_vmas array including it */ sys_munmap(args->self_vmas, ((void *)(vma_entry + 1) - ((void *)args->self_vmas))); /* Shift private vma-s to the left */ /* NOTE(review): shmid is compared with start to pick the shift direction — presumably it holds the premapped address for private vmas; confirm */ for (vma_entry = args->tgt_vmas; vma_entry->start != 0; vma_entry++) { if (!vma_entry_is(vma_entry, 
VMA_AREA_REGULAR)) continue; if (!vma_priv(vma_entry)) continue; if (vma_entry->end >= TASK_SIZE) continue; if (vma_entry->start > vma_entry->shmid) break; if (vma_remap(vma_premmaped_start(vma_entry), vma_entry->start, vma_entry_len(vma_entry))) goto core_restore_end; } /* Shift private vma-s to the right */ /* This pass walks the list backwards, from the last of the nr_vmas entries to the first */ for (vma_entry = args->tgt_vmas + args->nr_vmas -1; vma_entry >= args->tgt_vmas; vma_entry--) { if (!vma_entry_is(vma_entry, VMA_AREA_REGULAR)) continue; if (!vma_priv(vma_entry)) continue; if (vma_entry->start > TASK_SIZE) continue; if (vma_entry->start < vma_entry->shmid) break; if (vma_remap(vma_premmaped_start(vma_entry), vma_entry->start, vma_entry_len(vma_entry))) goto core_restore_end; } /* * OK, lets try to map new one. * Only regular, non-private vmas are mapped here; the mapping must * land exactly at vma_entry->start. */ for (vma_entry = args->tgt_vmas; vma_entry->start != 0; vma_entry++) { if (!vma_entry_is(vma_entry, VMA_AREA_REGULAR)) continue; if (vma_priv(vma_entry)) continue; va = restore_mapping(vma_entry); if (va != vma_entry->start) { pr_err("Can't restore %"PRIx64" mapping with %"PRIx64"\n", vma_entry->start, va); goto core_restore_end; } } /* * Walk through all VMAs again to drop PROT_WRITE * if it was not there. 
*/ for (vma_entry = args->tgt_vmas; vma_entry->start != 0; vma_entry++) { if (!(vma_entry_is(vma_entry, VMA_AREA_REGULAR))) continue; if (vma_entry_is(vma_entry, VMA_ANON_SHARED)) { struct shmem_info *entry; entry = find_shmem(args->shmems, vma_entry->shmid); /* Only the task whose pid and start address match the shmem entry sets its lock to 1 and wakes waiters */ if (entry && entry->pid == my_pid && entry->start == vma_entry->start) futex_set_and_wake(&entry->lock, 1); } if (vma_entry->prot & PROT_WRITE) continue; sys_mprotect(decode_pointer(vma_entry->start), vma_entry_len(vma_entry), vma_entry->prot); } /* * Finally restore madvise() bits: every set bit number in * vma_entry->madv is passed to sys_madvise() as the advice value. */ for (vma_entry = args->tgt_vmas; vma_entry->start != 0; vma_entry++) { unsigned long i; if (!vma_entry->has_madv || !vma_entry->madv) continue; for (i = 0; i < sizeof(vma_entry->madv) * 8; i++) { if (vma_entry->madv & (1ul << i)) { ret = sys_madvise(vma_entry->start, vma_entry_len(vma_entry), i); if (ret) { pr_err("madvise(%"PRIx64", %"PRIu64", %ld) " "failed with %ld\n", vma_entry->start, vma_entry_len(vma_entry), i, ret); goto core_restore_end; } } } } /* Release the target vma array (including its terminating entry) and the shmem table */ sys_munmap(args->tgt_vmas, ((void *)(vma_entry + 1) - ((void *)args->tgt_vmas))); ret = sys_munmap(args->shmems, SHMEMS_SIZE); if (ret < 0) { pr_err("Can't unmap shmem %ld\n", ret); goto core_restore_end; } /* * Tune up the task fields. * The results are OR-ed together, so any single failure makes ret non-zero. 
*/ ret |= sys_prctl_safe(PR_SET_NAME, (long)args->comm, 0, 0); ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_START_CODE, (long)args->mm.mm_start_code, 0); ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_END_CODE, (long)args->mm.mm_end_code, 0); ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_START_DATA, (long)args->mm.mm_start_data, 0); ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_END_DATA, (long)args->mm.mm_end_data, 0); ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_START_STACK, (long)args->mm.mm_start_stack, 0); ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_START_BRK, (long)args->mm.mm_start_brk, 0); ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_BRK, (long)args->mm.mm_brk, 0); ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_ARG_START, (long)args->mm.mm_arg_start, 0); ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_ARG_END, (long)args->mm.mm_arg_end, 0); ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_ENV_START, (long)args->mm.mm_env_start, 0); ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_ENV_END, (long)args->mm.mm_env_end, 0); ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_AUXV, (long)args->mm_saved_auxv, args->mm_saved_auxv_size); if (ret) goto core_restore_end; /* * Because of requirements applied from kernel side * we need to restore /proc/pid/exe symlink late, * after old existing VMAs are superseded with * new ones from image file. */ ret = restore_self_exe_late(args); if (ret) goto core_restore_end; /* * We need to prepare a valid sigframe here, so * after sigreturn the kernel will pick up the * registers from the frame, set them up and * finally pass execution to the new IP. * * NOTE(review): the +8 offset into mem_zone.rt_sigframe is not * explained here — presumably an alignment adjustment; confirm. */ rt_sigframe = (void *)args->t->mem_zone.rt_sigframe + 8; if (restore_thread_common(rt_sigframe, args->t)) goto core_restore_end; /* * Threads restoration. This requires some more comments. This * restorer routine and thread restorer routine has the following * memory map, prepared by a caller code. 
* * | <-- low addresses high addresses --> | * +-------------------------------------------------------+-----------------------+ * | this proc body | own stack | heap | rt_sigframe space | thread restore zone | * +-------------------------------------------------------+-----------------------+ * * where each thread restore zone is the following * * | <-- low addresses high addresses --> | * +--------------------------------------------------------------------------+ * | thread restore proc | thread1 stack | thread1 heap | thread1 rt_sigframe | * +--------------------------------------------------------------------------+ */ if (args->nr_threads > 1) { struct thread_restore_args *thread_args = args->thread_args; long clone_flags = CLONE_VM | CLONE_FILES | CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM; long last_pid_len; long parent_tid; int i, fd; /* The last_pid file stays open and exclusively flock()ed for the whole clone loop */ fd = sys_open(LAST_PID_PATH, O_RDWR, LAST_PID_PERM); if (fd < 0) { pr_err("Can't open last_pid %d\n", fd); goto core_restore_end; } ret = sys_flock(fd, LOCK_EX); if (ret) { pr_err("Can't lock last_pid %d\n", fd); goto core_restore_end; } for (i = 0; i < args->nr_threads; i++) { char last_pid_buf[16], *s; /* skip self */ if (thread_args[i].pid == args->t->pid) continue; /* NOTE(review): no matching unlock of rst_lock is visible in this function — presumably the cloned thread releases it; confirm */ mutex_lock(&args->rst_lock); new_sp = RESTORE_ALIGN_STACK((long)thread_args[i].mem_zone.stack, sizeof(thread_args[i].mem_zone.stack)); /* Write (pid - 1) to last_pid — presumably so that the clone below is assigned thread_args[i].pid */ last_pid_len = vprint_num(last_pid_buf, sizeof(last_pid_buf), thread_args[i].pid - 1, &s); ret = sys_write(fd, s, last_pid_len); if (ret < 0) { pr_err("Can't set last_pid %ld/%s\n", ret, last_pid_buf); goto core_restore_end; } /* * To achieve functionality like libc's clone() * we need a pure assembly here, because clone()'ed * thread will run with own stack and we must not * have any additional instructions... oh, dear... 
*/ RUN_CLONE_RESTORE_FN(ret, clone_flags, new_sp, parent_tid, thread_args, args->clone_restore_fn); } ret = sys_flock(fd, LOCK_UN); if (ret) { pr_err("Can't unlock last_pid %ld\n", ret); goto core_restore_end; } sys_close(fd); } restore_rlims(args); pr_info("%ld: Restored\n", sys_getpid()); /* Publish the zombie count, finish the RESTORE stage, then wait until zombies_inprogress is no longer above 0 */ futex_set(&zombies_inprogress, args->nr_zombies); restore_finish_stage(CR_STATE_RESTORE); futex_wait_while_gt(&zombies_inprogress, 0); /* Put back the SIGCHLD action saved in args, replacing the handler installed at the top */ sys_sigaction(SIGCHLD, &args->sigchld_act, NULL, sizeof(k_rtsigset_t)); ret = restore_signals(args->siginfo, args->siginfo_nr, true); if (ret) goto core_restore_end; ret = restore_signals(args->t->siginfo, args->t->siginfo_nr, false); if (ret) goto core_restore_end; restore_finish_stage(CR_STATE_RESTORE_SIGCHLD); if (args->siginfo_size) { ret = sys_munmap(args->siginfo, args->siginfo_size); if (ret < 0) { pr_err("Can't unmap signals %ld\n", ret); goto core_restore_failed; } } rst_tcp_socks_all(args->rst_tcp_socks, args->rst_tcp_socks_size); /* * Writing to last-pid is CAP_SYS_ADMIN protected, * turning off TCP repair is CAP_NET_ADMIN protected, * thus restore* creds _after_ all of the above. */ ret = restore_creds(&args->creds); /* The stage is finished before ret is checked, so the futex and stage updates happen even when restore_creds() failed */ futex_set_and_wake(&thread_inprogress, args->nr_threads); restore_finish_stage(CR_STATE_RESTORE_CREDS); if (ret) BUG(); /* Wait until children stop to use args->task_entries */ futex_wait_while_gt(&thread_inprogress, 1); log_set_fd(-1); /* * The code that prepared the itimers makes sure the * code below doesn't fail due to bad timing values. 
*/ #define itimer_armed(args, i) \ (args->itimers[i].it_interval.tv_sec || \ args->itimers[i].it_interval.tv_usec) if (itimer_armed(args, 0)) sys_setitimer(ITIMER_REAL, &args->itimers[0], NULL); if (itimer_armed(args, 1)) sys_setitimer(ITIMER_VIRTUAL, &args->itimers[1], NULL); if (itimer_armed(args, 2)) sys_setitimer(ITIMER_PROF, &args->itimers[2], NULL); ret = sys_munmap(args->task_entries, TASK_ENTRIES_SIZE); if (ret < 0) { /* Pack the source line number (shifted left by 16) and the low 16 bits of the negated munmap result into ret */ ret = ((long)__LINE__ << 16) | ((-ret) & 0xffff); goto core_restore_failed; } /* * Sigframe stack. */ new_sp = (long)rt_sigframe + SIGFRAME_OFFSET; /* * Prepare the stack and call for sigreturn, * pure assembly since we don't need any additional * code insns from gcc. */ ARCH_RT_SIGRETURN(new_sp); /* Failure path used while logging and task_entries are still available */ core_restore_end: futex_abort_and_wake(&task_entries->nr_in_progress); pr_err("Restorer fail %ld\n", sys_getpid()); sys_exit_group(1); return -1; /* Failure path for errors after the siginfo / task_entries unmaps; it does no logging */ core_restore_failed: ARCH_FAIL_CORE_RESTORE; return ret; }