/*
 * Re-apply the socket-level options recorded in @soe to the socket @sk.
 *
 * No step short-circuits the others: every option is attempted even if an
 * earlier one failed, and the individual results are OR-ed together.  The
 * return value is therefore 0 only when every step returned 0.
 */
int restore_socket_opts(int sk, SkOptsEntry *soe)
{
	/* In kernel a bufsize value is doubled, so pass half of what was saved. */
	u32 bufs[2] = { soe->so_sndbuf / 2, soe->so_rcvbuf / 2 };
	struct timeval tmo;
	int err = 0;
	int on;

	pr_info("%d restore sndbuf %d rcv buf %d\n", sk, soe->so_sndbuf, soe->so_rcvbuf);

	/* setsockopt() multiplies the input values by 2 */
	err |= userns_call(sk_setbufs, UNS_ASYNC, bufs, sizeof(bufs), sk);

	/* Options that carry a value: restored only when present in the entry. */
	if (soe->has_so_priority) {
		pr_debug("\trestore priority %d for socket\n", soe->so_priority);
		err |= restore_opt(sk, SOL_SOCKET, SO_PRIORITY, &soe->so_priority);
	}

	if (soe->has_so_rcvlowat) {
		pr_debug("\trestore rcvlowat %d for socket\n", soe->so_rcvlowat);
		err |= restore_opt(sk, SOL_SOCKET, SO_RCVLOWAT, &soe->so_rcvlowat);
	}

	if (soe->has_so_mark) {
		pr_debug("\trestore mark %d for socket\n", soe->so_mark);
		err |= restore_opt(sk, SOL_SOCKET, SO_MARK, &soe->so_mark);
	}

	/*
	 * Boolean options: only ever switched on, and only when the entry
	 * both carries the field and has it set.
	 */
	if (soe->has_so_passcred && soe->so_passcred) {
		pr_debug("\tset passcred for socket\n");
		on = 1;
		err |= restore_opt(sk, SOL_SOCKET, SO_PASSCRED, &on);
	}

	if (soe->has_so_passsec && soe->so_passsec) {
		pr_debug("\tset passsec for socket\n");
		on = 1;
		err |= restore_opt(sk, SOL_SOCKET, SO_PASSSEC, &on);
	}

	if (soe->has_so_dontroute && soe->so_dontroute) {
		pr_debug("\tset dontroute for socket\n");
		on = 1;
		err |= restore_opt(sk, SOL_SOCKET, SO_DONTROUTE, &on);
	}

	if (soe->has_so_no_check && soe->so_no_check) {
		pr_debug("\tset no_check for socket\n");
		on = 1;
		err |= restore_opt(sk, SOL_SOCKET, SO_NO_CHECK, &on);
	}

	/* Send and receive timeouts are restored unconditionally. */
	tmo.tv_sec = soe->so_snd_tmo_sec;
	tmo.tv_usec = soe->so_snd_tmo_usec;
	err |= restore_opt(sk, SOL_SOCKET, SO_SNDTIMEO, &tmo);

	tmo.tv_sec = soe->so_rcv_tmo_sec;
	tmo.tv_usec = soe->so_rcv_tmo_usec;
	err |= restore_opt(sk, SOL_SOCKET, SO_RCVTIMEO, &tmo);

	err |= restore_bound_dev(sk, soe);
	err |= restore_socket_filter(sk, soe);

	/*
	 * SO_REUSEADDR is deliberately not handled here: its restore
	 * depends on the type of socket.
	 */

	return err;
}
/*
 * Get @sk ready for restore: raise its buffers to the maximum size and
 * put it into non-blocking mode.
 *
 * The caller is expected to run the other socket restoring routines
 * afterwards, to drop the non-blocking flag and to set the proper send
 * and receive buffer sizes.
 *
 * Returns 0 on success and -1 if the buffers could not be set or the
 * file status flags could not be read or updated.
 */
int restore_prepare_socket(int sk)
{
	/* In kernel a bufsize has type int and a value is doubled. */
	u32 maxbuf[2] = { INT_MAX / 2, INT_MAX / 2 };
	int cur_flags;

	if (userns_call(sk_setbufs, 0, maxbuf, sizeof(maxbuf), sk) != 0)
		return -1;

	/* Prevent blocking on restore */
	cur_flags = fcntl(sk, F_GETFL, 0);
	if (cur_flags == -1) {
		pr_perror("Unable to get flags for %d", sk);
		return -1;
	}

	if (fcntl(sk, F_SETFL, cur_flags | O_NONBLOCK) != 0) {
		pr_perror("Unable to set O_NONBLOCK for %d", sk);
		return -1;
	}

	return 0;
}
int sysctl_op(struct sysctl_req *req, size_t nr_req, int op, unsigned int ns) { int i, fd, ret; struct sysctl_userns_req *userns_req; struct sysctl_req *cur; if (nr_req == 0) return 0; if (ns & ~KNOWN_NS_MASK) { pr_err("don't know how to restore some namespaces in %u\n", ns); return -1; } /* The way sysctl files behave on open/write depends on the namespace * they correspond to. If we don't want to interact with something in a * namespace (e.g. kernel/cap_last_cap is global), we can do this from * the current process. Similarly, if we're accessing net namespaces, * we can just do the operation from our current process, since * anything with CAP_NET_ADMIN can write to the net/ sysctls, and we * still have that even when restoring in a user ns. * * For IPC/UTS, we restore them as described above. * * For read operations, we need to copy the values back to return. * Fortunately, we only do read on dump (or global reads on restore), * so we can do those in process as well. */ if (!ns || ns & CLONE_NEWNET || op == CTL_READ) return __nonuserns_sysctl_op(req, nr_req, op); /* * In order to avoid lots of opening of /proc/sys for each struct sysctl_req, * we encode each array of sysctl_reqs into one contiguous region of memory so * it can be passed via userns_call if necessary. It looks like this: * * struct sysctl_userns_req struct sysctl_req name arg * --------------------------------------------------------------------------- * | op | nr_req | reqs | <fields> | name | arg | "the name" | "the arg" ... 
* --------------------------------------------------------------------------- * |____^ |______|__^ ^ * |_______________| */ userns_req = alloca(MAX_UNSFD_MSG_SIZE); userns_req->op = op; userns_req->nr_req = nr_req; userns_req->ns = ns; userns_req->reqs = (struct sysctl_req *) (&userns_req[1]); cur = userns_req->reqs; for (i = 0; i < nr_req; i++) { int arg_len = sysctl_userns_arg_size(req[i].type); int name_len = strlen(req[i].name) + 1; int total_len = sizeof(*cur) + arg_len + name_len; if (((char *) cur) + total_len >= ((char *) userns_req) + MAX_UNSFD_MSG_SIZE) { pr_err("sysctl msg %s too big: %d\n", req[i].name, total_len); return -1; } /* copy over the non-pointer fields */ cur->type = req[i].type; cur->flags = req[i].flags; cur->name = (char *) &cur[1]; strcpy(cur->name, req[i].name); cur->arg = cur->name + name_len; memcpy(cur->arg, req[i].arg, arg_len); cur = (struct sysctl_req *) (((char *) cur) + total_len); } fd = open_proc(PROC_SELF, "ns"); if (fd < 0) return -1; ret = userns_call(__userns_sysctl_op, 0, userns_req, MAX_UNSFD_MSG_SIZE, fd); close(fd); return ret; }
static int do_open_image(struct cr_img *img, int dfd, int type, unsigned long oflags, char *path) { int ret, flags; flags = oflags & ~(O_NOBUF | O_SERVICE | O_FORCE_LOCAL); /* * For pages images dedup we need to open images read-write on * restore, that may require proper capabilities, so we ask * usernsd to do it for us */ if (root_ns_mask & CLONE_NEWUSER && type == CR_FD_PAGES && oflags & O_RDWR) { struct openat_args pa = { .flags = flags, .err = 0, .mode = CR_FD_PERM, }; snprintf(pa.path, PATH_MAX, "%s", path); ret = userns_call(userns_openat, UNS_FDOUT, &pa, sizeof(struct openat_args), dfd); if (ret < 0) errno = pa.err; } else ret = openat(dfd, path, flags, CR_FD_PERM); if (ret < 0) { if (!(flags & O_CREAT) && (errno == ENOENT || ret == -ENOENT)) { pr_info("No %s image\n", path); img->_x.fd = EMPTY_IMG_FD; goto skip_magic; } pr_perror("Unable to open %s", path); goto err; } img->_x.fd = ret; if (oflags & O_NOBUF) bfd_setraw(&img->_x); else { if (flags == O_RDONLY) ret = bfdopenr(&img->_x); else ret = bfdopenw(&img->_x); if (ret) goto err; } if (imgset_template[type].magic == RAW_IMAGE_MAGIC) goto skip_magic; if (flags == O_RDONLY) ret = img_check_magic(img, oflags, type, path); else ret = img_write_magic(img, oflags, type); if (ret) goto err; skip_magic: return 0; err: return -1; }