Exemplo n.º 1
0
/*
 * Descriptor-passing demo.
 *
 * A socketpair is created and the process forks. The child opens three files,
 * seeks each one to a different offset and hands the three descriptors to
 * send_fds() on spair[0]. The parent calls recv_fds() on spair[1] and prints
 * every descriptor it got back together with that descriptor's current file
 * position.
 *
 * sys_chk(), send_fds() and recv_fds() are defined elsewhere; sys_chk()
 * presumably aborts when the wrapped call fails -- confirm against its
 * definition.
 */
int main() {
    int spair[2];
    int pid;

    sys_chk(socketpair(AF_UNIX, SOCK_STREAM, 0, spair));

    sys_chk(pid = fork());
    if (pid != 0) {  // Parent: collect the descriptors and report them.
        int *received;
        int received_len;

        recv_fds(spair[1], &received, &received_len);

        printf("parent: got fds:");
        for (int idx = 0; idx < received_len; ++idx) {
            // SEEK_CUR with offset 0 only queries the position.
            long long pos = (long long)lseek(received[idx], 0, SEEK_CUR);
            printf(" %d (fp=%lld)", received[idx], pos);
        }
        printf("\n");
        return EXIT_SUCCESS;
    }

    // Child: open the files, move each file position, then pass them on.
    int fds[3];

    sys_chk(fds[0] = open("/etc/passwd", O_RDONLY));
    sys_chk(lseek(fds[0], 5, SEEK_SET));

    sys_chk(fds[1] = open("/etc/group", O_RDONLY));
    sys_chk(lseek(fds[1], 10, SEEK_SET));

    sys_chk(fds[2] = open("/bin/sh", O_RDONLY));
    sys_chk(lseek(fds[2], 20, SEEK_SET));

    send_fds(spair[0], fds, 3);
    return EXIT_SUCCESS;
}
Exemplo n.º 2
0
/*
 * Pack a CriuResp message and send it over @socket_fd.
 *
 * When @fd is non-negative the packed bytes are handed to send_fds() together
 * with that single descriptor; otherwise the bytes are written to the socket
 * with a plain write().
 *
 * Returns 0 on success, -ENOMEM when the packing buffer cannot be allocated,
 * and -1 when packing or sending fails.
 */
static int send_criu_msg_with_fd(int socket_fd, CriuResp *msg, int fd)
{
	unsigned char *buf;
	int len, ret;
	int status = -1;

	len = criu_resp__get_packed_size(msg);
	buf = xmalloc(len);
	if (!buf)
		return -ENOMEM;

	if (criu_resp__pack(msg, buf) != len) {
		pr_perror("Failed packing response");
		goto out;
	}

	/* A negative fd means there is no descriptor to attach. */
	if (fd < 0)
		ret = write(socket_fd, buf, len);
	else
		ret = send_fds(socket_fd, NULL, 0, &fd, 1, buf, len);

	if (ret < 0) {
		pr_perror("Can't send response");
		goto out;
	}

	status = 0;
out:
	/* Single exit point: the buffer is released on every path past xmalloc. */
	xfree(buf);
	return status;
}
Exemplo n.º 3
0
/*
 * Send the args->nr_fds descriptors stored in args->fds over tsock with a
 * single send_fds() call.
 *
 * Returns whatever send_fds() returned; any non-zero value is logged as a
 * failure before being passed back to the caller.
 */
static int drain_fds(struct parasite_drain_fd *args)
{
	const int status = send_fds(tsock, NULL, 0, args->fds, args->nr_fds, true);

	if (status != 0)
		pr_err("send_fds failed\n");

	return status;
}
Exemplo n.º 4
0
/*
 * Handle a sendmsg() request for the socket identified by the minor number
 * taken from dev_m_in.
 *
 * The caller's struct msg_control is copied in through the IO grant, the
 * receiving socket is located, the control data is read into the receiver's
 * ancillary_data and finally send_fds() is invoked for it.
 *
 * Returns EIO if the control structure cannot be copied in, EDESTADDRREQ for
 * a datagram socket without a usable target address, ENOENT if no datagram
 * socket has that address, ENOTCONN for a non-datagram socket without a peer,
 * the msg_control_read() error if that step fails, and otherwise the result
 * of send_fds().
 */
int do_sendmsg(message *dev_m_in, message *dev_m_out)
{
	struct msg_control msg_ctrl;
	int minor, peer, status, slot;

#if DEBUG == 1
	static int call_count = 0;
	printf("(uds) [%d] do_sendmsg() call_count=%d\n",
					uds_minor(dev_m_in), ++call_count);
#endif

	minor = uds_minor(dev_m_in);

	/* start from a zeroed structure, then fetch the caller's copy */
	memset(&msg_ctrl, 0, sizeof(msg_ctrl));
	status = sys_safecopyfrom(VFS_PROC_NR,
				(cp_grant_id_t) dev_m_in->IO_GRANT,
				(vir_bytes) 0, (vir_bytes) &msg_ctrl,
				sizeof(msg_ctrl));
	if (status != OK)
		return EIO;

	/* locate peer */
	if (uds_fd_table[minor].type != SOCK_DGRAM) {
		/* non-datagram sockets record their peer directly */
		peer = uds_fd_table[minor].peer;
		if (peer == -1)
			return ENOTCONN;
	} else {
		/* a datagram socket needs a target address to send to */
		if (uds_fd_table[minor].target.sun_path[0] == '\0' ||
			uds_fd_table[minor].target.sun_family != AF_UNIX)
			return EDESTADDRREQ;

		/* look for a SOCK_DGRAM socket that is bound on
		 * the target address
		 */
		peer = -1;
		for (slot = 0; slot < NR_FDS; slot++) {
			if (uds_fd_table[slot].type != SOCK_DGRAM)
				continue;
			if (uds_fd_table[slot].addr.sun_family != AF_UNIX)
				continue;
			if (strncmp(uds_fd_table[minor].target.sun_path,
				uds_fd_table[slot].addr.sun_path,
				UNIX_PATH_MAX) != 0)
				continue;

			peer = slot;
			break;
		}

		if (peer == -1)
			return ENOENT;
	}

#if DEBUG == 1
	printf("(uds) [%d] sendmsg() -- peer=%d\n", minor, peer);
#endif
	/* note: the peer's ancillary_data may already hold file
	 * descriptors if the peer has not called recvmsg() yet.
	 * That's okay. The receiver will get the current file
	 * descriptors plus the new ones.
	 */
	status = msg_control_read(&msg_ctrl,
				&uds_fd_table[peer].ancillary_data, minor);
	if (status != OK)
		return status;

	return send_fds(minor, &uds_fd_table[peer].ancillary_data);
}
Exemplo n.º 5
0
int main(int argc, char ** argv) {
	int listen_port = -1;
	char listen_port_str[8];
	const char * ctrl_socket_path = NULL;

	{
		int opt;
		while ((opt = getopt(argc, argv, "p:hu:")) != EOF) {
			switch (opt) {
			case 'p' :
				listen_port = atoi(optarg);
				break;
			case 'h' :
				fprintf(stderr, "%s [-p port] [-u socket-path]\n", argv[0]);
				fprintf(stderr, "default: -p 9134\n");
				exit(0);
			case 'u' :
				ctrl_socket_path = optarg;
				break;
			}
		}
		argc -= optind;
		argv += optind;
	}

	if (listen_port == -1) {
		listen_port = 9134;
	}
	sprintf(listen_port_str, "%d", listen_port);

	typedef std::vector<fd_ctx> server_sockets_t;
	server_sockets_t server_sockets;
	peer_sockets_t peer_sockets;

	fd_ctx ctrl_socket, ctrl_socket_conn;
	bool ctrl_socket_mode_listen = false;
	bool decay_mode = false;

	ctrl_socket.fd = -1;
	ctrl_socket_conn.fd = -1;

	int sockets_inherited = 0;

	int epoll = epoll_create(1024);
	if (epoll < 0) {
		VPERROR("epoll_create"); exit(1);
	}

	if (ctrl_socket_path) {
		int s = socket(PF_UNIX, SOCK_SEQPACKET, 0);
		if (s < 0) {
			VPERROR("socket(AF_UNIX)");
			exit(1);
		}
		struct sockaddr_un sun;
		sun.sun_family = AF_UNIX;
		strncpy(sun.sun_path, ctrl_socket_path, sizeof(sun.sun_path));

		if (connect(s, (sockaddr *) &sun, sizeof(sun))) {
			if (errno == ECONNREFUSED || errno == ENOENT) {
				if (errno == ECONNREFUSED) {
					if (unlink(ctrl_socket_path) < 0) {
						fprintf(stderr, "unlink(%s): %s\n", ctrl_socket_path, strerror(errno));
						exit(1);
					}
				}
				ctrl_socket_listen(s, ctrl_socket_path);
				ctrl_socket.fd = s;
				poll_in(epoll, &ctrl_socket);
				ctrl_socket_mode_listen = true;
			} else {
				fprintf(stderr, "connect(%s): %s\n", ctrl_socket_path, strerror(errno));
			}
		} else {
			char buf[16];
			ssize_t n = send(s, "unlisten", sizeof("unlisten") - 1, 0);
			if (n < 0) {
				VPERROR("sendmsg");
				exit(1);
			} else if (n == 0) {
				fprintf(stderr, "unexpected EOF\n");
				exit(1);
			}

			// blocking read
			n = recv(s, buf, sizeof(buf), 0);
			if (strncmp(buf, "unlistening", strlen("unlistening")) != 0) {
				fprintf(stderr, "running server reported: ");
				fwrite(buf, n, 1, stderr);
				exit(1);
			}
			ctrl_socket_conn.fd = s;
			poll_in(epoll, &ctrl_socket_conn);
		}
	}

	{
		struct addrinfo hints, * ai_res;
		hints.ai_family   = AF_UNSPEC;
		hints.ai_socktype = SOCK_STREAM;
		hints.ai_protocol = IPPROTO_TCP;
		hints.ai_flags    = AI_PASSIVE;

		int r = getaddrinfo(NULL, listen_port_str, &hints, &ai_res);
		if (r) {
			fprintf(stderr, "getaddrinfo: %s\n", gai_strerror(r));
			exit(1);
		}

		for (struct addrinfo * ai = ai_res; ai; ai = ai->ai_next) {
			int s = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
			if (s < 0) {
				VPERROR("socket"); exit(1);
			}
			if (ai->ai_family == AF_INET6) {
				int on = 1;
				if (setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY, 
							   (char *)&on, sizeof(on)) == -1) {
					VPERROR("setsockopt(IPV6_ONLY)");
					exit(1);
				}
			}
			{
				int on = 1;
				if (setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (char *) &on, sizeof(on)) == -1) {
					VPERROR("setsockopt(REUSEADDR)");
					exit(1);
				}
			}
			if (bind(s, ai->ai_addr, ai->ai_addrlen) < 0) {
				VPERROR("bind"); exit(1);
			}
			if (listen(s, 50) < 0) {
				VPERROR("listen"); exit(1);
			}
			fd_ctx c;
			c.fd = s;
			c.is_server = true;
			c.protocol = ai->ai_protocol;
			char * strp = c.buf;
			int slen  = sizeof(c.buf);
			if (ai->ai_family == AF_INET6) {
				*strp++ = '[';
				slen -= 2;
			}
			get_ip_str(ai->ai_addr, strp, slen);
			if (ai->ai_family == AF_INET6) {
				strcat(c.buf, "]");
			}
			sprintf(c.buf + strlen(c.buf), ":%d", listen_port);
			server_sockets.push_back(c);
		}
		freeaddrinfo(ai_res);
	}

	for (int i = 0; i < server_sockets.size(); ++i) {
		poll_in(epoll, &server_sockets[i]);
	}

	epoll_event epoll_events[32];
	const int epoll_max_events = 32;

	fd_ctx fd_ctx_finder;
	signal(SIGUSR1, sigusr1);
	signal(SIGPIPE, SIG_IGN);

	total_sockets  = server_sockets.size();
	time_t status_time = time(NULL);

	while (total_sockets) {
		if (unlikely(got_sigusr1)) {
			// close listening sockets
			for (int i = 0; i < server_sockets.size(); ++i) {
				fprintf(stderr, "close server %s\n", server_sockets[i].buf);
				if (epoll_ctl(epoll, EPOLL_CTL_DEL, server_sockets[i].fd, NULL) < 0) {
					VPERROR("epoll_ctl");
				}
				close(server_sockets[i].fd);
				--total_sockets;
			}
			got_sigusr1 = false;
		}
		if (unlikely(status_time + 5 < time(NULL))) {
			fprintf(stderr, "%d connections, %d identified peers\n", total_connections - server_sockets.size(), peer_sockets.size());
			status_time = time(NULL);
		}

		int ep_num = epoll_wait(epoll, epoll_events, epoll_max_events, 1000);
		if (unlikely(ep_num < 0)) {
			if (errno == EINTR) continue;
			VPERROR("epoll_wait"); continue;
		}
		bool epoll_restart = false;
		for (int epi = 0; epi < ep_num && ! epoll_restart; ++epi) {
			fd_ctx * ctxp = (fd_ctx *) epoll_events[epi].data.ptr;

			if (unlikely(ctxp == &ctrl_socket)) {
				sockaddr_storage ss;
				socklen_t sl = sizeof(ss);
				
				int nsock = accept(ctxp->fd, (sockaddr *) &ss, &sl);
				if (nsock < 0) {
					VPERROR("accept"); continue;
				}
				epoll_event ev;
				ev.events   = EPOLLIN;
				ev.data.ptr = (void *) &ctrl_socket_conn;
				if (epoll_ctl(epoll, EPOLL_CTL_ADD, nsock, &ev) < 0) {
					VPERROR("epoll_ctl");
					close(nsock);
					continue;
				}

				// we only ever accept one ctrl client
				if (epoll_ctl(epoll, EPOLL_CTL_DEL, ctrl_socket.fd, NULL) < 0) {
					VPERROR("epoll_ctl");
					close(nsock);
					continue;
				}
				ctrl_socket_conn.fd = nsock;
			} else if (unlikely(ctxp == &ctrl_socket_conn)) {
				if (ctrl_socket_mode_listen) {
					char buf[1024];

					int n = read(ctxp->fd, buf, sizeof(buf));
					if (n < 0) {
						if (errno == EINTR || errno == EAGAIN) continue;
						VPERROR("read");
						close(ctxp->fd);
						poll_in(epoll, &ctrl_socket);
					} else if (n == 0) {
						close(ctxp->fd);
						poll_in(epoll, &ctrl_socket);
					} else {
						if (strncmp(buf, "unlisten", sizeof("unlisten") - 1) == 0) {
							for (int i = 0; i < server_sockets.size(); ++i) {
								fprintf(stderr, "close server %s\n", server_sockets[i].buf);
								if (epoll_ctl(epoll, EPOLL_CTL_DEL, server_sockets[i].fd, NULL) < 0) {
									VPERROR("epoll_ctl");
								}
								close(server_sockets[i].fd);
								--total_sockets;
							}
							if (write(ctrl_socket_conn.fd, "unlistening", sizeof("unlistening") - 1) < 0) {
								VPERROR("write");
							} else {
								int nsent = 0;
								
								do {
									nsent = send_fds(ctrl_socket_conn.fd, epoll, peer_sockets.begin(), peer_sockets.end(), &peer_sockets);
									if (nsent) {
										fprintf(stderr, "bulk send: %d\n", nsent);
									}
								} while (nsent && ! peer_sockets.empty());
								epoll_restart = true;
								decay_mode = true;
							}
						}
					}
				} else {
					msghdr msg;
					iovec iov;
					optional_buf<MAX_CONTROL_MESSAGE_CONTROL_SIZE, (MAX_CONTROL_MESSAGE_TOTAL_SIZE > FDCTX_BUFFER_SIZE)> control;
					char * controlp = control.placeholder ?
						ctxp->buf + MAX_CONTROL_MESSAGE_SIZE :
						control.value;
					optional_buf<MAX_CONTROL_MESSAGE_SIZE, (MAX_CONTROL_MESSAGE_SIZE > FDCTX_BUFFER_SIZE)> buf;
					char * bufp = buf.placeholder ?	ctxp->buf : control.value;

					iov.iov_base = bufp;
					iov.iov_len  = MAX_CONTROL_MESSAGE_SIZE;

					msg.msg_name       = NULL;
					msg.msg_namelen    = 0;
					msg.msg_iov        = &iov;
					msg.msg_iovlen     = 1;
					msg.msg_control    = (void *) controlp;
					msg.msg_controllen = MAX_CONTROL_MESSAGE_CONTROL_SIZE;
					msg.msg_flags      = 0;

					int n = recvmsg(ctxp->fd, &msg, 0);
					if (n < 0) {
						VPERROR("recvmsg");
					} else if (n == 0) {
						fprintf(stderr, "unexpected close\n");
						close(ctxp->fd);
					} else {
						if (strncmp((const char *) iov.iov_base, "desc", std::min(4, n)) == 0) {
							cmsghdr * cmp = CMSG_FIRSTHDR(&msg);
							if (cmp->cmsg_level != SOL_SOCKET || cmp->cmsg_type != SCM_RIGHTS) {
								fprintf(stderr, "malformed control message: wrong type\n");
								exit(1);
							}

							int * uidp = (int *) ((char *) iov.iov_base + 4);
							int * uidpend = (int *) ((char *) iov.iov_base + n);

							int fd_count = 0;
							for (; uidp < uidpend; ++uidp, ++fd_count) {
								int fd = * ((int *) CMSG_DATA(cmp) + fd_count);
								++sockets_inherited;
								++total_sockets;
								fd_ctx * cp = new fd_ctx;
								cp->fd = fd;
								cp->faf_uid = *uidp;
								cp->is_server = false;
								cp->protocol = IPPROTO_TCP;
								cp->buf_len = 0;
								epoll_event ev;
								ev.events = EPOLLIN;
								ev.data.ptr = (void *) cp;
								if (epoll_ctl(epoll, EPOLL_CTL_ADD, cp->fd, &ev) < 0) {
									VPERROR("epoll_ctl");
									--total_sockets;
									close(cp->fd);
									delete cp;
								}
								if (cp->faf_uid != -1) {
									peer_sockets.insert(cp);
								}
							}
						} else if (strncmp((const char *) iov.iov_base, "exit", std::min(4, n)) == 0) {
							close(ctxp->fd);
							int s = socket(PF_UNIX, SOCK_SEQPACKET, 0);
							if (s < 0) {
								VPERROR("socket(PF_UNIX)");
							} else {
								ctrl_socket_listen(s, ctrl_socket_path);
								ctrl_socket.fd = s;
								poll_in(epoll, &ctrl_socket);
								ctrl_socket_mode_listen = true;
							}
							fprintf(stderr, "%d sockets inherited from the dead\n", sockets_inherited);
						}
					}
				}
			} else if (unlikely(ctxp->is_server && ctxp->protocol == IPPROTO_TCP)) {
				sockaddr_storage saddr;
				socklen_t saddrlen = sizeof(saddr);
				int nsock = accept(ctxp->fd, (sockaddr *) &saddr, &saddrlen);
				if (nsock < 0) {
					VPERROR("accept");
				} else {
					++total_sockets;
					fd_ctx * cp = new fd_ctx;
					cp->fd = nsock;
					cp->faf_uid = -1;
					cp->is_server = false;
					cp->protocol = IPPROTO_TCP;
					cp->buf_len = 0;

					epoll_event ev;
					ev.events = EPOLLIN;
					ev.data.ptr = (void *) cp;
					if (epoll_ctl(epoll, EPOLL_CTL_ADD, nsock, &ev) < 0) {
						VPERROR("epoll_ctl");
						--total_sockets;
						close(nsock);
						delete cp;
					}
				}
			} else {
				if (unlikely(decay_mode && ctxp->buf_len == 0)) {
					fprintf(stderr, "single send\n");
					send_fd(ctrl_socket_conn.fd, epoll, ctxp);
					if (ctxp->faf_uid != -1) {
						peer_sockets.erase(ctxp);
					}
					continue; // -> next epoll result
				}

				int n = read(ctxp->fd, ctxp->buf + ctxp->buf_len, PEER_CTX_BUF_SIZE - ctxp->buf_len);
				if (unlikely(n < 0)) {
					if (errno != ECONNRESET && errno != EAGAIN && errno != EINTR) {
						VPERROR("read");
					}
					continue;
				} else if (unlikely(n == 0)) {
					close(ctxp->fd);
					--total_sockets;
					if (ctxp->faf_uid != -1) {
						peer_sockets.erase(ctxp);
					}
					ctxp->remove_myself_from_peer_caches();
					--ctxp->refcount;
					if (ctxp->refcount == 0) {
						delete ctxp;
					} else {
						ctxp->faf_uid = -1;
					}
				} else {
					ctxp->buf_len += n;
					char * buf_head = ctxp->buf;
					bool postprocess = true;

					while (buf_head < ctxp->buf + ctxp->buf_len) {
						proxy_msg_header * h = (proxy_msg_header *) buf_head;
						const int buf_len = ctxp->buf + ctxp->buf_len - buf_head;
						const int in_msg_size = ntohl(h->size);

						if (buf_len < 4) {
							break;
						}
						
						if (unlikely(buf_len > PEER_CTX_BUF_SIZE)) {
							// message to big
							if (epoll_ctl(epoll, EPOLL_CTL_DEL, ctxp->fd, NULL) < 0) {
								VPERROR("epoll_ctl");
							}
							close(ctxp->fd);
							--total_sockets;
							if (ctxp->faf_uid != -1) {
								peer_sockets.erase(ctxp);
							}
							ctxp->remove_myself_from_peer_caches();
							--ctxp->refcount;
							if (ctxp->refcount == 0) {
								delete ctxp;
							} else {
								ctxp->faf_uid = -1;
							}
							postprocess = false;
							break;
						}

						if (in_msg_size + 4 > buf_len) {
							break;
						}

						if (unlikely(ctxp->faf_uid == -1)) {
							proxy_msg_header_set_uid * hu = (proxy_msg_header_set_uid *) h;
							ctxp->faf_uid = ntohs(hu->uid);
							peer_sockets.insert(ctxp);

							buf_head += in_msg_size + 4;
							continue; // -> next message from this fd_ctx
						}

						// in decay mode we always drop, because we expect our
						// caches and refcounts to be inconsistent
						// we can decay without bookkeeping if we never send any packets
						// out (== we never expect a context to exists unless epoll still
						// knows about it)
						if (! decay_mode) {
							int uid = ntohs(h->destuid);

							fd_ctx * peer = ctxp->peers.find(uid);

							if (unlikely(! peer)) {
								fd_ctx_finder.faf_uid = uid;
								peer_sockets_t::iterator iter = peer_sockets.find(&fd_ctx_finder);
								if (iter != peer_sockets.end()) {
									peer = *iter;
									ctxp->peers.add(peer);
								} else {
									buf_head += in_msg_size + 4;
									continue;
								}
							}
							
							int in_port = ntohs(h->port);
							proxy_msg_header_to_peer * hout = (proxy_msg_header_to_peer *) (buf_head + OUT_HEADER_OFFSET_ADJ);
							hout->port = htons(in_port);
							const int out_size = in_msg_size - OUT_HEADER_OFFSET_ADJ;
							hout->size = htonl(out_size);
							
							{
								int n = write(peer->fd, (char *) hout, out_size + 4);
								if (unlikely(n < 0)) {
									if (errno != ECONNRESET && errno != EPIPE) {
										VPERROR("write");
									}
								} else if (unlikely(n != out_size + 4)) {
									fprintf(stderr, "short write (%d of %d\n", n, out_size + 4);
								}
							}
						}
						buf_head += in_msg_size + 4;
					}
					if (likely(postprocess)) {
						int new_buflen = ctxp->buf + ctxp->buf_len - buf_head;

						if (unlikely(new_buflen && ctxp->buf != buf_head)) {
							for (char * p = ctxp->buf; buf_head < ctxp->buf + ctxp->buf_len; ++p, ++buf_head) {
								*p = *buf_head;
							}
						}
						ctxp->buf_len = new_buflen;
					}
					// we want to get rid of clients as soon as possible and
					// dont wait for them to send the next message to trigger it
					if (unlikely(decay_mode && ctxp->buf_len == 0)) {
						send_fd(ctrl_socket_conn.fd, epoll, ctxp);
						if (ctxp->faf_uid != -1) {
							peer_sockets.erase(ctxp);
						}
					}
				}
			}
		}
	}
	if (decay_mode && ctrl_socket_path) {
		close(ctrl_socket.fd);
		unlink(ctrl_socket_path);
		if (write(ctrl_socket_conn.fd, "exit", strlen("exit")) < 0) {
			VPERROR("send");
		}
	}
	fprintf(stderr, "exit due to %d sockets left to serve\n", total_sockets);
	exit(0);
}
Exemplo n.º 6
0
int send_fd(int ctrlsock, int epoll, fd_ctx * ctxp) {
	return send_fds(ctrlsock, epoll, &ctxp, &ctxp + 1, (dummy_erase_container<fd_ctx *> *) NULL);
}