/** * Create a socket for inbound (port-forwarded) connections to * src_addr (port is part of sockaddr, so not a separate argument). * * The socket is non-blocking and TCP sockets has SIGPIPE disabled if * possible. On Linux it's not possible and should be disabled for * each send(2) individually. * * TODO?: Support v6-mapped v4 so that user can specify she wants * "udp" and get both versions? */ SOCKET proxy_bound_socket(int sdom, int stype, struct sockaddr *src_addr) { SOCKET s; int on; const socklen_t onlen = sizeof(on); int status; int sockerr; s = proxy_create_socket(sdom, stype); if (s == INVALID_SOCKET) { return INVALID_SOCKET; } DPRINTF(("socket %d\n", s)); on = 1; status = setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (char *)&on, onlen); if (status < 0) { /* not good, but not fatal */ DPRINTF(("SO_REUSEADDR: %R[sockerr]\n", SOCKERRNO())); } status = bind(s, src_addr, sdom == PF_INET ? sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6)); if (status == SOCKET_ERROR) { sockerr = SOCKERRNO(); DPRINTF(("bind: %R[sockerr]\n", sockerr)); closesocket(s); SET_SOCKERRNO(sockerr); return INVALID_SOCKET; } if (stype == SOCK_STREAM) { status = listen(s, 5); if (status == SOCKET_ERROR) { sockerr = SOCKERRNO(); DPRINTF(("listen: %R[sockerr]\n", sockerr)); closesocket(s); SET_SOCKERRNO(sockerr); return INVALID_SOCKET; } } return s; }
/** * Forward request to the req::residx resolver in the pxdns::resolvers * array of upstream resolvers. * * Returns 1 on success, 0 on failure. */ static int pxdns_forward_outbound(struct pxdns *pxdns, struct request *req) { union sockaddr_inet *resolver; ssize_t nsent; DPRINTF2(("%s: req %p: sending to resolver #%lu\n", __func__, (void *)req, (unsigned long)req->residx)); LWIP_ASSERT1(req->generation == pxdns->generation); LWIP_ASSERT1(req->residx < pxdns->nresolvers); resolver = &pxdns->resolvers[req->residx]; if (resolver->sa.sa_family == AF_INET) { nsent = sendto(pxdns->sock4, req->data, req->size, 0, &resolver->sa, sizeof(resolver->sin)); } else if (resolver->sa.sa_family == AF_INET6) { if (pxdns->sock6 != INVALID_SOCKET) { nsent = sendto(pxdns->sock6, req->data, req->size, 0, &resolver->sa, sizeof(resolver->sin6)); } else { /* shouldn't happen, we should have weeded out IPv6 resolvers */ return 0; } } else { /* shouldn't happen, we should have weeded out unsupported families */ return 0; } if ((size_t)nsent == req->size) { return 1; /* sent */ } if (nsent < 0) { DPRINTF2(("%s: send: %R[sockerr]\n", __func__, SOCKERRNO())); } else { DPRINTF2(("%s: sent only %lu of %lu\n", __func__, (unsigned long)nsent, (unsigned long)req->size)); } return 0; /* not sent, caller will retry as necessary */ }
/**
 * Send a datagram on the write end of a poll manager channel.
 *
 * Only slots in the range [0, POLLMGR_SLOT_FIRST_DYNAMIC) are
 * accepted; anything else (including a negative slot, which would
 * otherwise index pollmgr.chan[] out of bounds) is rejected.
 *
 * @param slot    Channel slot number.
 * @param buf     Data to send.
 * @param nbytes  Number of bytes to send.
 *
 * @return nbytes if the whole datagram was sent; -1 if the slot is
 *         out of range, send() failed, or the datagram was truncated.
 */
ssize_t
pollmgr_chan_send(int slot, void *buf, size_t nbytes)
{
    SOCKET fd;
    ssize_t nsent;

    if (slot < 0 || slot >= POLLMGR_SLOT_FIRST_DYNAMIC) {
        return -1;
    }

    fd = pollmgr.chan[slot][POLLMGR_CHFD_WR];
    nsent = send(fd, buf, (int)nbytes, 0);
    if (nsent == SOCKET_ERROR) {
        DPRINTF(("send on chan %d: %R[sockerr]\n", slot, SOCKERRNO()));
        return -1;
    }
    else if ((size_t)nsent != nbytes) {
        /* a partial send is useless to the reader, report it as failure */
        DPRINTF(("send on chan %d: datagram truncated to %u bytes\n",
                 slot, (unsigned int)nsent));
        return -1;
    }

    return nsent;
}
/**
 * Poll manager callback for the host socket of a proxied UDP
 * conversation (handler->data is the struct pxudp).
 *
 * Any event other than POLLIN/POLLERR schedules the pxudp for
 * deletion.  A POLLERR is only logged (after fetching SO_ERROR).  On
 * POLLIN one datagram is read into pollmgr_udpbuf, copied into a
 * freshly allocated pbuf, queued on pxudp->inmbox and announced with
 * proxy_lwip_post().  If any of those steps fails the datagram is
 * dropped.
 *
 * @return The result of pxudp_schedule_delete() for unexpected
 *         events, POLLIN in every other case.
 */
static int
pxudp_pmgr_pump(struct pollmgr_handler *handler, SOCKET fd, int revents)
{
    struct pxudp *pxudp = (struct pxudp *)handler->data;
    struct pbuf *dgram;
    ssize_t len;

    LWIP_ASSERT1(handler == &pxudp->pmhdl);
    LWIP_ASSERT1(fd == pxudp->sock);
    LWIP_UNUSED_ARG(fd);

    if ((revents & ~(POLLIN|POLLERR)) != 0) {
        DPRINTF(("%s: unexpected revents 0x%x\n", __func__, revents));
        return pxudp_schedule_delete(pxudp);
    }

    /*
     * XXX: there seems to be no way to match the error with the
     * outgoing datagram that triggered it, since sends are done
     * non-blocking from the lwip thread.  Just log it.
     */
    if ((revents & POLLERR) != 0) {
        int pending = -1;
        socklen_t pendinglen = (socklen_t)sizeof(pending);

        if (getsockopt(pxudp->sock, SOL_SOCKET, SO_ERROR,
                       (char *)&pending, &pendinglen) < 0)
        {
            DPRINTF(("%s: sock %d: SO_ERROR failed:%R[sockerr]\n",
                     __func__, pxudp->sock, SOCKERRNO()));
        }
        else {
            DPRINTF(("%s: sock %d: %R[sockerr]\n",
                     __func__, pxudp->sock, pending));
        }
    }

    if (!(revents & POLLIN)) {
        return POLLIN;
    }

    len = recv(pxudp->sock, pollmgr_udpbuf, sizeof(pollmgr_udpbuf), 0);
    if (len == SOCKET_ERROR) {
        DPRINTF(("%s: %R[sockerr]\n", __func__, SOCKERRNO()));
        return POLLIN;
    }

    dgram = pbuf_alloc(PBUF_RAW, (u16_t)len, PBUF_RAM);
    if (dgram == NULL) {
        DPRINTF(("%s: pbuf_alloc(%d) failed\n", __func__, (int)len));
        return POLLIN;
    }

    if (pbuf_take(dgram, pollmgr_udpbuf, (u16_t)len) != ERR_OK) {
        DPRINTF(("%s: pbuf_take(%d) failed\n", __func__, (int)len));
        pbuf_free(dgram);
        return POLLIN;
    }

    /* hand the datagram over; if the mailbox is full, drop it */
    if (sys_mbox_trypost(&pxudp->inmbox, dgram) != ERR_OK) {
        pbuf_free(dgram);
        return POLLIN;
    }

    proxy_lwip_post(&pxudp->msg_inbound);
    return POLLIN;
}
/**
 * Forward an outbound datagram received on the lwIP side to the host
 * socket of the pxudp.
 *
 * Before sending, the host socket is adjusted to mirror the current
 * packet's IP header: TTL, TOS and the "don't fragment" setting for
 * IPv4, hop limit for IPv6.  The last value applied is cached in the
 * pxudp so that setsockopt() is only issued when something changes.
 * Unless the pxudp is mapped, the TTL/hop limit is decremented by
 * one, and a packet for which pxudp_ttl_expired() reports expiry is
 * not forwarded at all.
 *
 * The pbuf is freed here after the send attempt; the result of
 * proxy_sendto() is not checked.
 *
 * @param pxudp  Proxy state for this conversation.
 * @param p      Datagram payload.
 * @param addr   Unused.
 * @param port   Unused.
 */
static void
pxudp_pcb_forward_outbound(struct pxudp *pxudp, struct pbuf *p,
                           ip_addr_t *addr, u16_t port)
{
    int status;

    LWIP_UNUSED_ARG(addr);
    LWIP_UNUSED_ARG(port);

    if (!pxudp->is_mapped && pxudp_ttl_expired(p)) {
        return;
    }

    if (ip_current_is_v6()) {   /* IPv6 */
        const struct ip6_hdr *ip6h = ip6_current_header();
        int hoplim = IP6H_HOPLIM(ip6h);

        if (!pxudp->is_mapped) {
            LWIP_ASSERT1(hoplim > 1);
            --hoplim;
        }

        if (hoplim != pxudp->ttl) {
            status = setsockopt(pxudp->sock, IPPROTO_IPV6, IPV6_UNICAST_HOPS,
                                (char *)&hoplim, sizeof(hoplim));
            if (RT_LIKELY(status == 0)) {
                pxudp->ttl = hoplim;
            }
            else {
                DPRINTF(("IPV6_UNICAST_HOPS: %R[sockerr]\n", SOCKERRNO()));
            }
        }
    }
    else {                      /* IPv4 */
        const struct ip_hdr *ip4h = ip_current_header();
        int ttl, tos, df;

        /*
         * The socket option controlling DF is named differently on
         * different OSes.  IP_HDRINCL (as used for ping) is not an
         * alternative here since it is only valid for SOCK_RAW.
         */
# define USE_DF_OPTION(_Optname)                        \
        const int dfopt = _Optname;                     \
        const char * const dfoptname = #_Optname;
#if   defined(IP_MTU_DISCOVER)  /* Linux */
        USE_DF_OPTION(IP_MTU_DISCOVER);
#elif defined(IP_DONTFRAG)      /* Solaris 11+, FreeBSD */
        USE_DF_OPTION(IP_DONTFRAG);
#elif defined(IP_DONTFRAGMENT)  /* Windows */
        USE_DF_OPTION(IP_DONTFRAGMENT);
#else
        USE_DF_OPTION(0);
#endif

        ttl = IPH_TTL(ip4h);
        if (!pxudp->is_mapped) {
            LWIP_ASSERT1(ttl > 1);
            --ttl;
        }

        if (ttl != pxudp->ttl) {
            status = setsockopt(pxudp->sock, IPPROTO_IP, IP_TTL,
                                (char *)&ttl, sizeof(ttl));
            if (RT_LIKELY(status == 0)) {
                pxudp->ttl = ttl;
            }
            else {
                DPRINTF(("IP_TTL: %R[sockerr]\n", SOCKERRNO()));
            }
        }

        tos = IPH_TOS(ip4h);
        if (tos != pxudp->tos) {
            status = setsockopt(pxudp->sock, IPPROTO_IP, IP_TOS,
                                (char *)&tos, sizeof(tos));
            if (RT_LIKELY(status == 0)) {
                pxudp->tos = tos;
            }
            else {
                DPRINTF(("IP_TOS: %R[sockerr]\n", SOCKERRNO()));
            }
        }

        /* dfopt is 0 when none of the known DF options is available */
        if (dfopt) {
            df = (IPH_OFFSET(ip4h) & PP_HTONS(IP_DF)) != 0;
#if defined(IP_MTU_DISCOVER)
            /* this option takes a PMTU discovery mode, not a boolean */
            df = df ? IP_PMTUDISC_DO : IP_PMTUDISC_DONT;
#endif
            if (df != pxudp->df) {
                status = setsockopt(pxudp->sock, IPPROTO_IP, dfopt,
                                    (char *)&df, sizeof(df));
                if (RT_LIKELY(status == 0)) {
                    pxudp->df = df;
                }
                else {
                    DPRINTF(("%s: %R[sockerr]\n", dfoptname, SOCKERRNO()));
                }
            }
        }
    }

    /* count datagrams whose local port is 53 */
    if (pxudp->pcb->local_port == 53) {
        ++pxudp->count;
    }

    proxy_sendto(pxudp->sock, p, NULL, 0);
    pbuf_free(p);
}
/**
 * Send the contents of a pbuf chain as a single datagram without
 * first flattening it: each pbuf in the chain becomes one element of
 * a scatter/gather vector.
 *
 * @param sock     Socket to send on.
 * @param p        pbuf chain holding the payload.
 * @param name     Destination address, passed through to the send
 *                 call as given (callers in this file pass NULL).
 * @param namelen  Length of the destination address.
 *
 * @return 0 on success; the negated errno if the vector could not be
 *         allocated (note: not a socket error); the negated socket
 *         error if the send failed.
 */
int
proxy_sendto(SOCKET sock, struct pbuf *p, void *name, size_t namelen)
{
    IOVEC stack_iov[8];         /* enough for the typical case */
    const size_t stack_iov_cnt = sizeof(stack_iov) / sizeof(stack_iov[0]);
    IOVEC *heap_iov = NULL;     /* used only for long chains */
    IOVEC *vec = stack_iov;
    struct pbuf *seg;
    size_t nsegs, idx;
    int result = 0;
    int rc;
#ifndef RT_OS_WINDOWS
    struct msghdr mh;
    ssize_t nsent;
#else
    DWORD nsent;
#endif

    /*
     * UDP protocols tend to use small datagrams to avoid
     * fragmentation, so the on-stack vector usually suffices, but be
     * prepared for longer chains.
     */
    nsegs = pbuf_clen(p);
    if (nsegs > stack_iov_cnt) {
        /*
         * XXX: TODO: check that nsegs is shorter than IOV_MAX
         */
        heap_iov = (IOVEC *)malloc(nsegs * sizeof(*heap_iov));
        if (heap_iov == NULL) {
            return -errno;      /* sic: not a socket error */
        }
        vec = heap_iov;
    }

    seg = p;
    for (idx = 0; idx < nsegs; ++idx) {
        LWIP_ASSERT1(seg != NULL);

        IOVEC_SET_BASE(vec[idx], seg->payload);
        IOVEC_SET_LEN(vec[idx], seg->len);
        seg = seg->next;
    }

#ifndef RT_OS_WINDOWS
    memset(&mh, 0, sizeof(mh));
    mh.msg_name = name;
    mh.msg_namelen = namelen;
    mh.msg_iov = vec;
    mh.msg_iovlen = nsegs;

    nsent = sendmsg(sock, &mh, 0);
    if (nsent >= 0) {
        rc = 0;
    }
    else {
        rc = SOCKET_ERROR;
    }
#else
    rc = WSASendTo(sock, vec, (DWORD)nsegs, &nsent, 0,
                   name, (int)namelen, NULL, NULL);
#endif

    if (rc == SOCKET_ERROR) {
        result = SOCKERRNO();
        DPRINTF(("%s: socket %d: sendmsg: %R[sockerr]\n",
                 __func__, sock, result));
        result = -result;
    }

    free(heap_iov);             /* no-op when the stack vector was used */
    return result;
}
/**
 * Create a socket for an outbound connection to dst_addr:dst_port.
 *
 * The socket is obtained from proxy_create_socket().  If a source
 * address for the domain is configured in g_proxy_options (src6 or
 * src4), the socket is bound to it before connecting.  A connect()
 * that is merely in progress (EINPROGRESS, or EWOULDBLOCK on Windows)
 * is not treated as an error, and the socket is returned as is.
 *
 * On bind/connect failure the socket is closed and the socket error
 * observed at the point of failure is restored before returning.
 *
 * @param sdom      PF_INET or PF_INET6.
 * @param stype     SOCK_STREAM or SOCK_DGRAM.
 * @param dst_addr  Destination address; the ip6 member is used for
 *                  PF_INET6, the ip4 member otherwise.
 * @param dst_port  Destination port; converted with htons() here.
 *
 * @return The socket, or INVALID_SOCKET on failure.
 */
SOCKET
proxy_connected_socket(int sdom, int stype,
                       ipX_addr_t *dst_addr, u16_t dst_port)
{
    struct sockaddr_in6 dst_sin6;
    struct sockaddr_in dst_sin;
    struct sockaddr *pdst_sa;
    socklen_t dst_sa_len;
    const struct sockaddr *psrc_sa;
    socklen_t src_sa_len;
    int status;
    int sockerr;
    SOCKET s;

    LWIP_ASSERT1(sdom == PF_INET || sdom == PF_INET6);
    LWIP_ASSERT1(stype == SOCK_STREAM || stype == SOCK_DGRAM);

    DPRINTF(("---> %s ", stype == SOCK_STREAM ? "TCP" : "UDP"));
    if (sdom == PF_INET6) {
        pdst_sa = (struct sockaddr *)&dst_sin6;

        memset(&dst_sin6, 0, sizeof(dst_sin6));
        /* where sockaddrs carry a length field, set it in the same statement */
#if HAVE_SA_LEN
        dst_sin6.sin6_len =
#endif
            dst_sa_len = sizeof(dst_sin6);
        dst_sin6.sin6_family = AF_INET6;
        memcpy(&dst_sin6.sin6_addr, &dst_addr->ip6, sizeof(ip6_addr_t));
        dst_sin6.sin6_port = htons(dst_port);

        DPRINTF(("[%RTnaipv6]:%d ", &dst_sin6.sin6_addr, dst_port));
    }
    else { /* sdom = PF_INET */
        pdst_sa = (struct sockaddr *)&dst_sin;

        memset(&dst_sin, 0, sizeof(dst_sin));
#if HAVE_SA_LEN
        dst_sin.sin_len =
#endif
            dst_sa_len = sizeof(dst_sin);
        dst_sin.sin_family = AF_INET;
        dst_sin.sin_addr.s_addr = dst_addr->ip4.addr; /* byte-order? */
        dst_sin.sin_port = htons(dst_port);

        DPRINTF(("%RTnaipv4:%d ", dst_sin.sin_addr.s_addr, dst_port));
    }

    s = proxy_create_socket(sdom, stype);
    if (s == INVALID_SOCKET) {
        return INVALID_SOCKET;
    }
    DPRINTF(("socket %d\n", s));

    /* TODO: needs locking if dynamic modifyvm is allowed */
    if (sdom == PF_INET6) {
        psrc_sa = (const struct sockaddr *)g_proxy_options->src6;
        src_sa_len = sizeof(struct sockaddr_in6);
    }
    else {
        psrc_sa = (const struct sockaddr *)g_proxy_options->src4;
        src_sa_len = sizeof(struct sockaddr_in);
    }
    if (psrc_sa != NULL) {
        status = bind(s, psrc_sa, src_sa_len);
        if (status == SOCKET_ERROR) {
            sockerr = SOCKERRNO();
            DPRINTF(("socket %d: bind: %R[sockerr]\n", s, sockerr));
            closesocket(s);
            SET_SOCKERRNO(sockerr);
            return INVALID_SOCKET;
        }
    }

    status = connect(s, pdst_sa, dst_sa_len);
    if (status == SOCKET_ERROR) {
        sockerr = SOCKERRNO();
        /* the socket is non-blocking: "in progress" is the normal outcome */
#if !defined(RT_OS_WINDOWS)
        if (sockerr != EINPROGRESS)
#else
        if (sockerr != EWOULDBLOCK)
#endif
        {
            DPRINTF(("socket %d: connect: %R[sockerr]\n", s, sockerr));
            closesocket(s);
            SET_SOCKERRNO(sockerr);
            return INVALID_SOCKET;
        }
    }

    return s;
}
/** * Create a non-blocking socket. Disable SIGPIPE for TCP sockets if * possible. On Linux it's not possible and should be disabled for * each send(2) individually. */ static SOCKET proxy_create_socket(int sdom, int stype) { SOCKET s; int stype_and_flags; int status; LWIP_UNUSED_ARG(status); /* depends on ifdefs */ stype_and_flags = stype; #if defined(SOCK_NONBLOCK) stype_and_flags |= SOCK_NONBLOCK; #endif /* * Disable SIGPIPE on disconnected socket. It might be easier to * forgo it and just use MSG_NOSIGNAL on each send*(2), since we * have to do it for Linux anyway, but Darwin does NOT have that * flag (but has SO_NOSIGPIPE socket option). */ #if !defined(SOCK_NOSIGPIPE) && !defined(SO_NOSIGPIPE) && !defined(MSG_NOSIGNAL) #if 0 /* XXX: Solaris has neither, the program should ignore SIGPIPE globally */ #error Need a way to disable SIGPIPE on connection oriented sockets! #endif #endif #if defined(SOCK_NOSIGPIPE) if (stype == SOCK_STREAM) { stype_and_flags |= SOCK_NOSIGPIPE; } #endif s = socket(sdom, stype_and_flags, 0); if (s == INVALID_SOCKET) { DPRINTF(("socket: %R[sockerr]\n", SOCKERRNO())); return INVALID_SOCKET; } #if defined(RT_OS_WINDOWS) { u_long mode = 1; status = ioctlsocket(s, FIONBIO, &mode); if (status == SOCKET_ERROR) { DPRINTF(("FIONBIO: %R[sockerr]\n", SOCKERRNO())); closesocket(s); return INVALID_SOCKET; } } #elif !defined(SOCK_NONBLOCK) { int sflags; sflags = fcntl(s, F_GETFL, 0); if (sflags < 0) { DPRINTF(("F_GETFL: %R[sockerr]\n", SOCKERRNO())); closesocket(s); return INVALID_SOCKET; } status = fcntl(s, F_SETFL, sflags | O_NONBLOCK); if (status < 0) { DPRINTF(("O_NONBLOCK: %R[sockerr]\n", SOCKERRNO())); closesocket(s); return INVALID_SOCKET; } } #endif #if !defined(SOCK_NOSIGPIPE) && defined(SO_NOSIGPIPE) if (stype == SOCK_STREAM) { int on = 1; const socklen_t onlen = sizeof(on); status = setsockopt(s, SOL_SOCKET, SO_NOSIGPIPE, &on, onlen); if (status < 0) { DPRINTF(("SO_NOSIGPIPE: %R[sockerr]\n", SOCKERRNO())); closesocket(s); return 
INVALID_SOCKET; } } #endif #if defined(RT_OS_WINDOWS) /* * lwIP only holds one packet of "refused data" for us. Proxy * relies on OS socket send buffer and doesn't do its own * buffering. Unfortunately on Windows send buffer is very small * (8K by default) and is not dynamically adpated by the OS it * seems. So a single large write will fill it up and that will * make lwIP drop segments, causing guest TCP into pathologic * resend patterns. As a quick and dirty fix just bump it up. */ if (stype == SOCK_STREAM) { int sndbuf; socklen_t optlen = sizeof(sndbuf); status = getsockopt(s, SOL_SOCKET, SO_SNDBUF, (char *)&sndbuf, &optlen); if (status == 0) { if (sndbuf < 64 * 1024) { sndbuf = 64 * 1024; status = setsockopt(s, SOL_SOCKET, SO_SNDBUF, (char *)&sndbuf, optlen); if (status != 0) { DPRINTF(("SO_SNDBUF: setsockopt: %R[sockerr]\n", SOCKERRNO())); } } } else { DPRINTF(("SO_SNDBUF: getsockopt: %R[sockerr]\n", SOCKERRNO())); } } #endif return s; }
/**
 * Poll manager callback for the DNS proxy's upstream sockets
 * (handler->data is the struct pxdns; fd is sock4 or sock6 depending
 * on which of the two handlers fired).
 *
 * Reads one reply into pollmgr_udpbuf, looks up the pending request
 * by the 16-bit id at the start of the message, overwrites that id
 * with the request's client_id, stores the result in req->reply and
 * announces it with proxy_lwip_post().  Replies shorter than 12 bytes
 * and replies with no matching request are dropped (the latter are
 * counted in late_answers).  If the reply pbuf cannot be allocated or
 * filled, the request is released with pxdns_request_free().
 *
 * @return Always POLLIN.
 */
static int
pxdns_pmgr_pump(struct pollmgr_handler *handler, SOCKET fd, int revents)
{
    struct pxdns *pxdns = (struct pxdns *)handler->data;
    struct request *req;
    ssize_t len;
    u16_t id;

    LWIP_ASSERT1(handler == &pxdns->pmhdl4 || handler == &pxdns->pmhdl6);
    LWIP_ASSERT1(fd == (handler == &pxdns->pmhdl4 ? pxdns->sock4 : pxdns->sock6));

    if ((revents & ~(POLLIN|POLLERR)) != 0) {
        DPRINTF0(("%s: unexpected revents 0x%x\n", __func__, revents));
        return POLLIN;
    }

    /* an error on the socket is only logged */
    if ((revents & POLLERR) != 0) {
        int pending = -1;
        socklen_t pendinglen = (socklen_t)sizeof(pending);

        if (getsockopt(fd, SOL_SOCKET, SO_ERROR,
                       (char *)&pending, &pendinglen) < 0)
        {
            DPRINTF(("%s: sock %d: SO_ERROR failed: %R[sockerr]\n",
                     __func__, fd, SOCKERRNO()));
        }
        else {
            DPRINTF(("%s: sock %d: %R[sockerr]\n",
                     __func__, fd, pending));
        }
    }

    if (!(revents & POLLIN)) {
        return POLLIN;
    }

    len = recv(fd, pollmgr_udpbuf, sizeof(pollmgr_udpbuf), 0);
    if (len < 0) {
        DPRINTF(("%s: %R[sockerr]\n", __func__, SOCKERRNO()));
        return POLLIN;
    }

    /* check for minimum dns packet length */
    if (len < 12) {
        DPRINTF2(("%s: short reply %lu bytes\n",
                  __func__, (unsigned long)len));
        return POLLIN;
    }

    /* XXX: shall we proxy back RCODE=Refused responses? */

    /* the message id occupies the first bytes of the reply */
    memcpy(&id, pollmgr_udpbuf, sizeof(id));
    req = pxdns_request_find(pxdns, id);
    if (req == NULL) {
        DPRINTF2(("%s: orphaned reply for %d\n", __func__, id));
        ++pxdns->late_answers;
        return POLLIN;
    }

    DPRINTF2(("%s: reply for req=%p: id %d -> client id %d\n",
              __func__, (void *)req, req->id, req->client_id));

    req->reply = pbuf_alloc(PBUF_RAW, len, PBUF_RAM);
    if (req->reply == NULL) {
        DPRINTF(("%s: pbuf_alloc(%d) failed\n", __func__, (int)len));
        pxdns_request_free(req);
        return POLLIN;
    }

    /* put the client's id back in before copying the reply out */
    memcpy(pollmgr_udpbuf, &req->client_id, sizeof(req->client_id));
    if (pbuf_take(req->reply, pollmgr_udpbuf, len) != ERR_OK) {
        DPRINTF(("%s: pbuf_take(%d) failed\n", __func__, (int)len));
        pxdns_request_free(req);
        return POLLIN;
    }

    proxy_lwip_post(&req->msg_reply);
    return POLLIN;
}
int pollmgr_init(void) { struct pollfd *newfds; struct pollmgr_handler **newhdls; nfds_t newcap; int status; nfds_t i; pollmgr.fds = NULL; pollmgr.handlers = NULL; pollmgr.capacity = 0; pollmgr.nfds = 0; for (i = 0; i < POLLMGR_SLOT_STATIC_COUNT; ++i) { pollmgr.chan[i][POLLMGR_CHFD_RD] = INVALID_SOCKET; pollmgr.chan[i][POLLMGR_CHFD_WR] = INVALID_SOCKET; } for (i = 0; i < POLLMGR_SLOT_STATIC_COUNT; ++i) { #ifndef RT_OS_WINDOWS status = socketpair(PF_LOCAL, SOCK_DGRAM, 0, pollmgr.chan[i]); if (status < 0) { DPRINTF(("socketpair: %R[sockerr]\n", SOCKERRNO())); goto cleanup_close; } #else status = RTWinSocketPair(PF_INET, SOCK_DGRAM, 0, pollmgr.chan[i]); if (RT_FAILURE(status)) { goto cleanup_close; } #endif } newcap = 16; /* XXX: magic */ LWIP_ASSERT1(newcap >= POLLMGR_SLOT_STATIC_COUNT); newfds = (struct pollfd *) malloc(newcap * sizeof(*pollmgr.fds)); if (newfds == NULL) { DPRINTF(("%s: Failed to allocate fds array\n", __func__)); goto cleanup_close; } newhdls = (struct pollmgr_handler **) malloc(newcap * sizeof(*pollmgr.handlers)); if (newhdls == NULL) { DPRINTF(("%s: Failed to allocate handlers array\n", __func__)); free(newfds); goto cleanup_close; } pollmgr.capacity = newcap; pollmgr.fds = newfds; pollmgr.handlers = newhdls; pollmgr.nfds = POLLMGR_SLOT_STATIC_COUNT; for (i = 0; i < pollmgr.capacity; ++i) { pollmgr.fds[i].fd = INVALID_SOCKET; pollmgr.fds[i].events = 0; pollmgr.fds[i].revents = 0; } return 0; cleanup_close: for (i = 0; i < POLLMGR_SLOT_STATIC_COUNT; ++i) { SOCKET *chan = pollmgr.chan[i]; if (chan[POLLMGR_CHFD_RD] != INVALID_SOCKET) { closesocket(chan[POLLMGR_CHFD_RD]); closesocket(chan[POLLMGR_CHFD_WR]); } } return -1; }