zmq::stream_engine_t::~stream_engine_t ()
{
    zmq_assert (!plugged);

    if (s != retired_fd) {
#ifdef ZMQ_HAVE_WINDOWS
        int rc = closesocket (s);
        wsa_assert (rc != SOCKET_ERROR);
#else
        int rc = close (s);
#ifdef __FreeBSD_kernel__
        // FreeBSD may return ECONNRESET on close() under load but this is not
        // an error.
        if (rc == -1 && errno == ECONNRESET)
            rc = 0;
#endif
        errno_assert (rc == 0);
#endif
        s = retired_fd;
    }

    int rc = tx_msg.close ();
    errno_assert (rc == 0);

    // Drop reference to metadata and destroy it if we are
    // the only user.
    if (metadata != NULL) {
        if (metadata->drop_ref ()) {
            LIBZMQ_DELETE (metadata);
        }
    }

    LIBZMQ_DELETE (encoder);
    LIBZMQ_DELETE (decoder);
    LIBZMQ_DELETE (mechanism);
}
int getevents (int s, int events, int timeout)
{
    int rc;
    fd_set pollset;
#if defined NN_HAVE_WINDOWS
    SOCKET rcvfd;
    SOCKET sndfd;
#else
    int rcvfd;
    int sndfd;
    int maxfd;
#endif
    size_t fdsz;
    struct timeval tv;
    int revents;

#if !defined NN_HAVE_WINDOWS
    maxfd = 0;
#endif
    FD_ZERO (&pollset);

    if (events & NN_IN) {
        fdsz = sizeof (rcvfd);
        rc = nn_getsockopt (s, NN_SOL_SOCKET, NN_RCVFD, (char*) &rcvfd, &fdsz);
        errno_assert (rc == 0);
        nn_assert (fdsz == sizeof (rcvfd));
        FD_SET (rcvfd, &pollset);
#if !defined NN_HAVE_WINDOWS
        if (rcvfd + 1 > maxfd)
            maxfd = rcvfd + 1;
#endif
    }

    if (events & NN_OUT) {
        fdsz = sizeof (sndfd);
        rc = nn_getsockopt (s, NN_SOL_SOCKET, NN_SNDFD, (char*) &sndfd, &fdsz);
        errno_assert (rc == 0);
        nn_assert (fdsz == sizeof (sndfd));
        FD_SET (sndfd, &pollset);
#if !defined NN_HAVE_WINDOWS
        if (sndfd + 1 > maxfd)
            maxfd = sndfd + 1;
#endif
    }

    if (timeout >= 0) {
        tv.tv_sec = timeout / 1000;
        tv.tv_usec = (timeout % 1000) * 1000;
    }
#if defined NN_HAVE_WINDOWS
    rc = select (0, &pollset, NULL, NULL, timeout < 0 ? NULL : &tv);
    wsa_assert (rc != SOCKET_ERROR);
#else
    rc = select (maxfd, &pollset, NULL, NULL, timeout < 0 ? NULL : &tv);
    errno_assert (rc >= 0);
#endif

    revents = 0;
    if ((events & NN_IN) && FD_ISSET (rcvfd, &pollset))
        revents |= NN_IN;
    if ((events & NN_OUT) && FD_ISSET (sndfd, &pollset))
        revents |= NN_OUT;
    return revents;
}
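/*  Illustrative sketch, not from the original sources: one way a test might
    use getevents () above to wait for a socket to become readable before
    calling nn_recv ().  The socket "s" and the 1000 ms timeout are
    assumptions made for the example. */
static void wait_and_recv_example (int s)
{
    int revents = getevents (s, NN_IN, 1000);
    if (revents & NN_IN) {
        char buf [64];
        int nbytes = nn_recv (s, buf, sizeof (buf), 0);
        nn_assert (nbytes >= 0);
    }
}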
int zmq::wsa_error_to_errno (int errcode) { switch (errcode) { // 10009 - File handle is not valid. case WSAEBADF: return EBADF; // 10013 - Permission denied. case WSAEACCES: return EACCES; // 10014 - Bad address. case WSAEFAULT: return EFAULT; // 10022 - Invalid argument. case WSAEINVAL: return EINVAL; // 10024 - Too many open files. case WSAEMFILE: return EMFILE; // 10036 - Operation now in progress. case WSAEINPROGRESS: return EAGAIN; // 10040 - Message too long. case WSAEMSGSIZE: return EMSGSIZE; // 10043 - Protocol not supported. case WSAEPROTONOSUPPORT: return EPROTONOSUPPORT; // 10047 - Address family not supported by protocol family. case WSAEAFNOSUPPORT: return EAFNOSUPPORT; // 10048 - Address already in use. case WSAEADDRINUSE: return EADDRINUSE; // 10049 - Cannot assign requested address. case WSAEADDRNOTAVAIL: return EADDRNOTAVAIL; // 10050 - Network is down. case WSAENETDOWN: return ENETDOWN; // 10051 - Network is unreachable. case WSAENETUNREACH: return ENETUNREACH; // 10052 - Network dropped connection on reset. case WSAENETRESET: return ENETRESET; // 10053 - Software caused connection abort. case WSAECONNABORTED: return ECONNABORTED; // 10054 - Connection reset by peer. case WSAECONNRESET: return ECONNRESET; // 10055 - No buffer space available. case WSAENOBUFS: return ENOBUFS; // 10057 - Socket is not connected. case WSAENOTCONN: return ENOTCONN; // 10060 - Connection timed out. case WSAETIMEDOUT: return ETIMEDOUT; // 10061 - Connection refused. case WSAECONNREFUSED: return ECONNREFUSED; // 10065 - No route to host. case WSAEHOSTUNREACH: return EHOSTUNREACH; default: wsa_assert (false); } // Not reachable return 0; }
int zmq::signaler_t::wait (int timeout_)
{
#ifdef HAVE_FORK
    if (unlikely (pid != getpid ())) {
        // we have forked and the file descriptor is closed. Emulate an interrupt
        // response.
        //printf("Child process %d signaler_t::wait returning simulating interrupt #1\n", getpid());
        errno = EINTR;
        return -1;
    }
#endif

#ifdef ZMQ_POLL_BASED_ON_POLL
    struct pollfd pfd;
    pfd.fd = r;
    pfd.events = POLLIN;
    int rc = poll (&pfd, 1, timeout_);
    if (unlikely (rc < 0)) {
        errno_assert (errno == EINTR);
        return -1;
    }
    else if (unlikely (rc == 0)) {
        errno = EAGAIN;
        return -1;
    }
#ifdef HAVE_FORK
    if (unlikely (pid != getpid ())) {
        // we have forked and the file descriptor is closed. Emulate an interrupt
        // response.
        //printf("Child process %d signaler_t::wait returning simulating interrupt #2\n", getpid());
        errno = EINTR;
        return -1;
    }
#endif
    zmq_assert (rc == 1);
    zmq_assert (pfd.revents & POLLIN);
    return 0;

#elif defined ZMQ_POLL_BASED_ON_SELECT
    fd_set fds;
    FD_ZERO (&fds);
    FD_SET (r, &fds);
    struct timeval timeout;
    if (timeout_ >= 0) {
        timeout.tv_sec = timeout_ / 1000;
        timeout.tv_usec = timeout_ % 1000 * 1000;
    }
#ifdef ZMQ_HAVE_WINDOWS
    int rc = select (0, &fds, NULL, NULL,
        timeout_ >= 0 ? &timeout : NULL);
    wsa_assert (rc != SOCKET_ERROR);
#else
    int rc = select (r + 1, &fds, NULL, NULL,
        timeout_ >= 0 ? &timeout : NULL);
    if (unlikely (rc < 0)) {
        errno_assert (errno == EINTR);
        return -1;
    }
#endif
    if (unlikely (rc == 0)) {
        errno = EAGAIN;
        return -1;
    }
    zmq_assert (rc == 1);
    return 0;

#else
#error
#endif
}
int zmq::wsa_error_to_errno (int errcode) { switch (errcode) { // 10004 - Interrupted system call. case WSAEINTR: return EINTR; // 10009 - File handle is not valid. case WSAEBADF: return EBADF; // 10013 - Permission denied. case WSAEACCES: return EACCES; // 10014 - Bad address. case WSAEFAULT: return EFAULT; // 10022 - Invalid argument. case WSAEINVAL: return EINVAL; // 10024 - Too many open files. case WSAEMFILE: return EMFILE; // 10035 - Operation would block. case WSAEWOULDBLOCK: return EBUSY; // 10036 - Operation now in progress. case WSAEINPROGRESS: return EAGAIN; // 10037 - Operation already in progress. case WSAEALREADY: return EAGAIN; // 10038 - Socket operation on non-socket. case WSAENOTSOCK: return ENOTSOCK; // 10039 - Destination address required. case WSAEDESTADDRREQ: return EFAULT; // 10040 - Message too long. case WSAEMSGSIZE: return EMSGSIZE; // 10041 - Protocol wrong type for socket. case WSAEPROTOTYPE: return EFAULT; // 10042 - Bad protocol option. case WSAENOPROTOOPT: return EINVAL; // 10043 - Protocol not supported. case WSAEPROTONOSUPPORT: return EPROTONOSUPPORT; // 10044 - Socket type not supported. case WSAESOCKTNOSUPPORT: return EFAULT; // 10045 - Operation not supported on socket. case WSAEOPNOTSUPP: return EFAULT; // 10046 - Protocol family not supported. case WSAEPFNOSUPPORT: return EPROTONOSUPPORT; // 10047 - Address family not supported by protocol family. case WSAEAFNOSUPPORT: return EAFNOSUPPORT; // 10048 - Address already in use. case WSAEADDRINUSE: return EADDRINUSE; // 10049 - Cannot assign requested address. case WSAEADDRNOTAVAIL: return EADDRNOTAVAIL; // 10050 - Network is down. case WSAENETDOWN: return ENETDOWN; // 10051 - Network is unreachable. case WSAENETUNREACH: return ENETUNREACH; // 10052 - Network dropped connection on reset. case WSAENETRESET: return ENETRESET; // 10053 - Software caused connection abort. case WSAECONNABORTED: return ECONNABORTED; // 10054 - Connection reset by peer. case WSAECONNRESET: return ECONNRESET; // 10055 - No buffer space available. case WSAENOBUFS: return ENOBUFS; // 10056 - Socket is already connected. case WSAEISCONN: return EFAULT; // 10057 - Socket is not connected. case WSAENOTCONN: return ENOTCONN; // 10058 - Can't send after socket shutdown. case WSAESHUTDOWN: return EFAULT; // 10059 - Too many references can't splice. case WSAETOOMANYREFS: return EFAULT; // 10060 - Connection timed out. case WSAETIMEDOUT: return ETIMEDOUT; // 10061 - Connection refused. case WSAECONNREFUSED: return ECONNREFUSED; // 10062 - Too many levels of symbolic links. case WSAELOOP: return EFAULT; // 10063 - File name too long. case WSAENAMETOOLONG: return EFAULT; // 10064 - Host is down. case WSAEHOSTDOWN: return EAGAIN; // 10065 - No route to host. case WSAEHOSTUNREACH: return EHOSTUNREACH; // 10066 - Directory not empty. case WSAENOTEMPTY: return EFAULT; // 10067 - Too many processes. case WSAEPROCLIM: return EFAULT; // 10068 - Too many users. case WSAEUSERS: return EFAULT; // 10069 - Disc Quota Exceeded. case WSAEDQUOT: return EFAULT; // 10070 - Stale NFS file handle. case WSAESTALE: return EFAULT; // 10071 - Too many levels of remote in path. case WSAEREMOTE: return EFAULT; // 10091 - Network SubSystem is unavailable. case WSASYSNOTREADY: return EFAULT; // 10092 - WINSOCK DLL Version out of range. case WSAVERNOTSUPPORTED: return EFAULT; // 10093 - Successful WSASTARTUP not yet performed. case WSANOTINITIALISED: return EFAULT; // 11001 - Host not found. 
case WSAHOST_NOT_FOUND: return EFAULT; // 11002 - Non-Authoritative Host not found. case WSATRY_AGAIN: return EFAULT; // 11003 - Non-Recoverable errors: FORMERR REFUSED NOTIMP. case WSANO_RECOVERY: return EFAULT; // 11004 - Valid name no data record of requested. case WSANO_DATA: return EFAULT; default: wsa_assert (false); } // Not reachable return 0; }
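// Illustrative sketch, not part of the original sources: the usual calling
// pattern for wsa_error_to_errno, mirroring how the listeners below use it --
// translate WSAGetLastError () into an errno value right after a failing
// Winsock call.  The bind () wrapper and its arguments are placeholders
// invented for the example.
static int bind_with_errno_example (SOCKET s, const sockaddr *addr, int addrlen)
{
    int rc = bind (s, addr, addrlen);
    if (rc == SOCKET_ERROR) {
        errno = zmq::wsa_error_to_errno (WSAGetLastError ());
        return -1;
    }
    return 0;
}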
void zmq::tune_tcp_keepalives (fd_t s_, int keepalive_, int keepalive_cnt_,
    int keepalive_idle_, int keepalive_intvl_)
{
    // These options are used only under certain #ifdefs below.
    (void) keepalive_;
    (void) keepalive_cnt_;
    (void) keepalive_idle_;
    (void) keepalive_intvl_;

    // If none of the #ifdefs apply, then s_ is unused.
    (void) s_;

    // Tuning TCP keep-alives if platform allows it
    // All values = -1 means skip and leave it for OS
#ifdef ZMQ_HAVE_SO_KEEPALIVE
    if (keepalive_ != -1) {
        int rc = setsockopt (s_, SOL_SOCKET, SO_KEEPALIVE,
            (char*) &keepalive_, sizeof (int));
#ifdef ZMQ_HAVE_WINDOWS
        wsa_assert (rc != SOCKET_ERROR);
#else
        errno_assert (rc == 0);
#endif

#ifdef ZMQ_HAVE_TCP_KEEPCNT
        if (keepalive_cnt_ != -1) {
            int rc = setsockopt (s_, IPPROTO_TCP, TCP_KEEPCNT,
                &keepalive_cnt_, sizeof (int));
#ifdef ZMQ_HAVE_WINDOWS
            wsa_assert (rc != SOCKET_ERROR);
#else
            errno_assert (rc == 0);
#endif
        }
#endif // ZMQ_HAVE_TCP_KEEPCNT

#ifdef ZMQ_HAVE_TCP_KEEPIDLE
        if (keepalive_idle_ != -1) {
            int rc = setsockopt (s_, IPPROTO_TCP, TCP_KEEPIDLE,
                &keepalive_idle_, sizeof (int));
#ifdef ZMQ_HAVE_WINDOWS
            wsa_assert (rc != SOCKET_ERROR);
#else
            errno_assert (rc == 0);
#endif
        }
#else // ZMQ_HAVE_TCP_KEEPIDLE
#ifdef ZMQ_HAVE_TCP_KEEPALIVE
        if (keepalive_idle_ != -1) {
            int rc = setsockopt (s_, IPPROTO_TCP, TCP_KEEPALIVE,
                &keepalive_idle_, sizeof (int));
#ifdef ZMQ_HAVE_WINDOWS
            wsa_assert (rc != SOCKET_ERROR);
#else
            errno_assert (rc == 0);
#endif
        }
#endif // ZMQ_HAVE_TCP_KEEPALIVE
#endif // ZMQ_HAVE_TCP_KEEPIDLE

#ifdef ZMQ_HAVE_TCP_KEEPINTVL
        if (keepalive_intvl_ != -1) {
            int rc = setsockopt (s_, IPPROTO_TCP, TCP_KEEPINTVL,
                &keepalive_intvl_, sizeof (int));
#ifdef ZMQ_HAVE_WINDOWS
            wsa_assert (rc != SOCKET_ERROR);
#else
            errno_assert (rc == 0);
#endif
        }
#endif // ZMQ_HAVE_TCP_KEEPINTVL
    }
#endif // ZMQ_HAVE_SO_KEEPALIVE
}
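// Illustrative sketch, not part of the original sources: how a caller might
// pass the keep-alive socket options through to a freshly created fd.  The
// options_t field names (tcp_keepalive, tcp_keepalive_cnt, tcp_keepalive_idle,
// tcp_keepalive_intvl) are assumptions based on libzmq's option handling;
// -1 leaves the OS defaults in place, as the comment above describes.
static void tune_keepalives_example (zmq::fd_t fd_, const zmq::options_t &options_)
{
    zmq::tune_tcp_keepalives (fd_, options_.tcp_keepalive,
        options_.tcp_keepalive_cnt, options_.tcp_keepalive_idle,
        options_.tcp_keepalive_intvl);
}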
int zmq::tcp_listener_t::set_address (const char *addr_) { // Convert the textual address into address structure. int rc = address.resolve (addr_, true, options.ipv6); if (rc != 0) return -1; address.to_string (endpoint); if (options.use_fd != -1) { s = options.use_fd; socket->event_listening (endpoint, (int) s); return 0; } // Create a listening socket. s = open_socket (address.family (), SOCK_STREAM, IPPROTO_TCP); // IPv6 address family not supported, try automatic downgrade to IPv4. if (s == zmq::retired_fd && address.family () == AF_INET6 && errno == EAFNOSUPPORT && options.ipv6) { rc = address.resolve (addr_, true, false); if (rc != 0) return rc; s = open_socket (AF_INET, SOCK_STREAM, IPPROTO_TCP); } #ifdef ZMQ_HAVE_WINDOWS if (s == INVALID_SOCKET) { errno = wsa_error_to_errno (WSAGetLastError ()); return -1; } #if !defined _WIN32_WCE && !defined ZMQ_HAVE_WINDOWS_UWP // On Windows, preventing sockets to be inherited by child processes. BOOL brc = SetHandleInformation ((HANDLE) s, HANDLE_FLAG_INHERIT, 0); win_assert (brc); #endif #else if (s == -1) return -1; #endif // On some systems, IPv4 mapping in IPv6 sockets is disabled by default. // Switch it on in such cases. if (address.family () == AF_INET6) enable_ipv4_mapping (s); // Set the IP Type-Of-Service for the underlying socket if (options.tos != 0) set_ip_type_of_service (s, options.tos); // Set the socket to loopback fastpath if configured. if (options.loopback_fastpath) tcp_tune_loopback_fast_path (s); // Bind the socket to a device if applicable if (!options.bound_device.empty ()) bind_to_device (s, options.bound_device); // Set the socket buffer limits for the underlying socket. if (options.sndbuf >= 0) set_tcp_send_buffer (s, options.sndbuf); if (options.rcvbuf >= 0) set_tcp_receive_buffer (s, options.rcvbuf); // Allow reusing of the address. int flag = 1; #ifdef ZMQ_HAVE_WINDOWS rc = setsockopt (s, SOL_SOCKET, SO_EXCLUSIVEADDRUSE, (const char *) &flag, sizeof (int)); wsa_assert (rc != SOCKET_ERROR); #else rc = setsockopt (s, SOL_SOCKET, SO_REUSEADDR, &flag, sizeof (int)); errno_assert (rc == 0); #endif // Bind the socket to the network interface and port. rc = bind (s, address.addr (), address.addrlen ()); #ifdef ZMQ_HAVE_WINDOWS if (rc == SOCKET_ERROR) { errno = wsa_error_to_errno (WSAGetLastError ()); goto error; } #else if (rc != 0) goto error; #endif // Listen for incoming connections. rc = listen (s, options.backlog); #ifdef ZMQ_HAVE_WINDOWS if (rc == SOCKET_ERROR) { errno = wsa_error_to_errno (WSAGetLastError ()); goto error; } #else if (rc != 0) goto error; #endif socket->event_listening (endpoint, (int) s); return 0; error: int err = errno; close (); errno = err; return -1; }
void zmq::select_t::loop () { while (!stopping) { // Execute any due timers. int timeout = (int) execute_timers (); #if defined ZMQ_HAVE_OSX struct timeval tv = { (long) (timeout / 1000), timeout % 1000 * 1000 }; #else struct timeval tv = { (long) (timeout / 1000), (long) (timeout % 1000 * 1000) }; #endif int rc = 0; #if defined ZMQ_HAVE_WINDOWS /* On Windows select does not allow to mix descriptors from different service providers. It seems to work for AF_INET and AF_INET6, but fails for AF_INET and VMCI. The workaround is to use WSAEventSelect and WSAWaitForMultipleEvents to wait, then use select to find out what actually changed. WSAWaitForMultipleEvents cannot be used alone, because it does not support more than 64 events which is not enough. To reduce unncessary overhead, WSA is only used when there are more than one family. Moreover, AF_INET and AF_INET6 are considered the same family because Windows seems to handle them properly. See get_fd_family for details. */ wsa_events_t wsa_events; // If there is just one family, there is no reason to use WSA events. if (family_entries.size () > 1) { for (family_entries_t::iterator family_entry_it = family_entries.begin (); family_entry_it != family_entries.end (); ++family_entry_it) { family_entry_t& family_entry = family_entry_it->second; for (fd_entries_t::iterator fd_entry_it = family_entry.fd_entries.begin (); fd_entry_it != family_entry.fd_entries.end (); ++fd_entry_it) { fd_t fd = fd_entry_it->fd; // http://stackoverflow.com/q/35043420/188530 if (FD_ISSET (fd, &family_entry.fds_set.read) && FD_ISSET (fd, &family_entry.fds_set.write)) rc = WSAEventSelect (fd, wsa_events.events [3], FD_READ | FD_ACCEPT | FD_CLOSE | FD_WRITE | FD_CONNECT | FD_OOB); else if (FD_ISSET (fd, &family_entry.fds_set.read)) rc = WSAEventSelect (fd, wsa_events.events [0], FD_READ | FD_ACCEPT | FD_CLOSE | FD_OOB); else if (FD_ISSET (fd, &family_entry.fds_set.write)) rc = WSAEventSelect (fd, wsa_events.events [1], FD_WRITE | FD_CONNECT | FD_OOB); else if (FD_ISSET (fd, &family_entry.fds_set.error)) rc = WSAEventSelect (fd, wsa_events.events [2], FD_OOB); else rc = 0; wsa_assert (rc != SOCKET_ERROR); } } } #endif #if defined ZMQ_HAVE_WINDOWS if (family_entries.size () > 1) { rc = WSAWaitForMultipleEvents (4, wsa_events.events, FALSE, timeout ? timeout : INFINITE, FALSE); wsa_assert (rc != (int)WSA_WAIT_FAILED); zmq_assert (rc != WSA_WAIT_IO_COMPLETION); if (rc == WSA_WAIT_TIMEOUT) continue; } for (current_family_entry_it = family_entries.begin (); current_family_entry_it != family_entries.end (); ++current_family_entry_it) { family_entry_t& family_entry = current_family_entry_it->second; // select will fail when run with empty sets. if (family_entry.fd_entries.empty ()) continue; fds_set_t local_fds_set = family_entry.fds_set; if (family_entries.size () > 1) { // There is no reason to wait again after WSAWaitForMultipleEvents. // Simply collect what is ready. struct timeval tv_nodelay = { 0, 0 }; rc = select (0, &local_fds_set.read, &local_fds_set.write, &local_fds_set.error, &tv_nodelay); } else rc = select (0, &local_fds_set.read, &local_fds_set.write, &local_fds_set.error, timeout > 0 ? &tv : NULL); wsa_assert (rc != SOCKET_ERROR); // Size is cached to avoid iteration through recently added descriptors. 
for (fd_entries_t::size_type i = 0, size = family_entry.fd_entries.size (); i < size && rc > 0; ++i) { fd_entry_t fd_entry = family_entry.fd_entries [i]; if (fd_entry.fd == retired_fd) continue; if (FD_ISSET (fd_entry.fd, &local_fds_set.read)) { fd_entry.events->in_event (); --rc; } if (fd_entry.fd == retired_fd || rc == 0) continue; if (FD_ISSET (fd_entry.fd, &local_fds_set.write)) { fd_entry.events->out_event (); --rc; } if (fd_entry.fd == retired_fd || rc == 0) continue; if (FD_ISSET (fd_entry.fd, &local_fds_set.error)) { fd_entry.events->in_event (); --rc; } } if (family_entry.retired) { family_entry.retired = false; family_entry.fd_entries.erase (std::remove_if (family_entry.fd_entries.begin (), family_entry.fd_entries.end (), is_retired_fd), family_entry.fd_entries.end ()); } } #else fds_set_t local_fds_set = fds_set; rc = select (maxfd + 1, &local_fds_set.read, &local_fds_set.write, &local_fds_set.error, timeout ? &tv : NULL); if (rc == -1) { errno_assert (errno == EINTR); continue; } // Size is cached to avoid iteration through just added descriptors. for (fd_entries_t::size_type i = 0, size = fd_entries.size (); i < size && rc > 0; ++i) { fd_entry_t& fd_entry = fd_entries [i]; if (fd_entry.fd == retired_fd) continue; if (FD_ISSET (fd_entry.fd, &local_fds_set.read)) { fd_entry.events->in_event (); --rc; } if (fd_entry.fd == retired_fd || rc == 0) continue; if (FD_ISSET (fd_entry.fd, &local_fds_set.write)) { fd_entry.events->out_event (); --rc; } if (fd_entry.fd == retired_fd || rc == 0) continue; if (FD_ISSET (fd_entry.fd, &local_fds_set.error)) { fd_entry.events->in_event (); --rc; } } if (retired) { retired = false; fd_entries.erase (std::remove_if (fd_entries.begin (), fd_entries.end (), is_retired_fd), fd_entries.end ()); } #endif } }
void zmq::select_t::loop ()
{
    while (!stopping) {

        // Execute any due timers.
        int timeout = (int) execute_timers ();

        // Initialise the pollsets.
        memcpy (&readfds, &source_set_in, sizeof source_set_in);
        memcpy (&writefds, &source_set_out, sizeof source_set_out);
        memcpy (&exceptfds, &source_set_err, sizeof source_set_err);

        // Wait for events.
#ifdef ZMQ_HAVE_OSX
        struct timeval tv = {static_cast<int> (timeout / 1000),
                             static_cast<int> (timeout % 1000 * 1000)};
#else
        struct timeval tv = {(long) (timeout / 1000),
                             (long) (timeout % 1000 * 1000)};
#endif

#ifdef ZMQ_HAVE_WINDOWS
        int rc = select (0, &readfds, &writefds, &exceptfds,
            timeout ? &tv : NULL);
        wsa_assert (rc != SOCKET_ERROR);
#else
        int rc = select (maxfd + 1, &readfds, &writefds, &exceptfds,
            timeout ? &tv : NULL);
        if (rc == -1) {
            errno_assert (errno == EINTR);
            continue;
        }
#endif

        // If there are no events (i.e. it's a timeout) there's no point
        // in checking the pollset.
        if (rc == 0)
            continue;

        for (fd_set_t::size_type i = 0; i < fds.size (); i ++) {
            if (fds [i].fd == retired_fd)
                continue;
            if (FD_ISSET (fds [i].fd, &exceptfds))
                fds [i].events->in_event ();
            if (fds [i].fd == retired_fd)
                continue;
            if (FD_ISSET (fds [i].fd, &writefds))
                fds [i].events->out_event ();
            if (fds [i].fd == retired_fd)
                continue;
            if (FD_ISSET (fds [i].fd, &readfds))
                fds [i].events->in_event ();
        }

        // Destroy retired event sources.
        if (retired) {
            fds.erase (std::remove_if (fds.begin (), fds.end (),
                zmq::select_t::is_retired_fd), fds.end ());
            retired = false;
        }
    }
}
int nn_efd_wait (struct nn_efd *self, int timeout)
{
    int rc;
    struct timeval tv;
    SOCKET fd = self->r;
    uint64_t expire;

    if (timeout > 0) {
        expire = nn_clock_ms() + timeout;
        tv.tv_sec = timeout / 1000;
        tv.tv_usec = timeout % 1000 * 1000;
    } else {
        expire = timeout;
    }

    for (;;) {
        if (nn_slow (fd == INVALID_SOCKET)) {
            return -EBADF;
        }
        FD_SET (fd, &self->fds);
        switch (expire) {
        case 0:
            tv.tv_sec = 0;
            tv.tv_usec = 0;
            break;
        case (uint64_t)-1:
            tv.tv_sec = 0;
            tv.tv_usec = 100000;
            break;
        default:
            timeout = (int)(expire - nn_clock_ms());
            if (timeout < 0) {
                return -ETIMEDOUT;
            }
            if (timeout > 100) {
                tv.tv_sec = 0;
                tv.tv_usec = 100000;
            } else {
                tv.tv_sec = timeout / 1000;
                tv.tv_usec = timeout % 1000 * 1000;
            }
        }

        rc = select (0, &self->fds, NULL, NULL, &tv);

        if (nn_slow (rc == SOCKET_ERROR)) {
            rc = nn_err_wsa_to_posix (WSAGetLastError ());
            errno = rc;

            /*  Treat these as non-fatal errors, typically occurring when the
                socket is being closed from a separate thread during a
                blocking I/O operation. */
            if (rc == EINTR || rc == ENOTSOCK)
                return -EINTR;
        } else if (rc == 0) {
            if (expire == 0)
                return -ETIMEDOUT;
            if ((expire != (uint64_t)-1) && (expire < nn_clock_ms())) {
                return -ETIMEDOUT;
            }
            continue;
        }

        wsa_assert (rc >= 0);
        return 0;
    }
}
void zmq::udp_engine_t::plug (io_thread_t *io_thread_, session_base_t *session_) { zmq_assert (!plugged); plugged = true; zmq_assert (!session); zmq_assert (session_); session = session_; // Connect to I/O threads poller object. io_object_t::plug (io_thread_); handle = add_fd (fd); // Bind the socket to a device if applicable if (!options.bound_device.empty ()) bind_to_device (fd, options.bound_device); if (send_enabled) { if (!options.raw_socket) { out_address = address->resolved.udp_addr->dest_addr (); out_addrlen = address->resolved.udp_addr->dest_addrlen (); } else { out_address = (sockaddr *) &raw_address; out_addrlen = sizeof (sockaddr_in); } set_pollout (handle); } if (recv_enabled) { int on = 1; int rc = setsockopt (fd, SOL_SOCKET, SO_REUSEADDR, (char *) &on, sizeof (on)); #ifdef ZMQ_HAVE_WINDOWS wsa_assert (rc != SOCKET_ERROR); #else errno_assert (rc == 0); #endif #ifdef ZMQ_HAVE_VXWORKS rc = bind (fd, (sockaddr *) address->resolved.udp_addr->bind_addr (), address->resolved.udp_addr->bind_addrlen ()); #else rc = bind (fd, address->resolved.udp_addr->bind_addr (), address->resolved.udp_addr->bind_addrlen ()); #endif #ifdef ZMQ_HAVE_WINDOWS wsa_assert (rc != SOCKET_ERROR); #else errno_assert (rc == 0); #endif if (address->resolved.udp_addr->is_mcast ()) { struct ip_mreq mreq; mreq.imr_multiaddr = address->resolved.udp_addr->multicast_ip (); mreq.imr_interface = address->resolved.udp_addr->interface_ip (); rc = setsockopt (fd, IPPROTO_IP, IP_ADD_MEMBERSHIP, (char *) &mreq, sizeof (mreq)); #ifdef ZMQ_HAVE_WINDOWS wsa_assert (rc != SOCKET_ERROR); #else errno_assert (rc == 0); #endif } set_pollin (handle); // Call restart output to drop all join/leave commands restart_output (); } }
void zmq::udp_engine_t::in_event ()
{
    struct sockaddr_in in_address;
    socklen_t in_addrlen = sizeof (sockaddr_in);
#ifdef ZMQ_HAVE_WINDOWS
    int nbytes = recvfrom (fd, (char *) in_buffer, MAX_UDP_MSG, 0,
        (sockaddr *) &in_address, &in_addrlen);
    const int last_error = WSAGetLastError ();
    if (nbytes == SOCKET_ERROR) {
        wsa_assert (last_error == WSAENETDOWN || last_error == WSAENETRESET
            || last_error == WSAEWOULDBLOCK);
        return;
    }
#elif defined ZMQ_HAVE_VXWORKS
    int nbytes = recvfrom (fd, (char *) in_buffer, MAX_UDP_MSG, 0,
        (sockaddr *) &in_address, (int *) &in_addrlen);
    if (nbytes == -1) {
        errno_assert (errno != EBADF && errno != EFAULT && errno != ENOMEM
            && errno != ENOTSOCK);
        return;
    }
#else
    int nbytes = recvfrom (fd, in_buffer, MAX_UDP_MSG, 0,
        (sockaddr *) &in_address, &in_addrlen);
    if (nbytes == -1) {
        errno_assert (errno != EBADF && errno != EFAULT && errno != ENOMEM
            && errno != ENOTSOCK);
        return;
    }
#endif

    int rc;
    int body_size;
    int body_offset;
    msg_t msg;

    if (options.raw_socket) {
        sockaddr_to_msg (&msg, &in_address);

        body_size = nbytes;
        body_offset = 0;
    }
    else {
        char *group_buffer = (char *) in_buffer + 1;
        int group_size = in_buffer[0];

        rc = msg.init_size (group_size);
        errno_assert (rc == 0);
        msg.set_flags (msg_t::more);
        memcpy (msg.data (), group_buffer, group_size);

        // This doesn't fit, just ignore
        if (nbytes - 1 < group_size) {
            rc = msg.close ();
            errno_assert (rc == 0);
            return;
        }

        body_size = nbytes - 1 - group_size;
        body_offset = 1 + group_size;
    }

    // Push group description to session
    rc = session->push_msg (&msg);
    errno_assert (rc == 0 || (rc == -1 && errno == EAGAIN));

    // Group description message doesn't fit in the pipe, drop
    if (rc != 0) {
        rc = msg.close ();
        errno_assert (rc == 0);

        reset_pollin (handle);
        return;
    }

    rc = msg.close ();
    errno_assert (rc == 0);
    rc = msg.init_size (body_size);
    errno_assert (rc == 0);
    memcpy (msg.data (), in_buffer + body_offset, body_size);

    // Push message body to session
    rc = session->push_msg (&msg);
    // Message body doesn't fit in the pipe, drop and reset session state
    if (rc != 0) {
        rc = msg.close ();
        errno_assert (rc == 0);

        session->reset ();
        reset_pollin (handle);
        return;
    }

    rc = msg.close ();
    errno_assert (rc == 0);

    session->flush ();
}
void zmq::udp_engine_t::out_event ()
{
    msg_t group_msg;
    int rc = session->pull_msg (&group_msg);
    errno_assert (rc == 0 || (rc == -1 && errno == EAGAIN));

    if (rc == 0) {
        msg_t body_msg;
        rc = session->pull_msg (&body_msg);

        size_t group_size = group_msg.size ();
        size_t body_size = body_msg.size ();
        size_t size;

        if (options.raw_socket) {
            rc = resolve_raw_address ((char *) group_msg.data (), group_size);

            // We discard the message if address is not valid
            if (rc != 0) {
                rc = group_msg.close ();
                errno_assert (rc == 0);

                rc = body_msg.close ();
                errno_assert (rc == 0);

                return;
            }

            size = body_size;

            memcpy (out_buffer, body_msg.data (), body_size);
        }
        else {
            size = group_size + body_size + 1;

            // TODO: check if larger than maximum size
            out_buffer[0] = (unsigned char) group_size;
            memcpy (out_buffer + 1, group_msg.data (), group_size);
            memcpy (out_buffer + 1 + group_size, body_msg.data (), body_size);
        }

        rc = group_msg.close ();
        errno_assert (rc == 0);

        rc = body_msg.close ();
        errno_assert (rc == 0);

#ifdef ZMQ_HAVE_WINDOWS
        rc = sendto (fd, (const char *) out_buffer, (int) size, 0,
            out_address, (int) out_addrlen);
        wsa_assert (rc != SOCKET_ERROR);
#elif defined ZMQ_HAVE_VXWORKS
        rc = sendto (fd, (caddr_t) out_buffer, size, 0,
            (sockaddr *) out_address, (int) out_addrlen);
        errno_assert (rc != -1);
#else
        rc = sendto (fd, out_buffer, size, 0, out_address, out_addrlen);
        errno_assert (rc != -1);
#endif
    }
    else
        reset_pollout (handle);
}
static int make_fdpair (xs::fd_t *r_, xs::fd_t *w_) { #if defined XS_HAVE_EVENTFD // Create eventfd object. #if defined EFD_CLOEXEC xs::fd_t fd = eventfd (0, EFD_CLOEXEC); if (fd == -1) return -1; #else xs::fd_t fd = eventfd (0, 0); if (fd == -1) return -1; #if defined FD_CLOEXEC int rc = fcntl (fd, F_SETFD, FD_CLOEXEC); errno_assert (rc != -1); #endif #endif *w_ = fd; *r_ = fd; return 0; #elif defined XS_HAVE_WINDOWS // On Windows we are using TCP sockets for in-process communication. // That is a security hole -- other processes on the same box may connect // to the bound TCP port and hook into internal signal processing of // the library. To solve this problem we should use a proper in-process // signaling mechanism such as private semaphore. However, on Windows, // these cannot be polled on using select(). Other functions that allow // polling on these objects (e.g. WaitForMulitpleObjects) don't allow // to poll on sockets. Thus, the only way to fix the problem is to // implement IOCP polling mechanism that allows to poll on both sockets // and in-process synchronisation objects. // Make the following critical section accessible to everyone. SECURITY_ATTRIBUTES sa = {0}; sa.nLength = sizeof (sa); sa.bInheritHandle = FALSE; SECURITY_DESCRIPTOR sd; BOOL ok = InitializeSecurityDescriptor (&sd, SECURITY_DESCRIPTOR_REVISION); win_assert (ok); ok = SetSecurityDescriptorDacl(&sd, TRUE, (PACL) NULL, FALSE); win_assert (ok); sa.lpSecurityDescriptor = &sd; // This function has to be in a system-wide critical section so that // two instances of the library don't accidentally create signaler // crossing the process boundary. // We'll use named event object to implement the critical section. HANDLE sync = CreateEvent (&sa, FALSE, TRUE, "xs-signaler-port-sync"); win_assert (sync != NULL); // Enter the critical section. DWORD dwrc = WaitForSingleObject (sync, INFINITE); xs_assert (dwrc == WAIT_OBJECT_0); // Windows has no 'socketpair' function. CreatePipe is no good as pipe // handles cannot be polled on. Here we create the socketpair by hand. *w_ = INVALID_SOCKET; *r_ = INVALID_SOCKET; // Create listening socket. SOCKET listener; listener = xs::open_socket (AF_INET, SOCK_STREAM, 0); if (listener == xs::retired_fd) return -1; // Set SO_REUSEADDR and TCP_NODELAY on listening socket. BOOL so_reuseaddr = 1; int rc = setsockopt (listener, SOL_SOCKET, SO_REUSEADDR, (char *)&so_reuseaddr, sizeof (so_reuseaddr)); wsa_assert (rc != SOCKET_ERROR); BOOL tcp_nodelay = 1; rc = setsockopt (listener, IPPROTO_TCP, TCP_NODELAY, (char *)&tcp_nodelay, sizeof (tcp_nodelay)); wsa_assert (rc != SOCKET_ERROR); // Bind listening socket to the local port. struct sockaddr_in addr; memset (&addr, 0, sizeof (addr)); addr.sin_family = AF_INET; addr.sin_addr.s_addr = htonl (INADDR_LOOPBACK); addr.sin_port = htons (xs::signaler_port); rc = bind (listener, (const struct sockaddr*) &addr, sizeof (addr)); wsa_assert (rc != SOCKET_ERROR); // Listen for incomming connections. rc = listen (listener, 1); wsa_assert (rc != SOCKET_ERROR); // Create the writer socket. *w_ = WSASocket (AF_INET, SOCK_STREAM, 0, NULL, 0, 0); if (*w_ == xs::retired_fd) { rc = closesocket (listener); wsa_assert (rc != SOCKET_ERROR); return -1; } // Set TCP_NODELAY on writer socket. rc = setsockopt (*w_, IPPROTO_TCP, TCP_NODELAY, (char *)&tcp_nodelay, sizeof (tcp_nodelay)); wsa_assert (rc != SOCKET_ERROR); // Connect writer to the listener. rc = connect (*w_, (sockaddr *) &addr, sizeof (addr)); wsa_assert (rc != SOCKET_ERROR); // Accept connection from writer. 
*r_ = accept (listener, NULL, NULL); if (*r_ == xs::retired_fd) { rc = closesocket (listener); wsa_assert (rc != SOCKET_ERROR); rc = closesocket (*w_); wsa_assert (rc != SOCKET_ERROR); return -1; } // We don't need the listening socket anymore. Close it. rc = closesocket (listener); wsa_assert (rc != SOCKET_ERROR); // Exit the critical section. BOOL brc = SetEvent (sync); win_assert (brc != 0); return 0; #elif defined XS_HAVE_OPENVMS // Whilst OpenVMS supports socketpair - it maps to AF_INET only. Further, // it does not set the socket options TCP_NODELAY and TCP_NODELACK which // can lead to performance problems. // // The bug will be fixed in V5.6 ECO4 and beyond. In the meantime, we'll // create the socket pair manually. sockaddr_in lcladdr; memset (&lcladdr, 0, sizeof (lcladdr)); lcladdr.sin_family = AF_INET; lcladdr.sin_addr.s_addr = htonl (INADDR_LOOPBACK); lcladdr.sin_port = 0; int listener = open_socket (AF_INET, SOCK_STREAM, 0); errno_assert (listener != -1); int on = 1; int rc = setsockopt (listener, IPPROTO_TCP, TCP_NODELAY, &on, sizeof (on)); errno_assert (rc != -1); rc = setsockopt (listener, IPPROTO_TCP, TCP_NODELACK, &on, sizeof (on)); errno_assert (rc != -1); rc = bind(listener, (struct sockaddr*) &lcladdr, sizeof (lcladdr)); errno_assert (rc != -1); socklen_t lcladdr_len = sizeof (lcladdr); rc = getsockname (listener, (struct sockaddr*) &lcladdr, &lcladdr_len); errno_assert (rc != -1); rc = listen (listener, 1); errno_assert (rc != -1); *w_ = open_socket (AF_INET, SOCK_STREAM, 0); errno_assert (*w_ != -1); rc = setsockopt (*w_, IPPROTO_TCP, TCP_NODELAY, &on, sizeof (on)); errno_assert (rc != -1); rc = setsockopt (*w_, IPPROTO_TCP, TCP_NODELACK, &on, sizeof (on)); errno_assert (rc != -1); rc = connect (*w_, (struct sockaddr*) &lcladdr, sizeof (lcladdr)); errno_assert (rc != -1); *r_ = accept (listener, NULL, NULL); errno_assert (*r_ != -1); close (listener); return 0; #else // All other implementations support socketpair() int sv [2]; #if defined XS_HAVE_SOCK_CLOEXEC int rc = socketpair (AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, sv); if (rc == -1) return -1; #else int rc = socketpair (AF_UNIX, SOCK_STREAM, 0, sv); if (rc == -1) return -1; errno_assert (rc == 0); #if defined FD_CLOEXEC rc = fcntl (sv [0], F_SETFD, FD_CLOEXEC); errno_assert (rc != -1); rc = fcntl (sv [1], F_SETFD, FD_CLOEXEC); errno_assert (rc != -1); #endif #endif *w_ = sv [0]; *r_ = sv [1]; return 0; #endif }
void nsocketpair(fd_t* pair)
{
#ifdef _WIN32
    // Create listening socket.
    SOCKET listener;
    BOOL so_reuseaddr = 1;
    int rc;
    BOOL tcp_nodelay = 1;
    struct sockaddr_in addr;
    int addrlen = sizeof (addr);

    // Windows has no 'socketpair' function. CreatePipe is no good as pipe
    // handles cannot be polled on. Here we create the socketpair by hand.
    pair[0] = INVALID_SOCKET;
    pair[1] = INVALID_SOCKET;

    socket_init();

    listener = socket (AF_INET, SOCK_STREAM, 0);
    wsa_assert (listener != INVALID_SOCKET);

    // Set SO_REUSEADDR and TCP_NODELAY on listening socket.
    rc = setsockopt (listener, SOL_SOCKET, SO_REUSEADDR,
        (char *)&so_reuseaddr, sizeof (so_reuseaddr));
    wsa_assert (rc != SOCKET_ERROR);
    rc = setsockopt (listener, IPPROTO_TCP, TCP_NODELAY,
        (char *)&tcp_nodelay, sizeof (tcp_nodelay));
    wsa_assert (rc != SOCKET_ERROR);

    // Bind listening socket to any free local port.
    memset (&addr, 0, sizeof (addr));
    addr.sin_family = AF_INET;
    addr.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
    addr.sin_port = 0;
    rc = bind (listener, (const struct sockaddr*) &addr, sizeof (addr));
    wsa_assert (rc != SOCKET_ERROR);

    // Retrieve local port listener is bound to (into addr).
    rc = getsockname (listener, (struct sockaddr*) &addr, &addrlen);
    wsa_assert (rc != SOCKET_ERROR);

    // Listen for incoming connections.
    rc = listen (listener, 1);
    wsa_assert (rc != SOCKET_ERROR);

    // Create the writer socket.
    pair[0] = WSASocket (AF_INET, SOCK_STREAM, 0, NULL, 0, 0);
    wsa_assert (pair[0] != INVALID_SOCKET);

    // Set TCP_NODELAY on writer socket.
    rc = setsockopt (pair[0], IPPROTO_TCP, TCP_NODELAY,
        (char *)&tcp_nodelay, sizeof (tcp_nodelay));
    wsa_assert (rc != SOCKET_ERROR);

    // Connect writer to the listener.
    rc = connect (pair[0], (struct sockaddr *) &addr, sizeof (addr));
    wsa_assert (rc != SOCKET_ERROR);

    // Accept connection from writer.
    pair[1] = accept (listener, NULL, NULL);
    wsa_assert (pair[1] != INVALID_SOCKET);

    // We don't need the listening socket anymore. Close it.
    rc = closesocket (listener);
    wsa_assert (rc != SOCKET_ERROR);
#else
    int rc = socketpair (AF_UNIX, SOCK_STREAM, 0, pair);
    errno_assert (rc == 0);
#endif
}
int zmq::tcp_listener_t::set_address (const char *addr_)
{
    // Convert the textual address into address structure.
    int rc = address.resolve (addr_, true, options.ipv4only ? true : false);
    if (rc != 0)
        return -1;

    // Create a listening socket.
    s = open_socket (address.family (), SOCK_STREAM, IPPROTO_TCP);
#ifdef ZMQ_HAVE_WINDOWS
    if (s == INVALID_SOCKET)
        errno = wsa_error_to_errno (WSAGetLastError ());
#endif

    // IPv6 address family not supported, try automatic downgrade to IPv4.
    if (address.family () == AF_INET6 && errno == EAFNOSUPPORT &&
          !options.ipv4only) {
        rc = address.resolve (addr_, true, true);
        if (rc != 0)
            return rc;
        s = ::socket (address.family (), SOCK_STREAM, IPPROTO_TCP);
    }

#ifdef ZMQ_HAVE_WINDOWS
    if (s == INVALID_SOCKET) {
        errno = wsa_error_to_errno (WSAGetLastError ());
        return -1;
    }
    // On Windows, preventing sockets to be inherited by child processes.
    BOOL brc = SetHandleInformation ((HANDLE) s, HANDLE_FLAG_INHERIT, 0);
    win_assert (brc);
#else
    if (s == -1)
        return -1;
#endif

    // On some systems, IPv4 mapping in IPv6 sockets is disabled by default.
    // Switch it on in such cases.
    if (address.family () == AF_INET6)
        enable_ipv4_mapping (s);

    // Allow reusing of the address.
    int flag = 1;
#ifdef ZMQ_HAVE_WINDOWS
    rc = setsockopt (s, SOL_SOCKET, SO_EXCLUSIVEADDRUSE,
        (const char*) &flag, sizeof (int));
    wsa_assert (rc != SOCKET_ERROR);
#else
    rc = setsockopt (s, SOL_SOCKET, SO_REUSEADDR, &flag, sizeof (int));
    errno_assert (rc == 0);
#endif

    address.to_string (endpoint);

    // Bind the socket to the network interface and port.
    rc = bind (s, address.addr (), address.addrlen ());
#ifdef ZMQ_HAVE_WINDOWS
    if (rc == SOCKET_ERROR) {
        errno = wsa_error_to_errno (WSAGetLastError ());
        return -1;
    }
#else
    if (rc != 0)
        return -1;
#endif

    // Listen for incoming connections.
    rc = listen (s, options.backlog);
#ifdef ZMQ_HAVE_WINDOWS
    if (rc == SOCKET_ERROR) {
        errno = wsa_error_to_errno (WSAGetLastError ());
        return -1;
    }
#else
    if (rc != 0)
        return -1;
#endif

    socket->monitor_event (ZMQ_EVENT_LISTENING, addr_, s);
    return 0;
}
int zmq_poll (zmq_pollitem_t *items_, int nitems_, long timeout_) { #if defined ZMQ_HAVE_LINUX || defined ZMQ_HAVE_FREEBSD ||\ defined ZMQ_HAVE_OPENBSD || defined ZMQ_HAVE_SOLARIS ||\ defined ZMQ_HAVE_OSX || defined ZMQ_HAVE_QNXNTO ||\ defined ZMQ_HAVE_HPUX || defined ZMQ_HAVE_AIX ||\ defined ZMQ_HAVE_NETBSD pollfd *pollfds = (pollfd*) malloc (nitems_ * sizeof (pollfd)); zmq_assert (pollfds); int npollfds = 0; int nsockets = 0; zmq::app_thread_t *app_thread = NULL; for (int i = 0; i != nitems_; i++) { // 0MQ sockets. if (items_ [i].socket) { // Get the app_thread the socket is living in. If there are two // sockets in the same pollset with different app threads, fail. zmq::socket_base_t *s = (zmq::socket_base_t*) items_ [i].socket; if (app_thread) { if (app_thread != s->get_thread ()) { free (pollfds); errno = EFAULT; return -1; } } else app_thread = s->get_thread (); nsockets++; continue; } // Raw file descriptors. pollfds [npollfds].fd = items_ [i].fd; pollfds [npollfds].events = (items_ [i].events & ZMQ_POLLIN ? POLLIN : 0) | (items_ [i].events & ZMQ_POLLOUT ? POLLOUT : 0); npollfds++; } // If there's at least one 0MQ socket in the pollset we have to poll // for 0MQ commands. If ZMQ_POLL was not set, fail. if (nsockets) { pollfds [npollfds].fd = app_thread->get_signaler ()->get_fd (); if (pollfds [npollfds].fd == zmq::retired_fd) { free (pollfds); errno = ENOTSUP; return -1; } pollfds [npollfds].events = POLLIN; npollfds++; } // First iteration just check for events, don't block. Waiting would // prevent exiting on any events that may already been signaled on // 0MQ sockets. int rc = poll (pollfds, npollfds, 0); if (rc == -1 && errno == EINTR) { free (pollfds); return 0; } errno_assert (rc >= 0); int timeout = timeout_ > 0 ? timeout_ / 1000 : -1; int nevents = 0; while (true) { // Process 0MQ commands if needed. if (nsockets && pollfds [npollfds -1].revents & POLLIN) app_thread->process_commands (false, false); // Check for the events. int pollfd_pos = 0; for (int i = 0; i != nitems_; i++) { // If the poll item is a raw file descriptor, simply convert // the events to zmq_pollitem_t-style format. if (!items_ [i].socket) { items_ [i].revents = (pollfds [pollfd_pos].revents & POLLIN ? ZMQ_POLLIN : 0) | (pollfds [pollfd_pos].revents & POLLOUT ? ZMQ_POLLOUT : 0); if (items_ [i].revents) nevents++; pollfd_pos++; continue; } // The poll item is a 0MQ socket. zmq::socket_base_t *s = (zmq::socket_base_t*) items_ [i].socket; items_ [i].revents = 0; if ((items_ [i].events & ZMQ_POLLOUT) && s->has_out ()) items_ [i].revents |= ZMQ_POLLOUT; if ((items_ [i].events & ZMQ_POLLIN) && s->has_in ()) items_ [i].revents |= ZMQ_POLLIN; if (items_ [i].revents) nevents++; } // If there's at least one event, or if we are asked not to block, // return immediately. if (nevents || !timeout) break; // Wait for events. rc = poll (pollfds, npollfds, timeout); if (rc == -1 && errno == EINTR) break; errno_assert (rc >= 0); // If timeout was hit with no events signaled, return zero. if (rc == 0) break; // If timeout was already applied, we don't want to poll anymore. // Setting timeout to zero will cause termination of the function // once the events we've got are processed. 
if (timeout > 0) timeout = 0; } free (pollfds); return nevents; #elif defined ZMQ_HAVE_WINDOWS || defined ZMQ_HAVE_OPENVMS fd_set pollset_in; FD_ZERO (&pollset_in); fd_set pollset_out; FD_ZERO (&pollset_out); fd_set pollset_err; FD_ZERO (&pollset_err); zmq::app_thread_t *app_thread = NULL; int nsockets = 0; zmq::fd_t maxfd = zmq::retired_fd; zmq::fd_t notify_fd = zmq::retired_fd; for (int i = 0; i != nitems_; i++) { // 0MQ sockets. if (items_ [i].socket) { // Get the app_thread the socket is living in. If there are two // sockets in the same pollset with different app threads, fail. zmq::socket_base_t *s = (zmq::socket_base_t*) items_ [i].socket; if (app_thread) { if (app_thread != s->get_thread ()) { errno = EFAULT; return -1; } } else app_thread = s->get_thread (); nsockets++; continue; } // Raw file descriptors. if (items_ [i].events & ZMQ_POLLIN) FD_SET (items_ [i].fd, &pollset_in); if (items_ [i].events & ZMQ_POLLOUT) FD_SET (items_ [i].fd, &pollset_out); if (maxfd == zmq::retired_fd || maxfd < items_ [i].fd) maxfd = items_ [i].fd; } // If there's at least one 0MQ socket in the pollset we have to poll // for 0MQ commands. If ZMQ_POLL was not set, fail. if (nsockets) { notify_fd = app_thread->get_signaler ()->get_fd (); if (notify_fd == zmq::retired_fd) { errno = ENOTSUP; return -1; } FD_SET (notify_fd, &pollset_in); if (maxfd == zmq::retired_fd || maxfd < notify_fd) maxfd = notify_fd; } bool block = (timeout_ < 0); timeval timeout = {timeout_ / 1000000, timeout_ % 1000000}; timeval zero_timeout = {0, 0}; int nevents = 0; // First iteration just check for events, don't block. Waiting would // prevent exiting on any events that may already been signaled on // 0MQ sockets. fd_set inset, outset, errset; memcpy (&inset, &pollset_in, sizeof (fd_set)); memcpy (&outset, &pollset_out, sizeof (fd_set)); memcpy (&errset, &pollset_err, sizeof (fd_set)); int rc = select (maxfd, &inset, &outset, &errset, &zero_timeout); #if defined ZMQ_HAVE_WINDOWS wsa_assert (rc != SOCKET_ERROR); #else errno_assert (rc != -1 || errno != EINTR); #endif while (true) { // Process 0MQ commands if needed. if (nsockets && FD_ISSET (notify_fd, &inset)) app_thread->process_commands (false, false); // Check for the events. for (int i = 0; i != nitems_; i++) { // If the poll item is a raw file descriptor, simply convert // the events to zmq_pollitem_t-style format. if (!items_ [i].socket) { items_ [i].revents = (FD_ISSET (items_ [i].fd, &inset) ? ZMQ_POLLIN : 0) | (FD_ISSET (items_ [i].fd, &outset) ? ZMQ_POLLOUT : 0); if (items_ [i].revents) nevents++; continue; } // The poll item is a 0MQ socket. zmq::socket_base_t *s = (zmq::socket_base_t*) items_ [i].socket; items_ [i].revents = 0; if ((items_ [i].events & ZMQ_POLLOUT) && s->has_out ()) items_ [i].revents |= ZMQ_POLLOUT; if ((items_ [i].events & ZMQ_POLLIN) && s->has_in ()) items_ [i].revents |= ZMQ_POLLIN; if (items_ [i].revents) nevents++; } // If there's at least one event, or if we are asked not to block, // return immediately. if (nevents || (timeout.tv_sec == 0 && timeout.tv_usec == 0)) break; // Wait for events. memcpy (&inset, &pollset_in, sizeof (fd_set)); memcpy (&outset, &pollset_out, sizeof (fd_set)); memcpy (&errset, &pollset_err, sizeof (fd_set)); int rc = select (maxfd, &inset, &outset, &errset, block ? NULL : &timeout); #if defined ZMQ_HAVE_WINDOWS wsa_assert (rc != SOCKET_ERROR); #else errno_assert (rc != -1 || errno != EINTR); #endif // If timeout was hit with no events signaled, return zero. 
if (rc == 0) break; // If timeout was already applied, we don't want to poll anymore. // Setting timeout to zero will cause termination of the function // once the events we've got are processed. if (!block) timeout = zero_timeout; } return nevents; #else errno = ENOTSUP; return -1; #endif }
zmq::fd_t zmq::tcp_listener_t::accept () { // The situation where connection cannot be accepted due to insufficient // resources is considered valid and treated by ignoring the connection. // Accept one connection and deal with different failure modes. zmq_assert (s != retired_fd); struct sockaddr_storage ss; memset (&ss, 0, sizeof (ss)); #ifdef ZMQ_HAVE_HPUX int ss_len = sizeof (ss); #else socklen_t ss_len = sizeof (ss); #endif fd_t sock = ::accept (s, (struct sockaddr *) &ss, &ss_len); #ifdef ZMQ_HAVE_WINDOWS if (sock == INVALID_SOCKET) { wsa_assert (WSAGetLastError () == WSAEWOULDBLOCK || WSAGetLastError () == WSAECONNRESET || WSAGetLastError () == WSAEMFILE || WSAGetLastError () == WSAENOBUFS); return retired_fd; } #if !defined _WIN32_WCE // On Windows, preventing sockets to be inherited by child processes. BOOL brc = SetHandleInformation ((HANDLE) sock, HANDLE_FLAG_INHERIT, 0); win_assert (brc); #endif #else if (sock == -1) { errno_assert (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR || errno == ECONNABORTED || errno == EPROTO || errno == ENOBUFS || errno == ENOMEM || errno == EMFILE || errno == ENFILE); return retired_fd; } #endif if (!options.tcp_accept_filters.empty ()) { bool matched = false; for (options_t::tcp_accept_filters_t::size_type i = 0; i != options.tcp_accept_filters.size (); ++i) { if (options.tcp_accept_filters[i].match_address ((struct sockaddr *) &ss, ss_len)) { matched = true; break; } } if (!matched) { #ifdef ZMQ_HAVE_WINDOWS int rc = closesocket (sock); wsa_assert (rc != SOCKET_ERROR); #else int rc = ::close (sock); errno_assert (rc == 0); #endif return retired_fd; } } return sock; }
int zmq::signaler_t::make_fdpair (fd_t *r_, fd_t *w_) { #if defined ZMQ_HAVE_EVENTFD // Create eventfd object. fd_t fd = eventfd (0, 0); errno_assert (fd != -1); *w_ = fd; *r_ = fd; return 0; #elif defined ZMQ_HAVE_WINDOWS // This function has to be in a system-wide critical section so that // two instances of the library don't accidentally create signaler // crossing the process boundary. // We'll use named event object to implement the critical section. // Note that if the event object already exists, the CreateEvent requests // EVENT_ALL_ACCESS access right. If this fails, we try to open // the event object asking for SYNCHRONIZE access only. HANDLE sync = CreateEvent (NULL, FALSE, TRUE, TEXT ("zmq-signaler-port-sync")); if (sync == NULL && GetLastError () == ERROR_ACCESS_DENIED) sync = OpenEvent (SYNCHRONIZE, FALSE, TEXT ("zmq-signaler-port-sync")); win_assert (sync != NULL); // Enter the critical section. DWORD dwrc = WaitForSingleObject (sync, INFINITE); zmq_assert (dwrc == WAIT_OBJECT_0); // Windows has no 'socketpair' function. CreatePipe is no good as pipe // handles cannot be polled on. Here we create the socketpair by hand. *w_ = INVALID_SOCKET; *r_ = INVALID_SOCKET; // Create listening socket. SOCKET listener; listener = open_socket (AF_INET, SOCK_STREAM, 0); wsa_assert (listener != INVALID_SOCKET); // Set SO_REUSEADDR and TCP_NODELAY on listening socket. BOOL so_reuseaddr = 1; int rc = setsockopt (listener, SOL_SOCKET, SO_REUSEADDR, (char *)&so_reuseaddr, sizeof (so_reuseaddr)); wsa_assert (rc != SOCKET_ERROR); BOOL tcp_nodelay = 1; rc = setsockopt (listener, IPPROTO_TCP, TCP_NODELAY, (char *)&tcp_nodelay, sizeof (tcp_nodelay)); wsa_assert (rc != SOCKET_ERROR); // Bind listening socket to any free local port. struct sockaddr_in addr; memset (&addr, 0, sizeof (addr)); addr.sin_family = AF_INET; addr.sin_addr.s_addr = htonl (INADDR_LOOPBACK); addr.sin_port = htons (signaler_port); rc = bind (listener, (const struct sockaddr*) &addr, sizeof (addr)); wsa_assert (rc != SOCKET_ERROR); // Listen for incomming connections. rc = listen (listener, 1); wsa_assert (rc != SOCKET_ERROR); // Create the writer socket. *w_ = WSASocket (AF_INET, SOCK_STREAM, 0, NULL, 0, 0); wsa_assert (*w_ != INVALID_SOCKET); // Set TCP_NODELAY on writer socket. rc = setsockopt (*w_, IPPROTO_TCP, TCP_NODELAY, (char *)&tcp_nodelay, sizeof (tcp_nodelay)); wsa_assert (rc != SOCKET_ERROR); // Connect writer to the listener. rc = connect (*w_, (sockaddr *) &addr, sizeof (addr)); wsa_assert (rc != SOCKET_ERROR); // Accept connection from writer. *r_ = accept (listener, NULL, NULL); wsa_assert (*r_ != INVALID_SOCKET); // We don't need the listening socket anymore. Close it. rc = closesocket (listener); wsa_assert (rc != SOCKET_ERROR); // Exit the critical section. BOOL brc = SetEvent (sync); win_assert (brc != 0); return 0; #elif defined ZMQ_HAVE_OPENVMS // Whilst OpenVMS supports socketpair - it maps to AF_INET only. Further, // it does not set the socket options TCP_NODELAY and TCP_NODELACK which // can lead to performance problems. // // The bug will be fixed in V5.6 ECO4 and beyond. In the meantime, we'll // create the socket pair manually. 
sockaddr_in lcladdr; memset (&lcladdr, 0, sizeof (lcladdr)); lcladdr.sin_family = AF_INET; lcladdr.sin_addr.s_addr = htonl (INADDR_LOOPBACK); lcladdr.sin_port = 0; int listener = open_socket (AF_INET, SOCK_STREAM, 0); errno_assert (listener != -1); int on = 1; int rc = setsockopt (listener, IPPROTO_TCP, TCP_NODELAY, &on, sizeof (on)); errno_assert (rc != -1); rc = setsockopt (listener, IPPROTO_TCP, TCP_NODELACK, &on, sizeof (on)); errno_assert (rc != -1); rc = bind(listener, (struct sockaddr*) &lcladdr, sizeof (lcladdr)); errno_assert (rc != -1); socklen_t lcladdr_len = sizeof (lcladdr); rc = getsockname (listener, (struct sockaddr*) &lcladdr, &lcladdr_len); errno_assert (rc != -1); rc = listen (listener, 1); errno_assert (rc != -1); *w_ = open_socket (AF_INET, SOCK_STREAM, 0); errno_assert (*w_ != -1); rc = setsockopt (*w_, IPPROTO_TCP, TCP_NODELAY, &on, sizeof (on)); errno_assert (rc != -1); rc = setsockopt (*w_, IPPROTO_TCP, TCP_NODELACK, &on, sizeof (on)); errno_assert (rc != -1); rc = connect (*w_, (struct sockaddr*) &lcladdr, sizeof (lcladdr)); errno_assert (rc != -1); *r_ = accept (listener, NULL, NULL); errno_assert (*r_ != -1); close (listener); return 0; #else // All other implementations support socketpair() int sv [2]; int rc = socketpair (AF_UNIX, SOCK_STREAM, 0, sv); errno_assert (rc == 0); *w_ = sv [0]; *r_ = sv [1]; return 0; #endif }
int zmq::tcp_listener_t::create_socket (const char *addr_) { _s = tcp_open_socket (addr_, options, true, true, &_address); if (_s == retired_fd) { return -1; } // TODO why is this only done for the listener? make_socket_noninheritable (_s); // Allow reusing of the address. int flag = 1; int rc; #ifdef ZMQ_HAVE_WINDOWS // TODO this was changed for Windows from SO_REUSEADDRE to // SE_EXCLUSIVEADDRUSE by 0ab65324195ad70205514d465b03d851a6de051c, // so the comment above is no longer correct; also, now the settings are // different between listener and connecter with a src address. // is this intentional? rc = setsockopt (_s, SOL_SOCKET, SO_EXCLUSIVEADDRUSE, reinterpret_cast<const char *> (&flag), sizeof (int)); wsa_assert (rc != SOCKET_ERROR); #elif defined ZMQ_HAVE_VXWORKS rc = setsockopt (_s, SOL_SOCKET, SO_REUSEADDR, (char *) &flag, sizeof (int)); errno_assert (rc == 0); #else rc = setsockopt (_s, SOL_SOCKET, SO_REUSEADDR, &flag, sizeof (int)); errno_assert (rc == 0); #endif // Bind the socket to the network interface and port. #if defined ZMQ_HAVE_VXWORKS rc = bind (_s, (sockaddr *) _address.addr (), _address.addrlen ()); #else rc = bind (_s, _address.addr (), _address.addrlen ()); #endif #ifdef ZMQ_HAVE_WINDOWS if (rc == SOCKET_ERROR) { errno = wsa_error_to_errno (WSAGetLastError ()); goto error; } #else if (rc != 0) goto error; #endif // Listen for incoming connections. rc = listen (_s, options.backlog); #ifdef ZMQ_HAVE_WINDOWS if (rc == SOCKET_ERROR) { errno = wsa_error_to_errno (WSAGetLastError ()); goto error; } #else if (rc != 0) goto error; #endif return 0; error: int err = errno; close (); errno = err; return -1; }
zmq::fd_t zmq::tcp_listener_t::accept () { // The situation where connection cannot be accepted due to insufficient // resources is considered valid and treated by ignoring the connection. // Accept one connection and deal with different failure modes. zmq_assert (s != retired_fd); struct sockaddr_storage ss; memset (&ss, 0, sizeof (ss)); #ifdef ZMQ_HAVE_HPUX int ss_len = sizeof (ss); #else socklen_t ss_len = sizeof (ss); #endif #if defined ZMQ_HAVE_SOCK_CLOEXEC && defined HAVE_ACCEPT4 fd_t sock = ::accept4 (s, (struct sockaddr *) &ss, &ss_len, SOCK_CLOEXEC); #else fd_t sock = ::accept (s, (struct sockaddr *) &ss, &ss_len); #endif #ifdef ZMQ_HAVE_WINDOWS if (sock == INVALID_SOCKET) { const int last_error = WSAGetLastError (); wsa_assert (last_error == WSAEWOULDBLOCK || last_error == WSAECONNRESET || last_error == WSAEMFILE || last_error == WSAENOBUFS); return retired_fd; } #if !defined _WIN32_WCE && !defined ZMQ_HAVE_WINDOWS_UWP // On Windows, preventing sockets to be inherited by child processes. BOOL brc = SetHandleInformation ((HANDLE) sock, HANDLE_FLAG_INHERIT, 0); win_assert (brc); #endif #else if (sock == -1) { errno_assert (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR || errno == ECONNABORTED || errno == EPROTO || errno == ENOBUFS || errno == ENOMEM || errno == EMFILE || errno == ENFILE); return retired_fd; } #endif #if (!defined ZMQ_HAVE_SOCK_CLOEXEC || !defined HAVE_ACCEPT4) \ && defined FD_CLOEXEC // Race condition can cause socket not to be closed (if fork happens // between accept and this point). int rc = fcntl (sock, F_SETFD, FD_CLOEXEC); errno_assert (rc != -1); #endif if (!options.tcp_accept_filters.empty ()) { bool matched = false; for (options_t::tcp_accept_filters_t::size_type i = 0; i != options.tcp_accept_filters.size (); ++i) { if (options.tcp_accept_filters[i].match_address ( (struct sockaddr *) &ss, ss_len)) { matched = true; break; } } if (!matched) { #ifdef ZMQ_HAVE_WINDOWS int rc = closesocket (sock); wsa_assert (rc != SOCKET_ERROR); #else int rc = ::close (sock); errno_assert (rc == 0); #endif return retired_fd; } } if (zmq::set_nosigpipe (sock)) { #ifdef ZMQ_HAVE_WINDOWS int rc = closesocket (sock); wsa_assert (rc != SOCKET_ERROR); #else int rc = ::close (sock); errno_assert (rc == 0); #endif return retired_fd; } // Set the IP Type-Of-Service priority for this client socket if (options.tos != 0) set_ip_type_of_service (sock, options.tos); return sock; }
zmq::fd_t zmq::tcp_listener_t::accept () { // The situation where connection cannot be accepted due to insufficient // resources is considered valid and treated by ignoring the connection. // Accept one connection and deal with different failure modes. zmq_assert (_s != retired_fd); struct sockaddr_storage ss; memset (&ss, 0, sizeof (ss)); #if defined ZMQ_HAVE_HPUX || defined ZMQ_HAVE_VXWORKS int ss_len = sizeof (ss); #else socklen_t ss_len = sizeof (ss); #endif #if defined ZMQ_HAVE_SOCK_CLOEXEC && defined HAVE_ACCEPT4 fd_t sock = ::accept4 (_s, reinterpret_cast<struct sockaddr *> (&ss), &ss_len, SOCK_CLOEXEC); #else fd_t sock = ::accept (_s, reinterpret_cast<struct sockaddr *> (&ss), &ss_len); #endif if (sock == retired_fd) { #if defined ZMQ_HAVE_WINDOWS const int last_error = WSAGetLastError (); wsa_assert (last_error == WSAEWOULDBLOCK || last_error == WSAECONNRESET || last_error == WSAEMFILE || last_error == WSAENOBUFS); #elif defined ZMQ_HAVE_ANDROID errno_assert (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR || errno == ECONNABORTED || errno == EPROTO || errno == ENOBUFS || errno == ENOMEM || errno == EMFILE || errno == ENFILE || errno == EINVAL); #else errno_assert (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR || errno == ECONNABORTED || errno == EPROTO || errno == ENOBUFS || errno == ENOMEM || errno == EMFILE || errno == ENFILE); #endif return retired_fd; } make_socket_noninheritable (sock); if (!options.tcp_accept_filters.empty ()) { bool matched = false; for (options_t::tcp_accept_filters_t::size_type i = 0; i != options.tcp_accept_filters.size (); ++i) { if (options.tcp_accept_filters[i].match_address ( reinterpret_cast<struct sockaddr *> (&ss), ss_len)) { matched = true; break; } } if (!matched) { #ifdef ZMQ_HAVE_WINDOWS int rc = closesocket (sock); wsa_assert (rc != SOCKET_ERROR); #else int rc = ::close (sock); errno_assert (rc == 0); #endif return retired_fd; } } if (zmq::set_nosigpipe (sock)) { #ifdef ZMQ_HAVE_WINDOWS int rc = closesocket (sock); wsa_assert (rc != SOCKET_ERROR); #else int rc = ::close (sock); errno_assert (rc == 0); #endif return retired_fd; } // Set the IP Type-Of-Service priority for this client socket if (options.tos != 0) set_ip_type_of_service (sock, options.tos); return sock; }
bool zmq::select_t::process_events (poller_t <select_t> *poller_, bool timers_)
{
    // Initialise the pollsets.
    memcpy (&readfds, &source_set_in, sizeof source_set_in);
    memcpy (&writefds, &source_set_out, sizeof source_set_out);
    memcpy (&exceptfds, &source_set_err, sizeof source_set_err);

    // Wait for events.
    while (true) {

        // Compute the timeout interval. Select is free to overwrite the
        // value so have to compute it each time anew.
        timeval timeout = {max_timer_period / 1000,
            (max_timer_period % 1000) * 1000};

        // Wait for events.
        int rc;
        while (true) {
            rc = select (maxfd + 1, &readfds, &writefds, &exceptfds,
                timers_ ? &timeout : NULL);
#ifdef ZMQ_HAVE_WINDOWS
            wsa_assert (rc != SOCKET_ERROR);
            break;
#else
            if (!(rc == -1 && errno == EINTR)) {
                errno_assert (rc != -1);
                break;
            }
#endif
        }

        // Handle timer.
        if (timers_ && !rc) {
            poller_->timer_event ();
            return false;
        }

        // TODO: Select sometimes returns 0 even though no events have occurred
        // and no timeout was set. Document this situation in detail...
        if (rc > 0)
            break;
    }

    // handle to be used as a parameter in in/out_event in process_event call.
    handle_t handle;

    for (fd_set_t::size_type i = 0; i < fds.size (); i ++) {
        if (fds [i].fd == retired_fd)
            continue;

        // Store actual fd into handle union.
        handle.fd = fds [i].fd;

        if (FD_ISSET (fds [i].fd, &writefds))
            if (poller_->process_event (handle, fds [i].engine, event_out))
                return true;
        if (fds [i].fd == retired_fd)
            continue;
        if (FD_ISSET (fds [i].fd, &readfds))
            if (poller_->process_event (handle, fds [i].engine, event_in))
                return true;
        if (fds [i].fd == retired_fd)
            continue;
        if (FD_ISSET (fds [i].fd, &exceptfds))
            if (poller_->process_event (handle, fds [i].engine, event_err))
                return true;
    }

    // Destroy retired event sources.
    if (retired) {
        for (fd_set_t::size_type i = 0; i < fds.size (); i ++) {
            if (fds [i].fd == retired_fd) {
                fds.erase (fds.begin () + i);
                i --;
            }
        }
        retired = false;
    }

    return false;
}
int zmq::socket_poller_t::wait (zmq::socket_poller_t::event_t *events_, int n_events_, long timeout_) { if (items.empty () && timeout_ < 0) { errno = EFAULT; return -1; } if (need_rebuild) rebuild (); if (unlikely (poll_size == 0)) { // We'll report an error (timed out) as if the list was non-empty and // no event occurred within the specified timeout. Otherwise the caller // needs to check the return value AND the event to avoid using the // nullified event data. errno = EAGAIN; if (timeout_ == 0) return -1; #if defined ZMQ_HAVE_WINDOWS Sleep (timeout_ > 0 ? timeout_ : INFINITE); return -1; #elif defined ZMQ_HAVE_ANDROID usleep (timeout_ * 1000); return -1; #elif defined ZMQ_HAVE_OSX usleep (timeout_ * 1000); errno = EAGAIN; return -1; #else usleep (timeout_ * 1000); return -1; #endif } #if defined ZMQ_POLL_BASED_ON_POLL zmq::clock_t clock; uint64_t now = 0; uint64_t end = 0; bool first_pass = true; while (true) { // Compute the timeout for the subsequent poll. int timeout; if (first_pass) timeout = 0; else if (timeout_ < 0) timeout = -1; else timeout = end - now; // Wait for events. while (true) { int rc = poll (pollfds, poll_size, timeout); if (rc == -1 && errno == EINTR) { return -1; } errno_assert (rc >= 0); break; } // Receive the signal from pollfd if (use_signaler && pollfds[0].revents & POLLIN) signaler->recv (); // Check for the events. int found = check_events (events_, n_events_); if (found) { if (found > 0) zero_trail_events (events_, n_events_, found); return found; } // Adjust timeout or break if (adjust_timeout (clock, timeout_, now, end, first_pass) == 0) break; } errno = EAGAIN; return -1; #elif defined ZMQ_POLL_BASED_ON_SELECT zmq::clock_t clock; uint64_t now = 0; uint64_t end = 0; bool first_pass = true; fd_set inset, outset, errset; while (true) { // Compute the timeout for the subsequent poll. timeval timeout; timeval *ptimeout; if (first_pass) { timeout.tv_sec = 0; timeout.tv_usec = 0; ptimeout = &timeout; } else if (timeout_ < 0) ptimeout = NULL; else { timeout.tv_sec = (long) ((end - now) / 1000); timeout.tv_usec = (long) ((end - now) % 1000 * 1000); ptimeout = &timeout; } // Wait for events. Ignore interrupts if there's infinite timeout. while (true) { #if defined ZMQ_HAVE_WINDOWS // On Windows we don't need to copy the whole fd_set. // SOCKETS are continuous from the beginning of fd_array in fd_set. // We just need to copy fd_count elements of fd_array. // We gain huge memcpy() improvement if number of used SOCKETs is much lower than FD_SETSIZE. memcpy (&inset, &pollset_in, (char *) (pollset_in.fd_array + pollset_in.fd_count ) - (char *) &pollset_in ); memcpy (&outset, &pollset_out, (char *) (pollset_out.fd_array + pollset_out.fd_count) - (char *) &pollset_out); memcpy (&errset, &pollset_err, (char *) (pollset_err.fd_array + pollset_err.fd_count) - (char *) &pollset_err); int rc = select (0, &inset, &outset, &errset, ptimeout); if (unlikely (rc == SOCKET_ERROR)) { errno = zmq::wsa_error_to_errno (WSAGetLastError ()); wsa_assert (errno == ENOTSOCK); return -1; } #else memcpy (&inset, &pollset_in, sizeof (fd_set)); memcpy (&outset, &pollset_out, sizeof (fd_set)); memcpy (&errset, &pollset_err, sizeof (fd_set)); int rc = select (maxfd + 1, &inset, &outset, &errset, ptimeout); if (unlikely (rc == -1)) { errno_assert (errno == EINTR || errno == EBADF); return -1; } #endif break; } if (use_signaler && FD_ISSET (signaler->get_fd (), &inset)) signaler->recv (); // Check for the events. 
int found = check_events(events_, n_events_, inset, outset, errset); if (found) { if (found > 0) zero_trail_events (events_, n_events_, found); return found; } // Adjust timeout or break if (adjust_timeout (clock, timeout_, now, end, first_pass) == 0) break; } errno = EAGAIN; return -1; #else // Exotic platforms that support neither poll() nor select(). errno = ENOTSUP; return -1; #endif }
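socket_poller_t is the engine behind the draft zmq_poller_* C API. Assuming that draft API is compiled in (ZMQ_BUILD_DRAFT_API / --enable-drafts), a caller sees the semantics implemented above roughly as follows: -1 with errno set when the timeout expires without an event, and, on success, any trailing event slots zeroed out. This is a sketch, not canonical libzmq example code.

void *poller = zmq_poller_new ();
zmq_poller_add (poller, socket, NULL, ZMQ_POLLIN);   //  'socket' is a 0MQ socket created elsewhere.

zmq_poller_event_t events [2];
int found = zmq_poller_wait_all (poller, events, 2, 1000);   //  1000 ms timeout.
if (found == -1) {
    //  errno is EAGAIN in the variant above (ETIMEDOUT in the later one)
    //  when no event arrived within the timeout.
}
else {
    for (int i = 0; i < found; i++) {
        //  events [i].socket (or events [i].fd) identifies the ready item;
        //  slots from 'found' onwards have been zeroed.
    }
}
zmq_poller_destroy (&poller);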
int zmq_poll (zmq_pollitem_t *items_, int nitems_, long timeout_) { if (!items_) { errno = EFAULT; return -1; } #if defined ZMQ_POLL_BASED_ON_POLL if (unlikely (nitems_ < 0)) { errno = EINVAL; return -1; } if (unlikely (nitems_ == 0)) { if (timeout_ == 0) return 0; #if defined ZMQ_HAVE_WINDOWS Sleep (timeout_ > 0 ? timeout_ : INFINITE); return 0; #elif defined ZMQ_HAVE_ANDROID usleep (timeout_ * 1000); return 0; #else return usleep (timeout_ * 1000); #endif } zmq::clock_t clock; uint64_t now = 0; uint64_t end = 0; pollfd *pollfds = (pollfd*) malloc (nitems_ * sizeof (pollfd)); alloc_assert (pollfds); // Build pollset for poll () system call. for (int i = 0; i != nitems_; i++) { // If the poll item is a 0MQ socket, we poll on the file descriptor // retrieved by the ZMQ_FD socket option. if (items_ [i].socket) { size_t zmq_fd_size = sizeof (zmq::fd_t); if (zmq_getsockopt (items_ [i].socket, ZMQ_FD, &pollfds [i].fd, &zmq_fd_size) == -1) { free (pollfds); return -1; } pollfds [i].events = items_ [i].events ? POLLIN : 0; } // Else, the poll item is a raw file descriptor. Just convert the // events to normal POLLIN/POLLOUT for poll (). else { pollfds [i].fd = items_ [i].fd; pollfds [i].events = (items_ [i].events & ZMQ_POLLIN ? POLLIN : 0) | (items_ [i].events & ZMQ_POLLOUT ? POLLOUT : 0); } } bool first_pass = true; int nevents = 0; while (true) { // Compute the timeout for the subsequent poll. int timeout; if (first_pass) timeout = 0; else if (timeout_ < 0) timeout = -1; else timeout = end - now; // Wait for events. while (true) { int rc = poll (pollfds, nitems_, timeout); if (rc == -1 && errno == EINTR) { free (pollfds); return -1; } errno_assert (rc >= 0); break; } // Check for the events. for (int i = 0; i != nitems_; i++) { items_ [i].revents = 0; // The poll item is a 0MQ socket. Retrieve pending events // using the ZMQ_EVENTS socket option. if (items_ [i].socket) { size_t zmq_events_size = sizeof (uint32_t); uint32_t zmq_events; if (zmq_getsockopt (items_ [i].socket, ZMQ_EVENTS, &zmq_events, &zmq_events_size) == -1) { free (pollfds); return -1; } if ((items_ [i].events & ZMQ_POLLOUT) && (zmq_events & ZMQ_POLLOUT)) items_ [i].revents |= ZMQ_POLLOUT; if ((items_ [i].events & ZMQ_POLLIN) && (zmq_events & ZMQ_POLLIN)) items_ [i].revents |= ZMQ_POLLIN; } // Else, the poll item is a raw file descriptor, simply convert // the events to zmq_pollitem_t-style format. else { if (pollfds [i].revents & POLLIN) items_ [i].revents |= ZMQ_POLLIN; if (pollfds [i].revents & POLLOUT) items_ [i].revents |= ZMQ_POLLOUT; if (pollfds [i].revents & ~(POLLIN | POLLOUT)) items_ [i].revents |= ZMQ_POLLERR; } if (items_ [i].revents) nevents++; } // If timeout is zero, exit immediately whether there are events or not. if (timeout_ == 0) break; // If there are events to return, we can exit immediately. if (nevents) break; // At this point we are meant to wait for events but there are none. // If timeout is infinite we can just loop until we get some events. if (timeout_ < 0) { if (first_pass) first_pass = false; continue; } // The timeout is finite and there are no events. In the first pass // we get a timestamp of when the polling began. (We assume that the // first pass has taken negligible time.) We also compute the time // when the polling should time out. if (first_pass) { now = clock.now_ms (); end = now + timeout_; if (now == end) break; first_pass = false; continue; } // Find out whether the timeout has expired. 
now = clock.now_ms (); if (now >= end) break; } free (pollfds); return nevents; #elif defined ZMQ_POLL_BASED_ON_SELECT if (unlikely (nitems_ < 0)) { errno = EINVAL; return -1; } if (unlikely (nitems_ == 0)) { if (timeout_ == 0) return 0; #if defined ZMQ_HAVE_WINDOWS Sleep (timeout_ > 0 ? timeout_ : INFINITE); return 0; #else return usleep (timeout_ * 1000); #endif } zmq::clock_t clock; uint64_t now = 0; uint64_t end = 0; // Ensure we do not attempt to select () on more than FD_SETSIZE // file descriptors. zmq_assert (nitems_ <= FD_SETSIZE); fd_set pollset_in; FD_ZERO (&pollset_in); fd_set pollset_out; FD_ZERO (&pollset_out); fd_set pollset_err; FD_ZERO (&pollset_err); zmq::fd_t maxfd = 0; // Build the fd_sets for passing to select (). for (int i = 0; i != nitems_; i++) { // If the poll item is a 0MQ socket we are interested in input on the // notification file descriptor retrieved by the ZMQ_FD socket option. if (items_ [i].socket) { size_t zmq_fd_size = sizeof (zmq::fd_t); zmq::fd_t notify_fd; if (zmq_getsockopt (items_ [i].socket, ZMQ_FD, &notify_fd, &zmq_fd_size) == -1) return -1; if (items_ [i].events) { FD_SET (notify_fd, &pollset_in); if (maxfd < notify_fd) maxfd = notify_fd; } } // Else, the poll item is a raw file descriptor. Convert the poll item // events to the appropriate fd_sets. else { if (items_ [i].events & ZMQ_POLLIN) FD_SET (items_ [i].fd, &pollset_in); if (items_ [i].events & ZMQ_POLLOUT) FD_SET (items_ [i].fd, &pollset_out); if (items_ [i].events & ZMQ_POLLERR) FD_SET (items_ [i].fd, &pollset_err); if (maxfd < items_ [i].fd) maxfd = items_ [i].fd; } } bool first_pass = true; int nevents = 0; fd_set inset, outset, errset; while (true) { // Compute the timeout for the subsequent poll. timeval timeout; timeval *ptimeout; if (first_pass) { timeout.tv_sec = 0; timeout.tv_usec = 0; ptimeout = &timeout; } else if (timeout_ < 0) ptimeout = NULL; else { timeout.tv_sec = (long) ((end - now) / 1000); timeout.tv_usec = (long) ((end - now) % 1000 * 1000); ptimeout = &timeout; } // Wait for events. Ignore interrupts if there's infinite timeout. while (true) { memcpy (&inset, &pollset_in, sizeof (fd_set)); memcpy (&outset, &pollset_out, sizeof (fd_set)); memcpy (&errset, &pollset_err, sizeof (fd_set)); #if defined ZMQ_HAVE_WINDOWS int rc = select (0, &inset, &outset, &errset, ptimeout); if (unlikely (rc == SOCKET_ERROR)) { errno = zmq::wsa_error_to_errno (WSAGetLastError ()); if (errno == ENOTSOCK) return -1; wsa_assert (false); } #else int rc = select (maxfd + 1, &inset, &outset, &errset, ptimeout); if (unlikely (rc == -1)) { if (errno == EINTR || errno == EBADF) return -1; errno_assert (false); } #endif break; } // Check for the events. for (int i = 0; i != nitems_; i++) { items_ [i].revents = 0; // The poll item is a 0MQ socket. Retrieve pending events // using the ZMQ_EVENTS socket option. if (items_ [i].socket) { size_t zmq_events_size = sizeof (uint32_t); uint32_t zmq_events; if (zmq_getsockopt (items_ [i].socket, ZMQ_EVENTS, &zmq_events, &zmq_events_size) == -1) return -1; if ((items_ [i].events & ZMQ_POLLOUT) && (zmq_events & ZMQ_POLLOUT)) items_ [i].revents |= ZMQ_POLLOUT; if ((items_ [i].events & ZMQ_POLLIN) && (zmq_events & ZMQ_POLLIN)) items_ [i].revents |= ZMQ_POLLIN; } // Else, the poll item is a raw file descriptor, simply convert // the events to zmq_pollitem_t-style format. 
else { if (FD_ISSET (items_ [i].fd, &inset)) items_ [i].revents |= ZMQ_POLLIN; if (FD_ISSET (items_ [i].fd, &outset)) items_ [i].revents |= ZMQ_POLLOUT; if (FD_ISSET (items_ [i].fd, &errset)) items_ [i].revents |= ZMQ_POLLERR; } if (items_ [i].revents) nevents++; } // If timeout is zero, exit immediately whether there are events or not. if (timeout_ == 0) break; // If there are events to return, we can exit immediately. if (nevents) break; // At this point we are meant to wait for events but there are none. // If timeout is infinite we can just loop until we get some events. if (timeout_ < 0) { if (first_pass) first_pass = false; continue; } // The timeout is finite and there are no events. In the first pass // we get a timestamp of when the polling began. (We assume that the // first pass has taken negligible time.) We also compute the time // when the polling should time out. if (first_pass) { now = clock.now_ms (); end = now + timeout_; if (now == end) break; first_pass = false; continue; } // Find out whether the timeout has expired. now = clock.now_ms (); if (now >= end) break; } return nevents; #else // Exotic platforms that support neither poll() nor select(). errno = ENOTSUP; return -1; #endif }
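For reference, this is the stable public entry point; a typical call mixes a 0MQ socket and a raw file descriptor in one wait. The fragment below is illustrative only: 'subscriber' and 'raw_fd' are assumed to have been created elsewhere.

zmq_pollitem_t items [2];
items [0].socket = subscriber;      //  A 0MQ socket created elsewhere.
items [0].fd = 0;
items [0].events = ZMQ_POLLIN;
items [1].socket = NULL;            //  NULL socket means "use the raw fd".
items [1].fd = raw_fd;
items [1].events = ZMQ_POLLIN | ZMQ_POLLOUT;

int rc = zmq_poll (items, 2, 1000); //  Wait up to 1000 ms.
if (rc > 0) {
    if (items [0].revents & ZMQ_POLLIN)  { /* message readable on subscriber */ }
    if (items [1].revents & ZMQ_POLLOUT) { /* raw_fd is writable */ }
}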
int zmq::make_fdpair (fd_t *r_, fd_t *w_) { #if defined ZMQ_HAVE_EVENTFD int flags = 0; #if defined ZMQ_HAVE_EVENTFD_CLOEXEC // Setting this option result in sane behaviour when exec() functions // are used. Old sockets are closed and don't block TCP ports, avoid // leaks, etc. flags |= EFD_CLOEXEC; #endif fd_t fd = eventfd (0, flags); if (fd == -1) { errno_assert (errno == ENFILE || errno == EMFILE); *w_ = *r_ = -1; return -1; } else { *w_ = *r_ = fd; return 0; } #elif defined ZMQ_HAVE_WINDOWS #if !defined _WIN32_WCE && !defined ZMQ_HAVE_WINDOWS_UWP // Windows CE does not manage security attributes SECURITY_DESCRIPTOR sd; SECURITY_ATTRIBUTES sa; memset (&sd, 0, sizeof sd); memset (&sa, 0, sizeof sa); InitializeSecurityDescriptor (&sd, SECURITY_DESCRIPTOR_REVISION); SetSecurityDescriptorDacl (&sd, TRUE, 0, FALSE); sa.nLength = sizeof (SECURITY_ATTRIBUTES); sa.lpSecurityDescriptor = &sd; #endif // This function has to be in a system-wide critical section so that // two instances of the library don't accidentally create signaler // crossing the process boundary. // We'll use named event object to implement the critical section. // Note that if the event object already exists, the CreateEvent requests // EVENT_ALL_ACCESS access right. If this fails, we try to open // the event object asking for SYNCHRONIZE access only. HANDLE sync = NULL; // Create critical section only if using fixed signaler port // Use problematic Event implementation for compatibility if using old port 5905. // Otherwise use Mutex implementation. int event_signaler_port = 5905; if (signaler_port == event_signaler_port) { #if !defined _WIN32_WCE && !defined ZMQ_HAVE_WINDOWS_UWP sync = CreateEventW (&sa, FALSE, TRUE, L"Global\\zmq-signaler-port-sync"); #else sync = CreateEventW (NULL, FALSE, TRUE, L"Global\\zmq-signaler-port-sync"); #endif if (sync == NULL && GetLastError () == ERROR_ACCESS_DENIED) sync = OpenEventW (SYNCHRONIZE | EVENT_MODIFY_STATE, FALSE, L"Global\\zmq-signaler-port-sync"); win_assert (sync != NULL); } else if (signaler_port != 0) { wchar_t mutex_name[MAX_PATH]; #ifdef __MINGW32__ _snwprintf (mutex_name, MAX_PATH, L"Global\\zmq-signaler-port-%d", signaler_port); #else swprintf (mutex_name, MAX_PATH, L"Global\\zmq-signaler-port-%d", signaler_port); #endif #if !defined _WIN32_WCE && !defined ZMQ_HAVE_WINDOWS_UWP sync = CreateMutexW (&sa, FALSE, mutex_name); #else sync = CreateMutexW (NULL, FALSE, mutex_name); #endif if (sync == NULL && GetLastError () == ERROR_ACCESS_DENIED) sync = OpenMutexW (SYNCHRONIZE, FALSE, mutex_name); win_assert (sync != NULL); } // Windows has no 'socketpair' function. CreatePipe is no good as pipe // handles cannot be polled on. Here we create the socketpair by hand. *w_ = INVALID_SOCKET; *r_ = INVALID_SOCKET; // Create listening socket. SOCKET listener; listener = open_socket (AF_INET, SOCK_STREAM, 0); wsa_assert (listener != INVALID_SOCKET); // Set SO_REUSEADDR and TCP_NODELAY on listening socket. BOOL so_reuseaddr = 1; int rc = setsockopt (listener, SOL_SOCKET, SO_REUSEADDR, (char *) &so_reuseaddr, sizeof so_reuseaddr); wsa_assert (rc != SOCKET_ERROR); tune_socket (listener); // Init sockaddr to signaler port. struct sockaddr_in addr; memset (&addr, 0, sizeof addr); addr.sin_family = AF_INET; addr.sin_addr.s_addr = htonl (INADDR_LOOPBACK); addr.sin_port = htons (signaler_port); // Create the writer socket. *w_ = open_socket (AF_INET, SOCK_STREAM, 0); wsa_assert (*w_ != INVALID_SOCKET); // Set TCP_NODELAY on writer socket. 
tune_socket (*w_); if (sync != NULL) { // Enter the critical section. DWORD dwrc = WaitForSingleObject (sync, INFINITE); zmq_assert (dwrc == WAIT_OBJECT_0 || dwrc == WAIT_ABANDONED); } // Bind listening socket to signaler port. rc = bind (listener, (const struct sockaddr *) &addr, sizeof addr); if (rc != SOCKET_ERROR && signaler_port == 0) { // Retrieve ephemeral port number int addrlen = sizeof addr; rc = getsockname (listener, (struct sockaddr *) &addr, &addrlen); } // Listen for incoming connections. if (rc != SOCKET_ERROR) rc = listen (listener, 1); // Connect writer to the listener. if (rc != SOCKET_ERROR) rc = connect (*w_, (struct sockaddr *) &addr, sizeof addr); // Accept connection from writer. if (rc != SOCKET_ERROR) *r_ = accept (listener, NULL, NULL); // Send/receive large chunk to work around TCP slow start // This code is a workaround for #1608 if (*r_ != INVALID_SOCKET) { size_t dummy_size = 1024 * 1024; // 1M to overload default receive buffer unsigned char *dummy = (unsigned char *) malloc (dummy_size); wsa_assert (dummy); int still_to_send = (int) dummy_size; int still_to_recv = (int) dummy_size; while (still_to_send || still_to_recv) { int nbytes; if (still_to_send > 0) { nbytes = ::send (*w_, (char *) (dummy + dummy_size - still_to_send), still_to_send, 0); wsa_assert (nbytes != SOCKET_ERROR); still_to_send -= nbytes; } nbytes = ::recv (*r_, (char *) (dummy + dummy_size - still_to_recv), still_to_recv, 0); wsa_assert (nbytes != SOCKET_ERROR); still_to_recv -= nbytes; } free (dummy); } // Save errno if error occurred in bind/listen/connect/accept. int saved_errno = 0; if (*r_ == INVALID_SOCKET) saved_errno = WSAGetLastError (); // We don't need the listening socket anymore. Close it. rc = closesocket (listener); wsa_assert (rc != SOCKET_ERROR); if (sync != NULL) { // Exit the critical section. BOOL brc; if (signaler_port == event_signaler_port) brc = SetEvent (sync); else brc = ReleaseMutex (sync); win_assert (brc != 0); // Release the kernel object brc = CloseHandle (sync); win_assert (brc != 0); } if (*r_ != INVALID_SOCKET) { #if !defined _WIN32_WCE && !defined ZMQ_HAVE_WINDOWS_UWP // On Windows, preventing sockets to be inherited by child processes. BOOL brc = SetHandleInformation ((HANDLE) *r_, HANDLE_FLAG_INHERIT, 0); win_assert (brc); #endif return 0; } else { // Cleanup writer if connection failed if (*w_ != INVALID_SOCKET) { rc = closesocket (*w_); wsa_assert (rc != SOCKET_ERROR); *w_ = INVALID_SOCKET; } // Set errno from saved value errno = wsa_error_to_errno (saved_errno); return -1; } #elif defined ZMQ_HAVE_OPENVMS // Whilst OpenVMS supports socketpair - it maps to AF_INET only. Further, // it does not set the socket options TCP_NODELAY and TCP_NODELACK which // can lead to performance problems. // // The bug will be fixed in V5.6 ECO4 and beyond. In the meantime, we'll // create the socket pair manually. 
struct sockaddr_in lcladdr; memset (&lcladdr, 0, sizeof lcladdr); lcladdr.sin_family = AF_INET; lcladdr.sin_addr.s_addr = htonl (INADDR_LOOPBACK); lcladdr.sin_port = 0; int listener = open_socket (AF_INET, SOCK_STREAM, 0); errno_assert (listener != -1); int on = 1; int rc = setsockopt (listener, IPPROTO_TCP, TCP_NODELAY, &on, sizeof on); errno_assert (rc != -1); rc = setsockopt (listener, IPPROTO_TCP, TCP_NODELACK, &on, sizeof on); errno_assert (rc != -1); rc = bind (listener, (struct sockaddr *) &lcladdr, sizeof lcladdr); errno_assert (rc != -1); socklen_t lcladdr_len = sizeof lcladdr; rc = getsockname (listener, (struct sockaddr *) &lcladdr, &lcladdr_len); errno_assert (rc != -1); rc = listen (listener, 1); errno_assert (rc != -1); *w_ = open_socket (AF_INET, SOCK_STREAM, 0); errno_assert (*w_ != -1); rc = setsockopt (*w_, IPPROTO_TCP, TCP_NODELAY, &on, sizeof on); errno_assert (rc != -1); rc = setsockopt (*w_, IPPROTO_TCP, TCP_NODELACK, &on, sizeof on); errno_assert (rc != -1); rc = connect (*w_, (struct sockaddr *) &lcladdr, sizeof lcladdr); errno_assert (rc != -1); *r_ = accept (listener, NULL, NULL); errno_assert (*r_ != -1); close (listener); return 0; #else // All other implementations support socketpair() int sv[2]; int type = SOCK_STREAM; // Setting this option result in sane behaviour when exec() functions // are used. Old sockets are closed and don't block TCP ports, avoid // leaks, etc. #if defined ZMQ_HAVE_SOCK_CLOEXEC type |= SOCK_CLOEXEC; #endif int rc = socketpair (AF_UNIX, type, 0, sv); if (rc == -1) { errno_assert (errno == ENFILE || errno == EMFILE); *w_ = *r_ = -1; return -1; } else { // If there's no SOCK_CLOEXEC, let's try the second best option. Note that // race condition can cause socket not to be closed (if fork happens // between socket creation and this point). #if !defined ZMQ_HAVE_SOCK_CLOEXEC && defined FD_CLOEXEC rc = fcntl (sv[0], F_SETFD, FD_CLOEXEC); errno_assert (rc != -1); rc = fcntl (sv[1], F_SETFD, FD_CLOEXEC); errno_assert (rc != -1); #endif *w_ = sv[0]; *r_ = sv[1]; return 0; } #endif }
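Whichever branch is taken, the resulting pair is used as a one-way wake-up channel: a token is written to *w_ and the reader end *r_ is handed to a poller and drained once it becomes readable. The sketch below shows that convention, not the exact signaler_t code; on the eventfd path both ends are the same descriptor and the token is an 8-byte counter rather than a single byte.

//  Rough sketch of how the pair is typically driven (not the exact
//  signaler_t implementation).
static void send_signal (zmq::fd_t w_)
{
#if defined ZMQ_HAVE_EVENTFD
    //  eventfd consumes an 8-byte counter increment.
    const uint64_t inc = 1;
    const ssize_t sz = write (w_, &inc, sizeof inc);
    errno_assert (sz == sizeof inc);
#else
    //  Socket-pair based signalers send a single dummy byte.
    const char dummy = 0;
    const ssize_t nbytes = ::send (w_, &dummy, sizeof dummy, 0);
    errno_assert (nbytes == sizeof dummy);
#endif
}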
// Returns -1 if we could not make the socket pair successfully int zmq::signaler_t::make_fdpair (fd_t *r_, fd_t *w_) { #if defined ZMQ_HAVE_EVENTFD fd_t fd = eventfd (0, 0); if (fd == -1) { errno_assert (errno == ENFILE || errno == EMFILE); *w_ = *r_ = -1; return -1; } else { *w_ = *r_ = fd; return 0; } #elif defined ZMQ_HAVE_WINDOWS # if !defined _WIN32_WCE // Windows CE does not manage security attributes SECURITY_DESCRIPTOR sd; SECURITY_ATTRIBUTES sa; memset (&sd, 0, sizeof (sd)); memset (&sa, 0, sizeof (sa)); InitializeSecurityDescriptor(&sd, SECURITY_DESCRIPTOR_REVISION); SetSecurityDescriptorDacl(&sd, TRUE, 0, FALSE); sa.nLength = sizeof(SECURITY_ATTRIBUTES); sa.lpSecurityDescriptor = &sd; # endif // This function has to be in a system-wide critical section so that // two instances of the library don't accidentally create signaler // crossing the process boundary. // We'll use named event object to implement the critical section. // Note that if the event object already exists, the CreateEvent requests // EVENT_ALL_ACCESS access right. If this fails, we try to open // the event object asking for SYNCHRONIZE access only. HANDLE sync = NULL; // Create critical section only if using fixed signaler port // Use problematic Event implementation for compatibility if using old port 5905. // Otherwise use Mutex implementation. int event_signaler_port = 5905; if (signaler_port == event_signaler_port) { # if !defined _WIN32_WCE sync = CreateEvent (&sa, FALSE, TRUE, TEXT ("Global\\zmq-signaler-port-sync")); # else sync = CreateEvent (NULL, FALSE, TRUE, TEXT ("Global\\zmq-signaler-port-sync")); # endif if (sync == NULL && GetLastError () == ERROR_ACCESS_DENIED) sync = OpenEvent (SYNCHRONIZE | EVENT_MODIFY_STATE, FALSE, TEXT ("Global\\zmq-signaler-port-sync")); win_assert (sync != NULL); } else if (signaler_port != 0) { TCHAR mutex_name[64]; /* VC++ v120 swprintf has been changed to conform with the ISO C standard, adding an extra character count parameter. */ _stprintf (mutex_name, TEXT ("Global\\zmq-signaler-port-%d"), signaler_port); # if !defined _WIN32_WCE sync = CreateMutex (&sa, FALSE, mutex_name); # else sync = CreateMutex (NULL, FALSE, mutex_name); # endif if (sync == NULL && GetLastError () == ERROR_ACCESS_DENIED) sync = OpenMutex (SYNCHRONIZE, FALSE, mutex_name); win_assert (sync != NULL); } // Windows has no 'socketpair' function. CreatePipe is no good as pipe // handles cannot be polled on. Here we create the socketpair by hand. *w_ = INVALID_SOCKET; *r_ = INVALID_SOCKET; // Create listening socket. SOCKET listener; listener = open_socket (AF_INET, SOCK_STREAM, 0); wsa_assert (listener != INVALID_SOCKET); // Set SO_REUSEADDR and TCP_NODELAY on listening socket. BOOL so_reuseaddr = 1; int rc = setsockopt (listener, SOL_SOCKET, SO_REUSEADDR, (char *)&so_reuseaddr, sizeof (so_reuseaddr)); wsa_assert (rc != SOCKET_ERROR); BOOL tcp_nodelay = 1; rc = setsockopt (listener, IPPROTO_TCP, TCP_NODELAY, (char *)&tcp_nodelay, sizeof (tcp_nodelay)); wsa_assert (rc != SOCKET_ERROR); // Init sockaddr to signaler port. struct sockaddr_in addr; memset (&addr, 0, sizeof (addr)); addr.sin_family = AF_INET; addr.sin_addr.s_addr = htonl (INADDR_LOOPBACK); addr.sin_port = htons (signaler_port); // Create the writer socket. *w_ = open_socket (AF_INET, SOCK_STREAM, 0); wsa_assert (*w_ != INVALID_SOCKET); // Set TCP_NODELAY on writer socket. 
rc = setsockopt (*w_, IPPROTO_TCP, TCP_NODELAY, (char *)&tcp_nodelay, sizeof (tcp_nodelay)); wsa_assert (rc != SOCKET_ERROR); if (sync != NULL) { // Enter the critical section. DWORD dwrc = WaitForSingleObject (sync, INFINITE); zmq_assert (dwrc == WAIT_OBJECT_0 || dwrc == WAIT_ABANDONED); } // Bind listening socket to signaler port. rc = bind (listener, (const struct sockaddr*) &addr, sizeof (addr)); if (rc != SOCKET_ERROR && signaler_port == 0) { // Retrieve ephemeral port number int addrlen = sizeof (addr); rc = getsockname (listener, (struct sockaddr*) &addr, &addrlen); } // Listen for incoming connections. if (rc != SOCKET_ERROR) rc = listen (listener, 1); // Connect writer to the listener. if (rc != SOCKET_ERROR) rc = connect (*w_, (struct sockaddr*) &addr, sizeof (addr)); // Accept connection from writer. if (rc != SOCKET_ERROR) *r_ = accept (listener, NULL, NULL); // Save errno if error occurred in bind/listen/connect/accept. int saved_errno = 0; if (*r_ == INVALID_SOCKET) saved_errno = WSAGetLastError (); // We don't need the listening socket anymore. Close it. closesocket (listener); if (sync != NULL) { // Exit the critical section. BOOL brc; if (signaler_port == event_signaler_port) brc = SetEvent (sync); else brc = ReleaseMutex (sync); win_assert (brc != 0); // Release the kernel object brc = CloseHandle (sync); win_assert (brc != 0); } if (*r_ != INVALID_SOCKET) { # if !defined _WIN32_WCE // On Windows, preventing sockets to be inherited by child processes. BOOL brc = SetHandleInformation ((HANDLE) *r_, HANDLE_FLAG_INHERIT, 0); win_assert (brc); # endif return 0; } else { // Cleanup writer if connection failed if (*w_ != INVALID_SOCKET) { rc = closesocket (*w_); wsa_assert (rc != SOCKET_ERROR); *w_ = INVALID_SOCKET; } // Set errno from saved value errno = wsa_error_to_errno (saved_errno); return -1; } #elif defined ZMQ_HAVE_OPENVMS // Whilst OpenVMS supports socketpair - it maps to AF_INET only. Further, // it does not set the socket options TCP_NODELAY and TCP_NODELACK which // can lead to performance problems. // // The bug will be fixed in V5.6 ECO4 and beyond. In the meantime, we'll // create the socket pair manually. 
struct sockaddr_in lcladdr; memset (&lcladdr, 0, sizeof (lcladdr)); lcladdr.sin_family = AF_INET; lcladdr.sin_addr.s_addr = htonl (INADDR_LOOPBACK); lcladdr.sin_port = 0; int listener = open_socket (AF_INET, SOCK_STREAM, 0); errno_assert (listener != -1); int on = 1; int rc = setsockopt (listener, IPPROTO_TCP, TCP_NODELAY, &on, sizeof (on)); errno_assert (rc != -1); rc = setsockopt (listener, IPPROTO_TCP, TCP_NODELACK, &on, sizeof (on)); errno_assert (rc != -1); rc = bind (listener, (struct sockaddr*) &lcladdr, sizeof (lcladdr)); errno_assert (rc != -1); socklen_t lcladdr_len = sizeof (lcladdr); rc = getsockname (listener, (struct sockaddr*) &lcladdr, &lcladdr_len); errno_assert (rc != -1); rc = listen (listener, 1); errno_assert (rc != -1); *w_ = open_socket (AF_INET, SOCK_STREAM, 0); errno_assert (*w_ != -1); rc = setsockopt (*w_, IPPROTO_TCP, TCP_NODELAY, &on, sizeof (on)); errno_assert (rc != -1); rc = setsockopt (*w_, IPPROTO_TCP, TCP_NODELACK, &on, sizeof (on)); errno_assert (rc != -1); rc = connect (*w_, (struct sockaddr*) &lcladdr, sizeof (lcladdr)); errno_assert (rc != -1); *r_ = accept (listener, NULL, NULL); errno_assert (*r_ != -1); close (listener); return 0; #else // All other implementations support socketpair() int sv [2]; int rc = socketpair (AF_UNIX, SOCK_STREAM, 0, sv); if (rc == -1) { errno_assert (errno == ENFILE || errno == EMFILE); *w_ = *r_ = -1; return -1; } else { *w_ = sv [0]; *r_ = sv [1]; return 0; } #endif }
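Unlike the newer variant further up, this socketpair() fallback does not mark the descriptors close-on-exec. When SOCK_CLOEXEC cannot be passed to socketpair(), the usual (slightly racy) workaround is to set FD_CLOEXEC afterwards, as sketched below; the helper name is illustrative.

#include <sys/socket.h>
#include <fcntl.h>

//  Sketch: create the pair, then mark both ends close-on-exec. Note the
//  window between socketpair() and fcntl() in which a concurrent
//  fork()+exec() can still leak the descriptors.
static int make_cloexec_pair (int *r_, int *w_)
{
    int sv [2];
    if (socketpair (AF_UNIX, SOCK_STREAM, 0, sv) == -1)
        return -1;
    fcntl (sv [0], F_SETFD, FD_CLOEXEC);
    fcntl (sv [1], F_SETFD, FD_CLOEXEC);
    *w_ = sv [0];
    *r_ = sv [1];
    return 0;
}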
int nn_device_twoway (struct nn_device_recipe *device, int s1, nn_fd s1rcv, nn_fd s1snd, int s2, nn_fd s2rcv, nn_fd s2snd) { int rc; fd_set fds; int s1rcv_isready = 0; int s1snd_isready = 0; int s2rcv_isready = 0; int s2snd_isready = 0; /* Initialise the pollset. */ FD_ZERO (&fds); while (1) { /* Wait for network events. Adjust the 'ready' events based on the result. */ if (s1rcv_isready) FD_CLR (s1rcv, &fds); else FD_SET (s1rcv, &fds); if (s1snd_isready) FD_CLR (s1snd, &fds); else FD_SET (s1snd, &fds); if (s2rcv_isready) FD_CLR (s2rcv, &fds); else FD_SET (s2rcv, &fds); if (s2snd_isready) FD_CLR (s2snd, &fds); else FD_SET (s2snd, &fds); rc = select (0, &fds, NULL, NULL, NULL); wsa_assert (rc != SOCKET_ERROR); if (FD_ISSET (s1rcv, &fds)) s1rcv_isready = 1; if (FD_ISSET (s1snd, &fds)) s1snd_isready = 1; if (FD_ISSET (s2rcv, &fds)) s2rcv_isready = 1; if (FD_ISSET (s2snd, &fds)) s2snd_isready = 1; /* If possible, pass the message from s1 to s2. */ if (s1rcv_isready && s2snd_isready) { rc = nn_device_mvmsg (device,s1, s2, NN_DONTWAIT); if (nn_slow (rc < 0)) return -1; s1rcv_isready = 0; s2snd_isready = 0; } /* If possible, pass the message from s2 to s1. */ if (s2rcv_isready && s1snd_isready) { rc = nn_device_mvmsg (device,s2, s1, NN_DONTWAIT); if (nn_slow (rc < 0)) return -1; s2rcv_isready = 0; s1snd_isready = 0; } } }
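This loop sits behind the public nn_device () call when both sockets are bidirectional, so the whole function is normally driven like the sketch below. The tcp:// endpoints and the choice of raw BUS sockets are illustrative only.

#include <nanomsg/nn.h>
#include <nanomsg/bus.h>

int main (void)
{
    //  Two raw sockets bridged by a device; endpoints are placeholders.
    int s1 = nn_socket (AF_SP_RAW, NN_BUS);
    nn_bind (s1, "tcp://127.0.0.1:5591");
    int s2 = nn_socket (AF_SP_RAW, NN_BUS);
    nn_bind (s2, "tcp://127.0.0.1:5592");

    //  Blocks forever shuttling messages in both directions between s1 and
    //  s2; returns -1 with nn_errno () set if the device is interrupted.
    nn_device (s1, s2);
    return 1;
}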
void zmq::pgm_socket_t::open_transport () { SOCKADDR_IN salocal; SOCKADDR_IN sasession; zmq_log (1, "Opening PGM: network %s, port %i, %s(%i)\n", network, port_number, __FILE__, __LINE__); // Check if the machine supports PGM. int protocol_list[] = { IPPROTO_RM, 0 }; WSAPROTOCOL_INFOW* lpProtocolBuf = NULL; DWORD dwBufLen = 0; int err; int protocols = WSCEnumProtocols (protocol_list, lpProtocolBuf, &dwBufLen, &err); if (protocols != SOCKET_ERROR) assert (false); else if (err != WSAENOBUFS) assert (false); // Allocate needed space for lpProtocolBuf. lpProtocolBuf = (WSAPROTOCOL_INFOW*)malloc (dwBufLen); if (lpProtocolBuf == NULL) assert (false); // Get information about available protocols, the information will be // placed to lpProtocolBuf. protocols = WSCEnumProtocols (protocol_list, lpProtocolBuf, &dwBufLen, &err); if (SOCKET_ERROR == protocols) { free (lpProtocolBuf); assert (false); } bool found = FALSE; for (int i = 0; i < protocols; i++) { if (AF_INET == lpProtocolBuf[i].iAddressFamily && IPPROTO_RM == lpProtocolBuf[i].iProtocol && SOCK_RDM == lpProtocolBuf[i].iSocketType) { found = TRUE; break; } } if (!found) { fprintf (stderr, "PGM support is not installed on this machine."); free (lpProtocolBuf); assert (false); } free (lpProtocolBuf); // Receiver transport. if (receiver) { receiver_listener_socket = socket (AF_INET, SOCK_RDM, IPPROTO_RM); wsa_assert (receiver_listener_socket != INVALID_SOCKET); // The bind port (port_number) specified should match that // which the sender specified in the connect call. salocal.sin_family = AF_INET; salocal.sin_port = htons (port_number); salocal.sin_addr.s_addr = inet_addr (multicast); int rc = bind (receiver_listener_socket, (SOCKADDR *) &salocal, sizeof(salocal)); wsa_assert (rc != SOCKET_ERROR); rc = listen (receiver_listener_socket, 10); wsa_assert (rc != SOCKET_ERROR); // Set the socket to non-blocking mode. u_long flag = 1; rc = ioctlsocket (receiver_listener_socket, FIONBIO, &flag); wsa_assert (rc != SOCKET_ERROR); // Sender transport. } else { sender_socket = socket (AF_INET, SOCK_RDM, IPPROTO_RM); wsa_assert (sender_socket != INVALID_SOCKET); salocal.sin_family = AF_INET; salocal.sin_port = htons (0); // Port is ignored here salocal.sin_addr.s_addr = htonl (INADDR_ANY); int rc = bind (sender_socket, (SOCKADDR *)&salocal, sizeof(salocal)); wsa_assert (rc != SOCKET_ERROR); int to_preallocate = 0; // Set socket options. ULONG val = 1; setsockopt (sender_socket, IPPROTO_RM, RM_HIGH_SPEED_INTRANET_OPT, (char*)&val, sizeof(val)); // Set the socket to non-blocking mode. u_long flag = 1; rc = ioctlsocket (sender_socket, FIONBIO, &flag); wsa_assert (rc != SOCKET_ERROR); // Set the outgoing interface. ULONG send_iface; send_iface = inet_addr (network); rc = setsockopt (sender_socket, IPPROTO_RM, RM_SET_SEND_IF, (char *)&send_iface, sizeof(send_iface)); wsa_assert (rc != SOCKET_ERROR); // Set window size. // Parameter WindowSizeInBytes is calculated automaticaly as // send_window.WindowSizeInBytes = // send_window.RateKbitsPerSec * send_window.WindowSizeInMSecs / 8. assert (pgm_max_rte >= 1000); assert (pgm_secs != 0); RM_SEND_WINDOW send_window; send_window.RateKbitsPerSec = (pgm_max_rte / 1024) * 8; send_window.WindowSizeInMSecs = pgm_secs * 1000; // Parameter WindowSizeInBytes is calculated automaticaly as: send_window.WindowSizeInBytes = send_window.RateKbitsPerSec * send_window.WindowSizeInMSecs / 8; // Set multicast address and port number. 
sasession.sin_family = AF_INET; sasession.sin_port = htons (port_number); sasession.sin_addr.s_addr = inet_addr (multicast); rc = setsockopt (sender_socket, IPPROTO_RM, RM_RATE_WINDOW_SIZE, (char *) &send_window, sizeof (send_window)); wsa_assert (rc != SOCKET_ERROR); rc = connect (sender_socket, (SOCKADDR *)&sasession, sizeof(sasession)); wsa_assert (rc != SOCKET_ERROR); } }
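As a sanity check on the send-window arithmetic above, take illustrative values pgm_max_rte = 102400 bytes/s and pgm_secs = 10: RateKbitsPerSec = (102400 / 1024) * 8 = 800, WindowSizeInMSecs = 10 * 1000 = 10000, and WindowSizeInBytes = 800 * 10000 / 8 = 1000000, i.e. roughly pgm_secs worth of data at the maximum rate (the small shortfall against 102400 * 10 = 1024000 bytes comes from the 1000-versus-1024 mismatch in the kbit conversion).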
int zmq::socket_poller_t::wait (zmq::socket_poller_t::event_t *events_, int n_events_, long timeout_) { if (need_rebuild) if (rebuild () == -1) return -1; #if defined ZMQ_POLL_BASED_ON_POLL if (unlikely (poll_size == 0)) { // We'll report an error (timed out) as if the list was non-empty and // no event occured within the specified timeout. Otherwise the caller // needs to check the return value AND the event to avoid using the // nullified event data. errno = ETIMEDOUT; if (timeout_ == 0) return -1; #if defined ZMQ_HAVE_WINDOWS Sleep (timeout_ > 0 ? timeout_ : INFINITE); return -1; #elif defined ZMQ_HAVE_ANDROID usleep (timeout_ * 1000); return -1; #else usleep (timeout_ * 1000); return -1; #endif } zmq::clock_t clock; uint64_t now = 0; uint64_t end = 0; bool first_pass = true; while (true) { // Compute the timeout for the subsequent poll. int timeout; if (first_pass) timeout = 0; else if (timeout_ < 0) timeout = -1; else timeout = end - now; // Wait for events. while (true) { int rc = poll (pollfds, poll_size, timeout); if (rc == -1 && errno == EINTR) { return -1; } errno_assert (rc >= 0); break; } // Receive the signal from pollfd if (use_signaler && pollfds[0].revents & POLLIN) signaler->recv (); // Check for the events. int found = 0; for (items_t::iterator it = items.begin (); it != items.end () && found < n_events_; ++it) { events_[found].socket = NULL; events_[found].fd = 0; events_[found].user_data = NULL; events_[found].events = 0; // The poll item is a 0MQ socket. Retrieve pending events // using the ZMQ_EVENTS socket option. if (it->socket) { size_t events_size = sizeof (uint32_t); uint32_t events; if (it->socket->getsockopt (ZMQ_EVENTS, &events, &events_size) == -1) { return -1; } if (it->events & events) { events_[found].socket = it->socket; events_[found].user_data = it->user_data; events_[found].events = it->events & events; ++found; } } // Else, the poll item is a raw file descriptor, simply convert // the events to zmq_pollitem_t-style format. else { short revents = pollfds [it->pollfd_index].revents; short events = 0; if (revents & POLLIN) events |= ZMQ_POLLIN; if (revents & POLLOUT) events |= ZMQ_POLLOUT; if (revents & POLLPRI) events |= ZMQ_POLLPRI; if (revents & ~(POLLIN | POLLOUT | POLLPRI)) events |= ZMQ_POLLERR; if (events) { events_[found].socket = NULL; events_[found].user_data = it->user_data; events_[found].fd = it->fd; events_[found].events = events; ++found; } } } if (found) { for (int i = found; i < n_events_; ++i) { events_[i].socket = NULL; events_[i].fd = 0; events_[i].user_data = NULL; events_[i].events = 0; } return found; } // If timeout is zero, exit immediately whether there are events or not. if (timeout_ == 0) break; // At this point we are meant to wait for events but there are none. // If timeout is infinite we can just loop until we get some events. if (timeout_ < 0) { if (first_pass) first_pass = false; continue; } // The timeout is finite and there are no events. In the first pass // we get a timestamp of when the polling have begun. (We assume that // first pass have taken negligible time). We also compute the time // when the polling should time out. if (first_pass) { now = clock.now_ms (); end = now + timeout_; if (now == end) break; first_pass = false; continue; } // Find out whether timeout have expired. 
now = clock.now_ms (); if (now >= end) break; } errno = ETIMEDOUT; return -1; #elif defined ZMQ_POLL_BASED_ON_SELECT if (unlikely (poll_size == 0)) { // We'll report an error (timed out) as if the list was non-empty and // no event occured within the specified timeout. Otherwise the caller // needs to check the return value AND the event to avoid using the // nullified event data. errno = ETIMEDOUT; if (timeout_ == 0) return -1; #if defined ZMQ_HAVE_WINDOWS Sleep (timeout_ > 0 ? timeout_ : INFINITE); return -1; #else usleep (timeout_ * 1000); return -1; #endif } zmq::clock_t clock; uint64_t now = 0; uint64_t end = 0; bool first_pass = true; fd_set inset, outset, errset; while (true) { // Compute the timeout for the subsequent poll. timeval timeout; timeval *ptimeout; if (first_pass) { timeout.tv_sec = 0; timeout.tv_usec = 0; ptimeout = &timeout; } else if (timeout_ < 0) ptimeout = NULL; else { timeout.tv_sec = (long) ((end - now) / 1000); timeout.tv_usec = (long) ((end - now) % 1000 * 1000); ptimeout = &timeout; } // Wait for events. Ignore interrupts if there's infinite timeout. while (true) { memcpy (&inset, &pollset_in, sizeof (fd_set)); memcpy (&outset, &pollset_out, sizeof (fd_set)); memcpy (&errset, &pollset_err, sizeof (fd_set)); #if defined ZMQ_HAVE_WINDOWS int rc = select (0, &inset, &outset, &errset, ptimeout); if (unlikely (rc == SOCKET_ERROR)) { errno = zmq::wsa_error_to_errno (WSAGetLastError ()); wsa_assert (errno == ENOTSOCK); return -1; } #else int rc = select (maxfd + 1, &inset, &outset, &errset, ptimeout); if (unlikely (rc == -1)) { errno_assert (errno == EINTR || errno == EBADF); return -1; } #endif break; } if (use_signaler && FD_ISSET (signaler->get_fd (), &inset)) signaler->recv (); // Check for the events. int found = 0; for (items_t::iterator it = items.begin (); it != items.end () && found < n_events_; ++it) { // The poll item is a 0MQ socket. Retrieve pending events // using the ZMQ_EVENTS socket option. if (it->socket) { size_t events_size = sizeof (uint32_t); uint32_t events; if (it->socket->getsockopt (ZMQ_EVENTS, &events, &events_size) == -1) return -1; if (it->events & events) { events_[found].socket = it->socket; events_[found].user_data = it->user_data; events_[found].events = it->events & events; ++found; } } // Else, the poll item is a raw file descriptor, simply convert // the events to zmq_pollitem_t-style format. else { short events = 0; if (FD_ISSET (it->fd, &inset)) events |= ZMQ_POLLIN; if (FD_ISSET (it->fd, &outset)) events |= ZMQ_POLLOUT; if (FD_ISSET (it->fd, &errset)) events |= ZMQ_POLLERR; if (events) { events_[found].socket = NULL; events_[found].user_data = it->user_data; events_[found].fd = it->fd; events_[found].events = events; ++found; } } } if (found) { // zero-out remaining events for (int i = found; i < n_events_; ++i) { events_[i].socket = NULL; events_[i].fd = 0; events_[i].user_data = NULL; events_[i].events = 0; } return found; } // If timeout is zero, exit immediately whether there are events or not. if (timeout_ == 0) break; // At this point we are meant to wait for events but there are none. // If timeout is infinite we can just loop until we get some events. if (timeout_ < 0) { if (first_pass) first_pass = false; continue; } // The timeout is finite and there are no events. In the first pass // we get a timestamp of when the polling have begun. (We assume that // first pass have taken negligible time). We also compute the time // when the polling should time out. 
if (first_pass) { now = clock.now_ms (); end = now + timeout_; if (now == end) break; first_pass = false; continue; } // Find out whether the timeout has expired. now = clock.now_ms (); if (now >= end) break; } errno = ETIMEDOUT; return -1; #else // Exotic platforms that support neither poll() nor select(). errno = ENOTSUP; return -1; #endif }
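A caller of this variant has to separate "timed out" from genuine failures itself, since both are reported as -1; a sketch of that caller-side loop follows (poller, events and max_events are illustrative names, not libzmq API).

int found;
do {
    found = poller.wait (events, max_events, 1000);   //  1000 ms timeout.
} while (found == -1 && errno == EINTR);              //  Retry on signal interruption.

if (found == -1 && errno == ETIMEDOUT) {
    //  No events within the timeout; not a hard error.
}
else if (found > 0) {
    //  events [0] .. events [found - 1] are valid; trailing slots were zeroed.
}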