/*
 * Read the one-byte message type from the connection and map it to a
 * FileRep message type.
 *
 * fileRepMessageType: out parameter; written only when one of the type
 *      bytes '1', '2', '3' or 'S' was received.
 *
 * Returns STATUS_OK when a known type byte was read.  Returns STATUS_ERROR,
 * after logging a WARNING, on EOF, on a close message ('X') or on any other
 * byte; *fileRepMessageType is left untouched in those cases.
 */
int
FileRepConnServer_ReceiveMessageType(
                    FileRepConsumerProcIndex_e *fileRepMessageType)
{
    /*
     * The value is compared against EOF below, so it must be kept in an int.
     * Narrowing it to plain char makes the EOF case unreachable where char
     * is unsigned, and makes a 0xFF data byte look like EOF where char is
     * signed.
     */
    int         messageType;
    int         status = STATUS_OK;

    messageType = pq_getbyte();

    switch (messageType)
    {
        case '1':
            *fileRepMessageType = FileRepMessageTypeXLog;
            break;

        case '2':
            *fileRepMessageType = FileRepMessageTypeAO01;
            break;

        case '3':
            *fileRepMessageType = FileRepMessageTypeWriter;
            break;

        case 'S':
            *fileRepMessageType = FileRepMessageTypeShutdown;
            break;

        case EOF:
            ereport(WARNING,
                    (errcode_for_socket_access(),
                     errmsg("receive EOF on connection: %m")));
            status = STATUS_ERROR;
            break;

        case 'X':
            /* Close Message(sent by PQfinish()) */

            /*
             * Client closed connection. Client does not wait for response.
             */
            ereport(WARNING,
                    (errcode_for_socket_access(),
                     errmsg("receive close on connection: %m")));
            status = STATUS_ERROR;
            break;

        default:
            ereport(WARNING,
                    (errcode_for_socket_access(),
                     errmsg("receive unexpected message type on connection: %m")));
            status = STATUS_ERROR;
            break;
    }

    return status;
}
int FileRepConnServer_CreateConnection() { int status = STATUS_OK; port = (Port *) calloc(1, sizeof(Port)); if (port == NULL) { ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), (errmsg("not enough memory to create connection")))); return status; } status = StreamConnection(listenSocket[0], port); if (status != STATUS_OK) { ereport(WARNING, (errcode_for_socket_access(), errmsg("could not accept connection: %m"), FileRep_errcontext())); if (port->sock >= 0) { StreamClose(port->sock); } ConnFree(); } else { /* * MPP-14225: On NIC failure, filerep receiver process's recv() system * call will take hours to timeout, depending on the TCP timeout. Add * SO_RCVTIMEO timeout to filerep receiver process's socket to avoid * this. */ struct timeval tv; tv.tv_sec = file_rep_socket_timeout; tv.tv_usec = 0; /* Not initializing this can cause strange * errors */ if (setsockopt(port->sock, SOL_SOCKET, SO_RCVTIMEO, (char *) &tv, sizeof(struct timeval)) == -1) ereport(WARNING, (errcode_for_socket_access(), errmsg("could not set receive timeout on socket"))); /* set TCP keep-alive parameters for FileRep connection */ (void) pq_setkeepalivesidle(gp_filerep_tcp_keepalives_idle, port); (void) pq_setkeepalivesinterval(gp_filerep_tcp_keepalives_interval, port); (void) pq_setkeepalivescount(gp_filerep_tcp_keepalives_count, port); MyProcPort = port; } return status; }
/* * Wait until we can read WAL stream, or timeout. * * Returns true if data has become available for reading, false if timed out * or interrupted by signal. * * This is based on pqSocketCheck. */ static bool libpq_select(int timeout_ms) { int ret; Assert(streamConn != NULL); if (PQsocket(streamConn) < 0) ereport(ERROR, (errcode_for_socket_access(), errmsg("socket not open"))); /* We use poll(2) if available, otherwise select(2) */ { #ifdef HAVE_POLL struct pollfd input_fd; input_fd.fd = PQsocket(streamConn); input_fd.events = POLLIN | POLLERR; input_fd.revents = 0; ret = poll(&input_fd, 1, timeout_ms); #else /* !HAVE_POLL */ fd_set input_mask; struct timeval timeout; struct timeval *ptr_timeout; FD_ZERO(&input_mask); FD_SET(PQsocket(streamConn), &input_mask); if (timeout_ms < 0) ptr_timeout = NULL; else { timeout.tv_sec = timeout_ms / 1000; timeout.tv_usec = (timeout_ms % 1000) * 1000; ptr_timeout = &timeout; } ret = select(PQsocket(streamConn) + 1, &input_mask, NULL, NULL, ptr_timeout); #endif /* HAVE_POLL */ } if (ret == 0 || (ret < 0 && errno == EINTR)) return false; if (ret < 0) ereport(ERROR, (errcode_for_socket_access(), errmsg("select() failed: %m"))); return true; }
int FileRepConnServer_StartListener( char *hostAddress, int portLocal) { int status = STATUS_OK; int i; for (i=0; i < FILEREP_MAX_LISTEN; i++) { listenSocket[i] = -1; } /* NOTE check if family AF_UNIX has to be considered as well */ status = StreamServerPort( AF_UNSPEC, hostAddress, (unsigned short) portLocal, NULL, listenSocket, FILEREP_MAX_LISTEN); if (status != STATUS_OK) { ereport(WARNING, (errcode_for_socket_access(), errmsg("could not start listener, host:'%s' port:'%d': %m", hostAddress, portLocal), errSendAlert(true), FileRep_errcontext())); } return status; }
/*
 * Read the 4-byte, network-order length word of a message.
 *
 * The length on the wire includes the length word itself, so values below
 * 4 are rejected.  On success *len receives the length minus those 4 bytes
 * and STATUS_OK is returned; on EOF or a bad length a WARNING is logged and
 * STATUS_ERROR is returned with *len untouched.
 */
int
FileRepConnServer_ReceiveMessageLength(uint32 *len)
{
    int32       wireLength;

    if (pq_getbytes((char*) &wireLength, 4) == EOF)
    {
        ereport(WARNING,
                (errcode_for_socket_access(),
                 errmsg("receive EOF on connection: %m")));
        return STATUS_ERROR;
    }

    wireLength = ntohl(wireLength);
    if (wireLength < 4)
    {
        ereport(WARNING,
                (errmsg("receive unexpected message length on connection")));
        return STATUS_ERROR;
    }

    /* strip the length word itself */
    wireLength -= 4;
    *len = wireLength;

    return STATUS_OK;
}
/* * set_connection_status_bad does not remove the given connection from the connection hash. * It simply shuts down the underlying socket. On success, it returns true. */ Datum set_connection_status_bad(PG_FUNCTION_ARGS) { char *nodeName = PG_GETARG_CSTRING(0); int32 nodePort = PG_GETARG_INT32(1); int socket = -1; int shutdownStatus = 0; int pqStatus PG_USED_FOR_ASSERTS_ONLY = 0; PGconn *connection = GetOrEstablishConnection(nodeName, nodePort); if (connection == NULL) { PG_RETURN_BOOL(false); } /* Prevent further reads/writes... */ socket = PQsocket(connection); shutdownStatus = shutdown(socket, SHUT_RDWR); if (shutdownStatus != 0) { ereport(ERROR, (errcode_for_socket_access(), errmsg("shutdown failed"))); } /* ... and make libpq notice by reading data. */ pqStatus = PQconsumeInput(connection); Assert(pqStatus == 0); /* expect failure */ PG_RETURN_BOOL(true); }
/* -------------------------------- * pq_recvbuf - load some bytes into the input buffer * * returns 0 if OK, EOF if trouble * -------------------------------- */ static int pq_recvbuf(void) { if (PqRecvPointer > 0) { if (PqRecvLength > PqRecvPointer) { /* still some unread data, left-justify it in the buffer */ memmove(PqRecvBuffer, PqRecvBuffer + PqRecvPointer, PqRecvLength - PqRecvPointer); PqRecvLength -= PqRecvPointer; PqRecvPointer = 0; } else PqRecvLength = PqRecvPointer = 0; } /* Ensure that we're in blocking mode */ pq_set_nonblocking(false); /* Can fill buffer from PqRecvLength and upwards */ for (;;) { int r; r = secure_read(MyProcPort, PqRecvBuffer + PqRecvLength, PQ_RECV_BUFFER_SIZE - PqRecvLength); if (r < 0) { if (errno == EINTR) continue; /* Ok if interrupted */ /* * Careful: an ereport() that tries to write to the client would * cause recursion to here, leading to stack overflow and core * dump! This message must go *only* to the postmaster log. */ ereport(COMMERROR, (errcode_for_socket_access(), errmsg("could not receive data from client: %m"))); return EOF; } if (r == 0) { /* * EOF detected. We used to write a log message here, but it's * better to expect the ultimate caller to do that. */ return EOF; } /* r contains number of bytes read, so just incr length */ PqRecvLength += r; return 0; } }
int FileRepConnClient_EstablishConnection( char *hostAddress, int port, bool reportError) { int status = STATUS_OK; char portbuf[11]; char timeoutbuf[11]; const char *keys[5]; const char *vals[5]; /* FileRepConnClient_CloseConnection();*/ snprintf(portbuf, sizeof(portbuf), "%d", port); snprintf(timeoutbuf, sizeof(timeoutbuf), "%d", gp_segment_connect_timeout); keys[0] = "host"; vals[0] = hostAddress; keys[1] = "port"; vals[1] = portbuf; keys[2] = "dbname"; vals[2] = "postgres"; keys[3] = "connect_timeout"; vals[3] = timeoutbuf; keys[4] = NULL; vals[4] = NULL; filerep_conn = PQconnectdbParams(keys, vals, false); if (PQstatus(filerep_conn) != CONNECTION_OK) { if (reportError || Debug_filerep_print) ereport(WARNING, (errcode_for_socket_access(), errmsg("could not establish connection with server, host:'%s' port:'%d' err:'%s' : %m", hostAddress, port, PQerrorMessage(filerep_conn)), errSendAlert(true), FileRep_errcontext())); status = STATUS_ERROR; if (filerep_conn) { PQfinish(filerep_conn); filerep_conn = NULL; } } /* NOTE Handle error message see ftsprobe.c */ return status; }
/* --------------------------------
 *      pool_recvbuf - pull more bytes from the socket into port->RecvBuffer
 *
 *      Unread bytes are first moved to the front of the buffer, then one
 *      successful recv() appends new data after them.
 *
 *      returns 0 if data was added, EOF on receive error or end of stream
 * --------------------------------
 */
static int
pool_recvbuf(PoolPort *port)
{
    int         nread;

    if (port->RecvPointer > 0)
    {
        if (port->RecvLength <= port->RecvPointer)
        {
            /* everything was consumed: simply reset the buffer */
            port->RecvLength = port->RecvPointer = 0;
        }
        else
        {
            /* slide the unread tail down to the start of the buffer */
            memmove(port->RecvBuffer, port->RecvBuffer + port->RecvPointer,
                    port->RecvLength - port->RecvPointer);
            port->RecvLength -= port->RecvPointer;
            port->RecvPointer = 0;
        }
    }

    /* Receive into the free space after RecvLength; retry when interrupted */
    do
    {
        nread = recv(Socket(*port), port->RecvBuffer + port->RecvLength,
                     POOL_BUFFER_SIZE - port->RecvLength, 0);
    } while (nread < 0 && errno == EINTR);

    if (nread < 0)
    {
        /* Report broken connection */
        ereport(LOG,
                (errcode_for_socket_access(),
                 errmsg("could not receive data from client: %m")));
        return EOF;
    }

    if (nread == 0)
    {
        /*
         * EOF detected.  No log message here; the ultimate caller is
         * expected to report it.
         */
        return EOF;
    }

    /* nread is the number of bytes appended */
    port->RecvLength += nread;
    return 0;
}
/*
 * Write everything currently held in PqSendBuffer to the client.
 *
 * Returns 0 once all PqSendPointer bytes were written, or EOF when a write
 * fails for a reason other than EINTR.  In both cases PqSendPointer is reset
 * to 0, i.e. unsent data is discarded on failure.
 */
static int
internal_flush(void)
{
    static int  last_reported_send_errno = 0;

    char       *cursor = PqSendBuffer;
    char       *limit = PqSendBuffer + PqSendPointer;

    while (cursor < limit)
    {
        int         sent;

        sent = secure_write(MyProcPort, cursor, limit - cursor);

        if (sent > 0)
        {
            /* progress was made: forget any previously reported error */
            last_reported_send_errno = 0;
            cursor += sent;
            continue;
        }

        if (errno == EINTR)
            continue;           /* Ok if we were interrupted */

        /*
         * Careful: an ereport() that tries to write to the client would
         * cause recursion to here, leading to stack overflow and core
         * dump!  This message must go *only* to the postmaster log.
         *
         * If a client disconnects while we're in the midst of output, we
         * might write quite a bit of data before we get to a safe query
         * abort point.  So, suppress duplicate log messages.
         */
        if (errno != last_reported_send_errno)
        {
            last_reported_send_errno = errno;
            ereport(COMMERROR,
                    (errcode_for_socket_access(),
                     errmsg("could not send data to client: %m")));
        }

        /*
         * Drop the buffered data anyway so that processing can continue,
         * even though we'll probably quit soon.
         */
        PqSendPointer = 0;
        return EOF;
    }

    PqSendPointer = 0;
    return 0;
}
/*
 * Read one fixed-size result message from the given connection.
 *
 * The message must be exactly SEND_RES_BUFFER_SIZE bytes, start with the
 * code 's' and carry a 4-byte network-order result right after it.
 *
 * Returns 0 when that result is zero.  Returns EOF when the peer closed the
 * connection or the result is non-zero.  A receive error, a short message
 * or an unexpected message code is reported through ereport(ERROR).
 */
int
pool_recvres(PoolPort *port)
{
    int         nread;
    uint        n32;
    char        buf[SEND_RES_BUFFER_SIZE];

    nread = recv(Socket(*port), &buf, SEND_RES_BUFFER_SIZE, 0);

    if (nread < 0)
    {
        /* Report broken connection */
        ereport(ERROR,
                (errcode_for_socket_access(),
                 errmsg("could not receive data from client: %m")));
        return EOF;
    }

    if (nread == 0)
        return EOF;             /* connection closed */

    if (nread != SEND_RES_BUFFER_SIZE)
    {
        ereport(ERROR,
                (errcode(ERRCODE_PROTOCOL_VIOLATION),
                 errmsg("incomplete message from client")));
        return EOF;
    }

    /* Verify response */
    if (buf[0] != 's')
    {
        ereport(ERROR,
                (errcode(ERRCODE_PROTOCOL_VIOLATION),
                 errmsg("unexpected message code")));
        return EOF;
    }

    /* the result word follows the one-byte message code */
    memcpy(&n32, buf + 1, 4);
    n32 = ntohl(n32);

    return (n32 != 0) ? EOF : 0;
}
/*
 * Read exactly 'length' bytes of message payload into 'data'.
 *
 * Returns STATUS_OK on success; on EOF a WARNING is logged and STATUS_ERROR
 * is returned.
 */
int
FileRepConnServer_ReceiveMessageData(
                                     char *data,
                                     uint32 length)
{
    if (pq_getbytes(data, length) != EOF)
        return STATUS_OK;

    ereport(WARNING,
            (errcode_for_socket_access(),
             errmsg("receive EOF on connection: %m")));

    return STATUS_ERROR;
}
/* -------------------------------- * pq_getbyte_if_available - get a single byte from connection, * if available * * The received byte is stored in *c. Returns 1 if a byte was read, * 0 if no data was available, or EOF if trouble. * -------------------------------- */ int pq_getbyte_if_available(unsigned char *c) { int r; if (PqRecvPointer < PqRecvLength) { *c = PqRecvBuffer[PqRecvPointer++]; return 1; } /* Put the socket into non-blocking mode */ pq_set_nonblocking(true); r = secure_read(MyProcPort, c, 1); if (r < 0) { /* * Ok if no data available without blocking or interrupted (though * EINTR really shouldn't happen with a non-blocking socket). * Report other errors. */ if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR) r = 0; else { /* * Careful: an ereport() that tries to write to the client * would cause recursion to here, leading to stack overflow * and core dump! This message must go *only* to the * postmaster log. */ ereport(COMMERROR, (errcode_for_socket_access(), errmsg("could not receive data from client: %m"))); r = EOF; } } else if (r == 0) { /* EOF detected */ r = EOF; } return r; }
/*
 * Register, modify or remove 'event' in the epoll instance of 'set'.
 *
 * action can be one of EPOLL_CTL_ADD | EPOLL_CTL_MOD | EPOLL_CTL_DEL.
 * A failing epoll_ctl() raises an ERROR.
 */
static void
WaitEventAdjustEpoll(WaitEventSet *set, WaitEvent *event, int action)
{
    struct epoll_event epoll_ev;
    int         rc;

    /* errors and hang-ups are always of interest */
    epoll_ev.events = EPOLLERR | EPOLLHUP;

    /* epoll_wait hands this pointer back, identifying our event */
    epoll_ev.data.ptr = event;

    if (event->events == WL_LATCH_SET)
        Assert(set->latch != NULL);

    if (event->events == WL_LATCH_SET ||
        event->events == WL_POSTMASTER_DEATH)
    {
        /* latch and postmaster-death events are both waited on for input */
        epoll_ev.events |= EPOLLIN;
    }
    else
    {
        Assert(event->fd != PGINVALID_SOCKET);
        Assert(event->events & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE));

        if (event->events & WL_SOCKET_READABLE)
            epoll_ev.events |= EPOLLIN;
        if (event->events & WL_SOCKET_WRITEABLE)
            epoll_ev.events |= EPOLLOUT;
    }

    /*
     * Even though unused, epoll_ev is also passed as the data argument when
     * action is EPOLL_CTL_DEL.  There used to be an epoll bug requiring
     * that, and actually it makes the code simpler...
     */
    rc = epoll_ctl(set->epoll_fd, action, event->fd, &epoll_ev);

    if (rc < 0)
        ereport(ERROR,
                (errcode_for_socket_access(),
                 errmsg("epoll_ctl() failed: %m")));
}
/*
 * Wait up to 100 ms for listenSocket[0] to become readable.
 *
 * Returns the select() result: 0 on timeout, a positive count when the
 * listen socket is readable.  A non-zero result for which the listen socket
 * is not marked readable is turned into -1; whenever the result is -1 a
 * WARNING is logged.
 */
int
FileRepConnServer_Select(void)
{
    struct timeval timeout;
    fd_set      rfds;
    int         retval;

    FD_ZERO(&rfds);
    FD_SET(listenSocket[0], &rfds);

    timeout.tv_sec = 0;
    timeout.tv_usec = 100 * 1000L;

    retval = select(listenSocket[0] + 1, &rfds, NULL, NULL, &timeout);

    /* anything other than a timeout must have flagged the listen socket */
    if (retval != 0 && !FD_ISSET(listenSocket[0], &rfds))
        retval = -1;

    if (retval == -1)
    {
        ereport(WARNING,
                (errcode_for_socket_access(),
                 errmsg("receive failure on connection: %m"),
                 FileRep_errcontext()));
    }

    return retval;
}
/*
 * Like WaitLatch, but with an extra socket argument for WL_SOCKET_*
 * conditions.
 *
 * When waiting on a socket, WL_SOCKET_READABLE *must* be included in
 * 'wakeEvents'; WL_SOCKET_WRITEABLE is optional.  The reason for this is
 * that EOF and error conditions are reported only via WL_SOCKET_READABLE.
 *
 * latch:      latch to wait on; consulted only when WL_LATCH_SET is requested.
 * wakeEvents: bitmask of WL_* conditions to wait for.
 * sock:       socket for the WL_SOCKET_* conditions; those conditions are
 *             ignored when it is PGINVALID_SOCKET.
 * timeout:    timeout in milliseconds; used only when WL_TIMEOUT is requested.
 *
 * Returns a bitmask of the WL_* conditions that ended the wait.  The loop
 * only exits once at least one bit is set.  A poll()/select() failure other
 * than EINTR raises an ERROR.
 */
int
WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
                  long timeout)
{
    int         result = 0;
    int         rc;
    instr_time  start_time,
                cur_time;
    long        cur_timeout;

#ifdef HAVE_POLL
    struct pollfd pfds[3];
    int         nfds;
#else
    struct timeval tv,
               *tvp;
    fd_set      input_mask;
    fd_set      output_mask;
    int         hifd;
#endif

    /* Ignore WL_SOCKET_* events if no valid socket is given */
    if (sock == PGINVALID_SOCKET)
        wakeEvents &= ~(WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE);

    Assert(wakeEvents != 0);    /* must have at least one wake event */
    /* Cannot specify WL_SOCKET_WRITEABLE without WL_SOCKET_READABLE */
    Assert((wakeEvents & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE)) != WL_SOCKET_WRITEABLE);

    if ((wakeEvents & WL_LATCH_SET) && latch->owner_pid != MyProcPid)
        elog(ERROR, "cannot wait on a latch owned by another process");

    /*
     * Initialize timeout if requested.  We must record the current time so
     * that we can determine the remaining timeout if the poll() or select()
     * is interrupted.  (On some platforms, select() will update the contents
     * of "tv" for us, but unfortunately we can't rely on that.)
     */
    if (wakeEvents & WL_TIMEOUT)
    {
        INSTR_TIME_SET_CURRENT(start_time);
        Assert(timeout >= 0 && timeout <= INT_MAX);
        cur_timeout = timeout;

#ifndef HAVE_POLL
        tv.tv_sec = cur_timeout / 1000L;
        tv.tv_usec = (cur_timeout % 1000L) * 1000L;
        tvp = &tv;
#endif
    }
    else
    {
        /* -1 means "no timeout" for poll(); select() gets a NULL timeval */
        cur_timeout = -1;

#ifndef HAVE_POLL
        tvp = NULL;
#endif
    }

    waiting = true;
    do
    {
        /*
         * Clear the pipe, then check if the latch is set already. If someone
         * sets the latch between this and the poll()/select() below, the
         * setter will write a byte to the pipe (or signal us and the signal
         * handler will do that), and the poll()/select() will return
         * immediately.
         *
         * Note: we assume that the kernel calls involved in drainSelfPipe()
         * and SetLatch() will provide adequate synchronization on machines
         * with weak memory ordering, so that we cannot miss seeing is_set if
         * the signal byte is already in the pipe when we drain it.
         */
        drainSelfPipe();

        if ((wakeEvents & WL_LATCH_SET) && latch->is_set)
        {
            result |= WL_LATCH_SET;

            /*
             * Leave loop immediately, avoid blocking again. We don't attempt
             * to report any other events that might also be satisfied.
             */
            break;
        }

        /* Must wait ... we use poll(2) if available, otherwise select(2) */
#ifdef HAVE_POLL
        /* Build the pollfd array: [socket], self-pipe, [postmaster pipe] */
        nfds = 0;
        if (wakeEvents & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE))
        {
            /* socket, if used, is always in pfds[0] */
            pfds[0].fd = sock;
            pfds[0].events = 0;
            if (wakeEvents & WL_SOCKET_READABLE)
                pfds[0].events |= POLLIN;
            if (wakeEvents & WL_SOCKET_WRITEABLE)
                pfds[0].events |= POLLOUT;
            pfds[0].revents = 0;
            nfds++;
        }

        pfds[nfds].fd = selfpipe_readfd;
        pfds[nfds].events = POLLIN;
        pfds[nfds].revents = 0;
        nfds++;

        if (wakeEvents & WL_POSTMASTER_DEATH)
        {
            /* postmaster fd, if used, is always in pfds[nfds - 1] */
            pfds[nfds].fd = postmaster_alive_fds[POSTMASTER_FD_WATCH];
            pfds[nfds].events = POLLIN;
            pfds[nfds].revents = 0;
            nfds++;
        }

        /* Sleep */
        rc = poll(pfds, nfds, (int) cur_timeout);

        /* Check return code */
        if (rc < 0)
        {
            /* EINTR is okay, otherwise complain */
            if (errno != EINTR)
            {
                /* clear the waiting flag before ereport(ERROR) leaves the loop */
                waiting = false;
                ereport(ERROR,
                        (errcode_for_socket_access(),
                         errmsg("poll() failed: %m")));
            }
        }
        else if (rc == 0)
        {
            /* timeout exceeded */
            if (wakeEvents & WL_TIMEOUT)
                result |= WL_TIMEOUT;
        }
        else
        {
            /* at least one event occurred, so check revents values */
            if ((wakeEvents & WL_SOCKET_READABLE) &&
                (pfds[0].revents & (POLLIN | POLLHUP | POLLERR | POLLNVAL)))
            {
                /* data available in socket, or EOF/error condition */
                result |= WL_SOCKET_READABLE;
            }
            if ((wakeEvents & WL_SOCKET_WRITEABLE) &&
                (pfds[0].revents & POLLOUT))
            {
                result |= WL_SOCKET_WRITEABLE;
            }

            /*
             * We expect a POLLHUP when the remote end is closed, but because
             * we don't expect the pipe to become readable or to have any
             * errors either, treat those cases as postmaster death, too.
             */
            if ((wakeEvents & WL_POSTMASTER_DEATH) &&
                (pfds[nfds - 1].revents & (POLLHUP | POLLIN | POLLERR | POLLNVAL)))
            {
                /*
                 * According to the select(2) man page on Linux, select(2) may
                 * spuriously return and report a file descriptor as readable,
                 * when it's not; and presumably so can poll(2).  It's not
                 * clear that the relevant cases would ever apply to the
                 * postmaster pipe, but since the consequences of falsely
                 * returning WL_POSTMASTER_DEATH could be pretty unpleasant,
                 * we take the trouble to positively verify EOF with
                 * PostmasterIsAlive().
                 */
                if (!PostmasterIsAlive())
                    result |= WL_POSTMASTER_DEATH;
            }
        }
#else                           /* !HAVE_POLL */

        /* Rebuild the fd masks on every iteration; hifd tracks the highest fd */
        FD_ZERO(&input_mask);
        FD_ZERO(&output_mask);

        FD_SET(selfpipe_readfd, &input_mask);
        hifd = selfpipe_readfd;

        if (wakeEvents & WL_POSTMASTER_DEATH)
        {
            FD_SET(postmaster_alive_fds[POSTMASTER_FD_WATCH], &input_mask);
            if (postmaster_alive_fds[POSTMASTER_FD_WATCH] > hifd)
                hifd = postmaster_alive_fds[POSTMASTER_FD_WATCH];
        }

        if (wakeEvents & WL_SOCKET_READABLE)
        {
            FD_SET(sock, &input_mask);
            if (sock > hifd)
                hifd = sock;
        }

        if (wakeEvents & WL_SOCKET_WRITEABLE)
        {
            FD_SET(sock, &output_mask);
            if (sock > hifd)
                hifd = sock;
        }

        /* Sleep */
        rc = select(hifd + 1, &input_mask, &output_mask, NULL, tvp);

        /* Check return code */
        if (rc < 0)
        {
            /* EINTR is okay, otherwise complain */
            if (errno != EINTR)
            {
                /* clear the waiting flag before ereport(ERROR) leaves the loop */
                waiting = false;
                ereport(ERROR,
                        (errcode_for_socket_access(),
                         errmsg("select() failed: %m")));
            }
        }
        else if (rc == 0)
        {
            /* timeout exceeded */
            if (wakeEvents & WL_TIMEOUT)
                result |= WL_TIMEOUT;
        }
        else
        {
            /* at least one event occurred, so check masks */
            if ((wakeEvents & WL_SOCKET_READABLE) && FD_ISSET(sock, &input_mask))
            {
                /* data available in socket, or EOF */
                result |= WL_SOCKET_READABLE;
            }
            if ((wakeEvents & WL_SOCKET_WRITEABLE) && FD_ISSET(sock, &output_mask))
            {
                result |= WL_SOCKET_WRITEABLE;
            }
            if ((wakeEvents & WL_POSTMASTER_DEATH) &&
                FD_ISSET(postmaster_alive_fds[POSTMASTER_FD_WATCH], &input_mask))
            {
                /*
                 * According to the select(2) man page on Linux, select(2) may
                 * spuriously return and report a file descriptor as readable,
                 * when it's not; and presumably so can poll(2).  It's not
                 * clear that the relevant cases would ever apply to the
                 * postmaster pipe, but since the consequences of falsely
                 * returning WL_POSTMASTER_DEATH could be pretty unpleasant,
                 * we take the trouble to positively verify EOF with
                 * PostmasterIsAlive().
                 */
                if (!PostmasterIsAlive())
                    result |= WL_POSTMASTER_DEATH;
            }
        }
#endif   /* HAVE_POLL */

        /* If we're not done, update cur_timeout for next iteration */
        if (result == 0 && cur_timeout >= 0)
        {
            /* remaining time = requested timeout - time elapsed since start */
            INSTR_TIME_SET_CURRENT(cur_time);
            INSTR_TIME_SUBTRACT(cur_time, start_time);
            cur_timeout = timeout - (long) INSTR_TIME_GET_MILLISEC(cur_time);
            if (cur_timeout < 0)
                cur_timeout = 0;

#ifndef HAVE_POLL
            tv.tv_sec = cur_timeout / 1000L;
            tv.tv_usec = (cur_timeout % 1000L) * 1000L;
#endif
        }
    } while (result == 0);
    waiting = false;

    return result;
}
/*
 *  Attempt to negotiate SSL connection.
 *
 *  Creates the SSL object for 'port', binds it to port->sock and runs the
 *  SSL accept handshake.  If the client presented a certificate, its Common
 *  Name is copied into port->peer_cn (allocated in TopMemoryContext).
 *
 *  Returns 0 on success.  On any failure close_SSL(port) is called and -1 is
 *  returned; most failure paths first log a COMMERROR message.
 */
static int
open_server_SSL(Port *port)
{
    int         r;
    int         err;

    Assert(!port->ssl);
    Assert(!port->peer);

    if (!(port->ssl = SSL_new(SSL_context)))
    {
        ereport(COMMERROR,
                (errcode(ERRCODE_PROTOCOL_VIOLATION),
                 errmsg("could not initialize SSL connection: %s",
                        SSLerrmessage())));
        close_SSL(port);
        return -1;
    }
    if (!my_SSL_set_fd(port->ssl, port->sock))
    {
        ereport(COMMERROR,
                (errcode(ERRCODE_PROTOCOL_VIOLATION),
                 errmsg("could not set SSL socket: %s",
                        SSLerrmessage())));
        close_SSL(port);
        return -1;
    }

    /* Handshake retry point: re-entered while SSL wants more I/O */
aloop:
    r = SSL_accept(port->ssl);
    if (r <= 0)
    {
        err = SSL_get_error(port->ssl, r);
        switch (err)
        {
            case SSL_ERROR_WANT_READ:
            case SSL_ERROR_WANT_WRITE:
                /* On WIN32, wait for the socket first; elsewhere retry at once */
#ifdef WIN32
                pgwin32_waitforsinglesocket(SSL_get_fd(port->ssl),
                                            (err == SSL_ERROR_WANT_READ) ?
                        FD_READ | FD_CLOSE | FD_ACCEPT : FD_WRITE | FD_CLOSE,
                                            INFINITE);
#endif
                goto aloop;
            case SSL_ERROR_SYSCALL:
                /* r < 0 is reported with %m; r == 0 is reported as EOF */
                if (r < 0)
                    ereport(COMMERROR,
                            (errcode_for_socket_access(),
                             errmsg("could not accept SSL connection: %m")));
                else
                    ereport(COMMERROR,
                            (errcode(ERRCODE_PROTOCOL_VIOLATION),
                    errmsg("could not accept SSL connection: EOF detected")));
                break;
            case SSL_ERROR_SSL:
                ereport(COMMERROR,
                        (errcode(ERRCODE_PROTOCOL_VIOLATION),
                         errmsg("could not accept SSL connection: %s",
                                SSLerrmessage())));
                break;
            case SSL_ERROR_ZERO_RETURN:
                ereport(COMMERROR,
                        (errcode(ERRCODE_PROTOCOL_VIOLATION),
                    errmsg("could not accept SSL connection: EOF detected")));
                break;
            default:
                ereport(COMMERROR,
                        (errcode(ERRCODE_PROTOCOL_VIOLATION),
                         errmsg("unrecognized SSL error code: %d",
                                err)));
                break;
        }
        /* every non-retry error ends the connection attempt */
        close_SSL(port);
        return -1;
    }

    port->count = 0;

    /* Get client certificate, if available. */
    port->peer = SSL_get_peer_certificate(port->ssl);

    /* and extract the Common Name from it. */
    port->peer_cn = NULL;
    if (port->peer != NULL)
    {
        int         len;

        /* first call with a NULL buffer only asks for the name's length */
        len = X509_NAME_get_text_by_NID(X509_get_subject_name(port->peer),
                                        NID_commonName, NULL, 0);
        if (len != -1)
        {
            char       *peer_cn;

            peer_cn = MemoryContextAlloc(TopMemoryContext, len + 1);
            r = X509_NAME_get_text_by_NID(X509_get_subject_name(port->peer),
                                          NID_commonName, peer_cn, len+1);
            peer_cn[len] = '\0';
            if (r != len)
            {
                /* shouldn't happen */
                pfree(peer_cn);
                close_SSL(port);
                return -1;
            }

            /*
             * Reject embedded NULLs in certificate common name to prevent
             * attacks like CVE-2009-4034.
             */
            if (len != strlen(peer_cn))
            {
                ereport(COMMERROR,
                        (errcode(ERRCODE_PROTOCOL_VIOLATION),
                         errmsg("SSL certificate's common name contains embedded null")));
                pfree(peer_cn);
                close_SSL(port);
                return -1;
            }

            port->peer_cn = peer_cn;
        }
    }

    ereport(DEBUG2,
            (errmsg("SSL connection from \"%s\"",
                    port->peer_cn ? port->peer_cn : "(anonymous)")));

    /* set up debugging/info callback */
    SSL_CTX_set_info_callback(SSL_context, info_cb);

    return 0;
}
/*
 * Wait using linux's epoll_wait(2).
 *
 * This is the preferable wait method, as several readiness notifications are
 * delivered, without having to iterate through all of set->events.  The
 * returned epoll_event structs contain a pointer to our events, making
 * association easy.
 *
 * set:             wait event set whose epoll_fd is waited on.
 * cur_timeout:     timeout passed straight to epoll_wait().
 * occurred_events: output array, filled from the front with events that fired.
 * nevents:         capacity of occurred_events.
 *
 * Returns the number of entries stored in occurred_events.  That is 0 when
 * the wait was interrupted (EINTR) or nothing reportable was found.
 * Returns -1 on timeout.  Any other epoll_wait() failure raises an ERROR.
 */
static inline int
WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
                      WaitEvent *occurred_events, int nevents)
{
    int         returned_events = 0;
    int         rc;
    WaitEvent  *cur_event;
    struct epoll_event *cur_epoll_event;

    /* Sleep */
    rc = epoll_wait(set->epoll_fd, set->epoll_ret_events,
                    nevents, cur_timeout);

    /* Check return code */
    if (rc < 0)
    {
        /* EINTR is okay, otherwise complain */
        if (errno != EINTR)
        {
            /* clear the waiting flag before ereport(ERROR) exits */
            waiting = false;
            ereport(ERROR,
                    (errcode_for_socket_access(),
                     errmsg("epoll_wait() failed: %m")));
        }
        return 0;
    }
    else if (rc == 0)
    {
        /* timeout exceeded */
        return -1;
    }

    /*
     * At least one event occurred, iterate over the returned epoll events
     * until they're either all processed, or we've returned all the events
     * the caller desired.
     */
    for (cur_epoll_event = set->epoll_ret_events;
         cur_epoll_event < (set->epoll_ret_events + rc) &&
         returned_events < nevents;
         cur_epoll_event++)
    {
        /* epoll's data pointer is set to the associated WaitEvent */
        cur_event = (WaitEvent *) cur_epoll_event->data.ptr;

        /*
         * Pre-fill the next output slot; it only counts as returned when one
         * of the branches below advances occurred_events.
         */
        occurred_events->pos = cur_event->pos;
        occurred_events->user_data = cur_event->user_data;
        occurred_events->events = 0;

        if (cur_event->events == WL_LATCH_SET &&
            cur_epoll_event->events & (EPOLLIN | EPOLLERR | EPOLLHUP))
        {
            /* There's data in the self-pipe, clear it. */
            drainSelfPipe();

            /* report the latch only if it is actually set */
            if (set->latch->is_set)
            {
                occurred_events->fd = PGINVALID_SOCKET;
                occurred_events->events = WL_LATCH_SET;
                occurred_events++;
                returned_events++;
            }
        }
        else if (cur_event->events == WL_POSTMASTER_DEATH &&
                 cur_epoll_event->events & (EPOLLIN | EPOLLERR | EPOLLHUP))
        {
            /*
             * We expect an EPOLLHUP when the remote end is closed, but
             * because we don't expect the pipe to become readable or to have
             * any errors either, treat those cases as postmaster death, too.
             *
             * As explained in the WAIT_USE_SELECT implementation, select(2)
             * may spuriously return. Be paranoid about that here too, a
             * spurious WL_POSTMASTER_DEATH would be painful.
             */
            if (!PostmasterIsAlive())
            {
                occurred_events->fd = PGINVALID_SOCKET;
                occurred_events->events = WL_POSTMASTER_DEATH;
                occurred_events++;
                returned_events++;
            }
        }
        else if (cur_event->events & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE))
        {
            Assert(cur_event->fd != PGINVALID_SOCKET);

            if ((cur_event->events & WL_SOCKET_READABLE) &&
                (cur_epoll_event->events & (EPOLLIN | EPOLLERR | EPOLLHUP)))
            {
                /* data available in socket, or EOF */
                occurred_events->events |= WL_SOCKET_READABLE;
            }

            if ((cur_event->events & WL_SOCKET_WRITEABLE) &&
                (cur_epoll_event->events & (EPOLLOUT | EPOLLERR | EPOLLHUP)))
            {
                /* writable, or EOF */
                occurred_events->events |= WL_SOCKET_WRITEABLE;
            }

            if (occurred_events->events != 0)
            {
                occurred_events->fd = cur_event->fd;
                occurred_events++;
                returned_events++;
            }
        }
    }

    return returned_events;
}
/*
 * Wait using select(2).
 *
 * XXX: On at least older linux kernels select(), in violation of POSIX,
 * doesn't reliably return a socket as writable if closed - but we rely on
 * that. So far all the known cases of this problem are on platforms that also
 * provide a poll() implementation without that bug.  If we find one where
 * that's not the case, we'll need to add a workaround.
 *
 * set:             wait event set whose events are waited on.
 * cur_timeout:     timeout in milliseconds; a negative value waits forever.
 * occurred_events: output array, filled from the front with events that fired.
 * nevents:         capacity of occurred_events.
 *
 * Returns the number of entries stored in occurred_events.  That is 0 when
 * the wait was interrupted (EINTR) or nothing reportable was found.
 * Returns -1 on timeout.  Any other select() failure raises an ERROR.
 */
static inline int
WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
                      WaitEvent *occurred_events, int nevents)
{
    int         returned_events = 0;
    int         rc;
    WaitEvent  *cur_event;
    fd_set      input_mask;
    fd_set      output_mask;

    /*
     * Highest fd placed in the masks.  It must start below every valid fd:
     * it is compared against each event's fd in the loop below, so leaving
     * it uninitialized would hand select() an arbitrary nfds.
     */
    int         hifd = -1;
    struct timeval tv;
    struct timeval *tvp = NULL;

    FD_ZERO(&input_mask);
    FD_ZERO(&output_mask);

    /*
     * Prepare input/output masks. We do so every loop iteration as there's no
     * entirely portable way to copy fd_sets.
     */
    for (cur_event = set->events;
         cur_event < (set->events + set->nevents);
         cur_event++)
    {
        if (cur_event->events == WL_LATCH_SET)
            FD_SET(cur_event->fd, &input_mask);
        else if (cur_event->events == WL_POSTMASTER_DEATH)
            FD_SET(cur_event->fd, &input_mask);
        else
        {
            Assert(cur_event->events & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE));

            /*
             * Test the bits independently: an event may ask for both
             * readability and writability, and the reporting loop below
             * checks both masks for it.
             */
            if (cur_event->events & WL_SOCKET_READABLE)
                FD_SET(cur_event->fd, &input_mask);
            if (cur_event->events & WL_SOCKET_WRITEABLE)
                FD_SET(cur_event->fd, &output_mask);
        }

        if (cur_event->fd > hifd)
            hifd = cur_event->fd;
    }

    /* Sleep */
    if (cur_timeout >= 0)
    {
        tv.tv_sec = cur_timeout / 1000L;
        tv.tv_usec = (cur_timeout % 1000L) * 1000L;
        tvp = &tv;
    }
    rc = select(hifd + 1, &input_mask, &output_mask, NULL, tvp);

    /* Check return code */
    if (rc < 0)
    {
        /* EINTR is okay, otherwise complain */
        if (errno != EINTR)
        {
            /* clear the waiting flag before ereport(ERROR) exits */
            waiting = false;
            ereport(ERROR,
                    (errcode_for_socket_access(),
                     errmsg("select() failed: %m")));
        }
        return 0;               /* retry */
    }
    else if (rc == 0)
    {
        /* timeout exceeded */
        return -1;
    }

    /*
     * To associate events with select's masks, we have to check the status of
     * the file descriptors associated with an event; by looping through all
     * events.
     */
    for (cur_event = set->events;
         cur_event < (set->events + set->nevents)
         && returned_events < nevents;
         cur_event++)
    {
        /*
         * Pre-fill the next output slot; it only counts as returned when one
         * of the branches below advances occurred_events.
         */
        occurred_events->pos = cur_event->pos;
        occurred_events->user_data = cur_event->user_data;
        occurred_events->events = 0;

        if (cur_event->events == WL_LATCH_SET &&
            FD_ISSET(cur_event->fd, &input_mask))
        {
            /* There's data in the self-pipe, clear it. */
            drainSelfPipe();

            /* report the latch only if it is actually set */
            if (set->latch->is_set)
            {
                occurred_events->fd = PGINVALID_SOCKET;
                occurred_events->events = WL_LATCH_SET;
                occurred_events++;
                returned_events++;
            }
        }
        else if (cur_event->events == WL_POSTMASTER_DEATH &&
                 FD_ISSET(cur_event->fd, &input_mask))
        {
            /*
             * According to the select(2) man page on Linux, select(2) may
             * spuriously return and report a file descriptor as readable,
             * when it's not; and presumably so can poll(2).  It's not clear
             * that the relevant cases would ever apply to the postmaster
             * pipe, but since the consequences of falsely returning
             * WL_POSTMASTER_DEATH could be pretty unpleasant, we take the
             * trouble to positively verify EOF with PostmasterIsAlive().
             */
            if (!PostmasterIsAlive())
            {
                occurred_events->fd = PGINVALID_SOCKET;
                occurred_events->events = WL_POSTMASTER_DEATH;
                occurred_events++;
                returned_events++;
            }
        }
        else if (cur_event->events & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE))
        {
            Assert(cur_event->fd != PGINVALID_SOCKET);

            if ((cur_event->events & WL_SOCKET_READABLE) &&
                FD_ISSET(cur_event->fd, &input_mask))
            {
                /* data available in socket, or EOF */
                occurred_events->events |= WL_SOCKET_READABLE;
            }

            if ((cur_event->events & WL_SOCKET_WRITEABLE) &&
                FD_ISSET(cur_event->fd, &output_mask))
            {
                /* socket is writeable, or EOF */
                occurred_events->events |= WL_SOCKET_WRITEABLE;
            }

            if (occurred_events->events != 0)
            {
                occurred_events->fd = cur_event->fd;
                occurred_events++;
                returned_events++;
            }
        }
    }
    return returned_events;
}
/*
 * Client authentication starts here.  If there is an error, this
 * function does not return and the backend process is terminated.
 *
 * The method chosen by hba_getauthmethod() (port->auth_method) selects the
 * exchange performed below.  When the exchange yields STATUS_OK an
 * AUTH_REQ_OK message is sent; any other status is handed to auth_failed().
 */
void
ClientAuthentication(Port *port)
{
    /* stays STATUS_ERROR unless one of the switch cases below assigns it */
    int         status = STATUS_ERROR;

    /*
     * Get the authentication method to use for this frontend/database
     * combination.  Note: a failure return indicates a problem with the
     * hba config file, not with the request.  hba.c should have dropped
     * an error message into the postmaster logfile if it failed.
     */
    if (hba_getauthmethod(port) != STATUS_OK)
        ereport(FATAL,
                (errcode(ERRCODE_CONFIG_FILE_ERROR),
                 errmsg("missing or erroneous pg_hba.conf file"),
                 errhint("See server log for details.")));

    switch (port->auth_method)
    {
        case uaReject:

            /*
             * This could have come from an explicit "reject" entry in
             * pg_hba.conf, but more likely it means there was no matching
             * entry.  Take pity on the poor user and issue a helpful
             * error message.  NOTE: this is not a security breach,
             * because all the info reported here is known at the frontend
             * and must be assumed known to bad guys. We're merely helping
             * out the less clueful good guys.
             */
            {
                char        hostinfo[NI_MAXHOST];

                /* numeric form of the client address, for the message below */
                getnameinfo_all(&port->raddr.addr, port->raddr.salen,
                                hostinfo, sizeof(hostinfo),
                                NULL, 0,
                                NI_NUMERICHOST);

#ifdef USE_SSL
                ereport(FATAL,
                   (errcode(ERRCODE_INVALID_AUTHORIZATION_SPECIFICATION),
                    errmsg("no pg_hba.conf entry for host \"%s\", user \"%s\", database \"%s\", %s",
                           hostinfo, port->user_name, port->database_name,
                           port->ssl ? gettext("SSL on") : gettext("SSL off"))));
#else
                ereport(FATAL,
                   (errcode(ERRCODE_INVALID_AUTHORIZATION_SPECIFICATION),
                    errmsg("no pg_hba.conf entry for host \"%s\", user \"%s\", database \"%s\"",
                           hostinfo, port->user_name, port->database_name)));
#endif
                break;
            }

        case uaKrb4:
            /* Kerberos 4 only seems to work with AF_INET. */
            if (port->raddr.addr.ss_family != AF_INET
                || port->laddr.addr.ss_family != AF_INET)
                ereport(FATAL,
                        (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                         errmsg("Kerberos 4 only supports IPv4 connections")));
            sendAuthRequest(port, AUTH_REQ_KRB4);
            status = pg_krb4_recvauth(port);
            break;

        case uaKrb5:
            sendAuthRequest(port, AUTH_REQ_KRB5);
            status = pg_krb5_recvauth(port);
            break;

        case uaIdent:

            /*
             * If we are doing ident on unix-domain sockets, use SCM_CREDS
             * only if it is defined and SO_PEERCRED isn't.
             */
#if !defined(HAVE_GETPEEREID) && !defined(SO_PEERCRED) && \
    (defined(HAVE_STRUCT_CMSGCRED) || defined(HAVE_STRUCT_FCRED) || \
     (defined(HAVE_STRUCT_SOCKCRED) && defined(LOCAL_CREDS)))
            if (port->raddr.addr.ss_family == AF_UNIX)
            {
#if defined(HAVE_STRUCT_FCRED) || defined(HAVE_STRUCT_SOCKCRED)

                /*
                 * Receive credentials on next message receipt, BSD/OS,
                 * NetBSD.  We need to set this before the client sends the
                 * next packet.
                 */
                int         on = 1;

                if (setsockopt(port->sock, 0, LOCAL_CREDS, &on, sizeof(on)) < 0)
                    ereport(FATAL,
                            (errcode_for_socket_access(),
                             errmsg("could not enable credential reception: %m")));
#endif

                sendAuthRequest(port, AUTH_REQ_SCM_CREDS);
            }
#endif
            status = authident(port);
            break;

        /* The three password methods differ only in the request code sent */
        case uaMD5:
            sendAuthRequest(port, AUTH_REQ_MD5);
            status = recv_and_check_password_packet(port);
            break;

        case uaCrypt:
            sendAuthRequest(port, AUTH_REQ_CRYPT);
            status = recv_and_check_password_packet(port);
            break;

        case uaPassword:
            sendAuthRequest(port, AUTH_REQ_PASSWORD);
            status = recv_and_check_password_packet(port);
            break;

#ifdef USE_PAM
        case uaPAM:
            pam_port_cludge = port;
            status = CheckPAMAuth(port, port->user_name, "");
            break;
#endif   /* USE_PAM */

        case uaTrust:
            status = STATUS_OK;
            break;
    }

    if (status == STATUS_OK)
        sendAuthRequest(port, AUTH_REQ_OK);
    else
        auth_failed(port, status);
}
/*
 * StreamConnection -- accept a pending client connection on server_fd.
 *
 * On success port->sock holds the new connection's descriptor and
 * port->raddr / port->laddr hold the remote and local addresses.
 *
 * ASSUME: this need not be non-blocking, because the postmaster uses
 * select() to learn when the listening socket is ready for accept().
 *
 * Once accept() has succeeded this function never closes port->sock itself,
 * not even on the later failure paths.
 *
 * RETURNS: STATUS_OK or STATUS_ERROR
 */
int
StreamConnection(pgsocket server_fd, Port *port)
{
	struct sockaddr *remote = (struct sockaddr *) &port->raddr.addr;
	struct sockaddr *local = (struct sockaddr *) &port->laddr.addr;
	int			optval;

	/* Accept the connection; this also records the client address. */
	port->raddr.salen = sizeof(port->raddr.addr);
	port->sock = accept(server_fd, remote, &port->raddr.salen);
	if (port->sock < 0)
	{
		ereport(LOG,
				(errcode_for_socket_access(),
				 errmsg("could not accept new connection: %m")));

		/*
		 * After a failed accept() postmaster.c still sees the server socket
		 * as read-ready and retries immediately.  Sleep briefly so we do not
		 * spin on the CPU.  (Running out of kernel file table slots is the
		 * most likely cause; all we can do is hope some get freed.)
		 */
		pg_usleep(100000L);		/* wait 0.1 sec */
		return STATUS_ERROR;
	}

#ifdef SCO_ACCEPT_BUG

	/*
	 * UnixWare 7+ and OpenServer 5.0.4 are known to leave the family unset;
	 * patching it up on every version of those platforms is harmless.
	 */
	if (port->raddr.addr.ss_family == 0)
		port->raddr.addr.ss_family = AF_UNIX;
#endif

	/* Record the server-side (local) address as well. */
	port->laddr.salen = sizeof(port->laddr.addr);
	if (getsockname(port->sock, local, &port->laddr.salen) < 0)
	{
		elog(LOG, "getsockname() failed: %m");
		return STATUS_ERROR;
	}

	/* NODELAY / KEEPALIVE tuning is only applied to non-Unix-domain sockets. */
	if (IS_AF_UNIX(port->laddr.addr.ss_family))
		return STATUS_OK;

#ifdef TCP_NODELAY
	optval = 1;
	if (setsockopt(port->sock, IPPROTO_TCP, TCP_NODELAY,
				   (char *) &optval, sizeof(optval)) < 0)
	{
		elog(LOG, "setsockopt(TCP_NODELAY) failed: %m");
		return STATUS_ERROR;
	}
#endif

	optval = 1;
	if (setsockopt(port->sock, SOL_SOCKET, SO_KEEPALIVE,
				   (char *) &optval, sizeof(optval)) < 0)
	{
		elog(LOG, "setsockopt(SO_KEEPALIVE) failed: %m");
		return STATUS_ERROR;
	}

#ifdef WIN32

	/*
	 * Win32 socket optimization; the ideal size is 32k.
	 * http://support.microsoft.com/kb/823764/EN-US/
	 */
	optval = PQ_BUFFER_SIZE * 4;
	if (setsockopt(port->sock, SOL_SOCKET, SO_SNDBUF,
				   (char *) &optval, sizeof(optval)) < 0)
	{
		elog(LOG, "setsockopt(SO_SNDBUF) failed: %m");
		return STATUS_ERROR;
	}
#endif

	/*
	 * Apply the current keepalive parameters too.  Failures are ignored
	 * because these options are not universally supported.  (Resetting the
	 * GUC variables to 0 is unnecessary: their show hooks report the truth
	 * anyway.)
	 */
	(void) pq_setkeepalivesidle(tcp_keepalives_idle, port);
	(void) pq_setkeepalivesinterval(tcp_keepalives_interval, port);
	(void) pq_setkeepalivescount(tcp_keepalives_count, port);

	return STATUS_OK;
}
/*
 * ClientConnectionReady -- zero-timeout readiness probe for a libpq socket.
 *
 * Returns true when the connection's socket is ready for the operation
 * implied by pollingStatus (PGRES_POLLING_READING or PGRES_POLLING_WRITING),
 * and false when it is not ready yet or the probe was interrupted by a
 * signal.  Any other poll()/select() failure is raised with ereport(ERROR).
 *
 * Loosely based on pqSocketCheck() in fe-misc.c and libpq_select() in
 * libpqwalreceiver.c.  poll(2) is used if available, otherwise select(2).
 */
static bool
ClientConnectionReady(PGconn *connection, PostgresPollingStatusType pollingStatus)
{
	int			rc;

#ifdef HAVE_POLL
	struct pollfd pfd;

	/* Error conditions are requested together with the direction of interest. */
	pfd.events = 0;
	switch (pollingStatus)
	{
		case PGRES_POLLING_READING:
			pfd.events = POLLERR | POLLIN;
			break;
		case PGRES_POLLING_WRITING:
			pfd.events = POLLERR | POLLOUT;
			break;
		default:
			break;
	}
	pfd.fd = PQsocket(connection);
	pfd.revents = 0;

	/* one descriptor, return immediately */
	rc = poll(&pfd, 1, 0);
#else							/* !HAVE_POLL */
	fd_set		rfds;
	fd_set		wfds;
	fd_set		efds;
	struct timeval no_wait = {0, 0};
	int			sock = PQsocket(connection);

	FD_ZERO(&rfds);
	FD_ZERO(&wfds);
	FD_ZERO(&efds);

	switch (pollingStatus)
	{
		case PGRES_POLLING_READING:
			FD_SET(sock, &efds);
			FD_SET(sock, &rfds);
			break;
		case PGRES_POLLING_WRITING:
			FD_SET(sock, &efds);
			FD_SET(sock, &wfds);
			break;
		default:
			break;
	}

	rc = select(sock + 1, &rfds, &wfds, &efds, &no_wait);
#endif   /* HAVE_POLL */

	if (rc > 0)
		return true;

	if (rc == 0)
		return false;

	/* A caught signal just means the caller should poll again. */
	if (errno == EINTR)
		return false;

	/*
	 * poll() or select() can fail with EFAULT (socket not contained in the
	 * calling program's address space), EBADF (invalid file descriptor),
	 * EINVAL (invalid arguments) or ENOMEM (no space to allocate file
	 * descriptor tables).  Only ENOMEM is likely here, and it is fatal, so
	 * error out.
	 */
	Assert(errno == ENOMEM);
	ereport(ERROR,
			(errcode_for_socket_access(),
			 errmsg("select()/poll() failed: %m")));

	return false;
}
/*
 * Postmaster subroutine to start a syslogger subprocess.
 *
 * Returns 0 when Logging_collector is off or when the fork fails; otherwise
 * returns the PID of the new syslogger process (cast to int).  In the
 * non-EXEC_BACKEND build the forked child calls SysLoggerMain() directly
 * from here.
 *
 * Side effects in the postmaster: creates syslogPipe on first use, creates
 * Log_directory (errors ignored), opens and then closes the initial log
 * file, records first_syslogger_file_time, and -- once only, guarded by
 * redirection_done -- redirects stderr (and, on non-Windows, stdout) into
 * the write end of the pipe.
 */
int
SysLogger_Start(void)
{
	pid_t		sysloggerPid;
	char	   *filename;

	/* Nothing to do unless the logging collector is enabled. */
	if (!Logging_collector)
		return 0;

	/*
	 * If first time through, create the pipe which will receive stderr
	 * output.
	 *
	 * If the syslogger crashes and needs to be restarted, we continue to use
	 * the same pipe (indeed must do so, since extant backends will be writing
	 * into that pipe).
	 *
	 * This means the postmaster must continue to hold the read end of the
	 * pipe open, so we can pass it down to the reincarnated syslogger. This
	 * is a bit klugy but we have little choice.
	 */
#ifndef WIN32
	/* On Unix a negative read-end descriptor means "pipe not created yet". */
	if (syslogPipe[0] < 0)
	{
		if (pipe(syslogPipe) < 0)
			ereport(FATAL,
					(errcode_for_socket_access(),
					 (errmsg("could not create pipe for syslog: %m"))));
	}
#else
	/* On Windows a zero handle means "pipe not created yet". */
	if (!syslogPipe[0])
	{
		SECURITY_ATTRIBUTES sa;

		/* bInheritHandle is set so the pipe handles are inheritable */
		memset(&sa, 0, sizeof(SECURITY_ATTRIBUTES));
		sa.nLength = sizeof(SECURITY_ATTRIBUTES);
		sa.bInheritHandle = TRUE;

		if (!CreatePipe(&syslogPipe[0], &syslogPipe[1], &sa, 32768))
			ereport(FATAL,
					(errcode_for_file_access(),
					 (errmsg("could not create pipe for syslog: %m"))));
	}
#endif

	/*
	 * Create log directory if not present; ignore errors
	 */
	mkdir(Log_directory, S_IRWXU);

	/*
	 * The initial logfile is created right in the postmaster, to verify that
	 * the Log_directory is writable.  We save the reference time so that the
	 * syslogger child process can recompute this file name.
	 *
	 * It might look a bit strange to re-do this during a syslogger restart,
	 * but we must do so since the postmaster closed syslogFile after the
	 * previous fork (and remembering that old file wouldn't be right anyway).
	 * Note we always append here, we won't overwrite any existing file.  This
	 * is consistent with the normal rules, because by definition this is not
	 * a time-based rotation.
	 */
	first_syslogger_file_time = time(NULL);
	filename = logfile_getname(first_syslogger_file_time, NULL);

	syslogFile = logfile_open(filename, "a", false);

	pfree(filename);

	/* The switch body below is shared by both build variants. */
#ifdef EXEC_BACKEND
	switch ((sysloggerPid = syslogger_forkexec()))
#else
	switch ((sysloggerPid = fork_process()))
#endif
	{
		case -1:

			/*
			 * NOTE(review): syslogFile, opened just above, is not closed on
			 * this path -- confirm whether that is intentional.
			 */
			ereport(LOG,
					(errmsg("could not fork system logger: %m")));
			return 0;

#ifndef EXEC_BACKEND
		case 0:
			/* in postmaster child ... */
			/* Close the postmaster's sockets */
			ClosePostmasterPorts(true);

			/* Lose the postmaster's on-exit routines */
			on_exit_reset();

			/* Drop our connection to postmaster's shared memory, as well */
			PGSharedMemoryDetach();

			/* do the work */
			SysLoggerMain(0, NULL);
			break;
#endif

		default:
			/* success, in postmaster */

			/* now we redirect stderr, if not done already */
			if (!redirection_done)
			{
#ifndef WIN32
				/* Flush each stream before its descriptor is replaced. */
				fflush(stdout);
				if (dup2(syslogPipe[1], fileno(stdout)) < 0)
					ereport(FATAL,
							(errcode_for_file_access(),
							 errmsg("could not redirect stdout: %m")));
				fflush(stderr);
				if (dup2(syslogPipe[1], fileno(stderr)) < 0)
					ereport(FATAL,
							(errcode_for_file_access(),
							 errmsg("could not redirect stderr: %m")));
				/* Now we are done with the write end of the pipe. */
				close(syslogPipe[1]);
				syslogPipe[1] = -1;
#else
				int			fd;

				/*
				 * open the pipe in binary mode and make sure stderr is binary
				 * after it's been dup'ed into, to avoid disturbing the pipe
				 * chunking protocol.
				 */
				fflush(stderr);
				fd = _open_osfhandle((intptr_t) syslogPipe[1],
									 _O_APPEND | _O_BINARY);
				if (dup2(fd, _fileno(stderr)) < 0)
					ereport(FATAL,
							(errcode_for_file_access(),
							 errmsg("could not redirect stderr: %m")));
				close(fd);
				_setmode(_fileno(stderr), _O_BINARY);

				/*
				 * Now we are done with the write end of the pipe.
				 * CloseHandle() must not be called because the preceding
				 * close() closes the underlying handle.
				 */
				syslogPipe[1] = 0;
#endif
				/* Remember so a syslogger restart does not redirect again. */
				redirection_done = true;
			}

			/* postmaster will never write the file; close it */
			fclose(syslogFile);
			syslogFile = NULL;
			return (int) sysloggerPid;
	}

	/* we should never reach here */
	return 0;
}
/*
 * Main entry point for syslogger process
 * argc/argv parameters are valid only in EXEC_BACKEND case.
 *
 * Outline:
 *	1. Process setup: identity globals, stderr/stdout handling, closing our
 *	   copy of the pipe's write end, latch and signal handlers.
 *	2. Main loop: react to SIGHUP (config reload) and rotation requests,
 *	   decide on time- or size-based rotation, then sleep on the latch (and,
 *	   on non-Windows, on the read end of the pipe) until there is work.
 *	3. Exit through proc_exit(0) once pipe_eof_seen is set.
 *
 * The loop has no other exit, so this function does not return normally.
 */
NON_EXEC_STATIC void
SysLoggerMain(int argc, char *argv[])
{
#ifndef WIN32
	/* Pipe input not yet consumed by process_pipe_input() stays in logbuffer. */
	char		logbuffer[READ_BUF_SIZE];
	int			bytes_in_logbuffer = 0;
#endif
	/* Snapshots of the GUC values, used to detect changes after SIGHUP. */
	char	   *currentLogDir;
	char	   *currentLogFilename;
	int			currentLogRotationAge;
	pg_time_t	now;

	IsUnderPostmaster = true;	/* we are a postmaster subprocess now */

	MyProcPid = getpid();		/* reset MyProcPid */

	MyStartTime = time(NULL);	/* set our start time in case we call elog */
	now = MyStartTime;

#ifdef EXEC_BACKEND
	syslogger_parseArgs(argc, argv);
#endif   /* EXEC_BACKEND */

	am_syslogger = true;

	init_ps_display("logger process", "", "", "");

	/*
	 * If we restarted, our stderr is already redirected into our own input
	 * pipe.  This is of course pretty useless, not to mention that it
	 * interferes with detecting pipe EOF.  Point stderr to /dev/null. This
	 * assumes that all interesting messages generated in the syslogger will
	 * come through elog.c and will be sent to write_syslogger_file.
	 */
	if (redirection_done)
	{
		int			fd = open(DEVNULL, O_WRONLY, 0);

		/*
		 * The closes might look redundant, but they are not: we want to be
		 * darn sure the pipe gets closed even if the open failed.  We can
		 * survive running with stderr pointing nowhere, but we can't afford
		 * to have extra pipe input descriptors hanging around.
		 */
		close(fileno(stdout));
		close(fileno(stderr));
		if (fd != -1)
		{
			dup2(fd, fileno(stdout));
			dup2(fd, fileno(stderr));
			close(fd);
		}
	}

	/*
	 * Syslogger's own stderr can't be the syslogPipe, so set it back to text
	 * mode if we didn't just close it. (It was set to binary in
	 * SubPostmasterMain).
	 *
	 * Note: on WIN32 the "else" below pairs with the "if (redirection_done)"
	 * above.
	 */
#ifdef WIN32
	else
		_setmode(_fileno(stderr), _O_TEXT);
#endif

	/*
	 * Also close our copy of the write end of the pipe.  This is needed to
	 * ensure we can detect pipe EOF correctly.  (But note that in the restart
	 * case, the postmaster already did this.)
	 */
#ifndef WIN32
	if (syslogPipe[1] >= 0)
		close(syslogPipe[1]);
	syslogPipe[1] = -1;
#else
	if (syslogPipe[1])
		CloseHandle(syslogPipe[1]);
	syslogPipe[1] = 0;
#endif

	/*
	 * If possible, make this process a group leader, so that the postmaster
	 * can signal any child processes too.  (syslogger probably never has any
	 * child processes, but for consistency we make all postmaster child
	 * processes do this.)
	 */
#ifdef HAVE_SETSID
	if (setsid() < 0)
		elog(FATAL, "setsid() failed: %m");
#endif

	InitializeLatchSupport();	/* needed for latch waits */

	/* Initialize private latch for use by signal handlers */
	InitLatch(&sysLoggerLatch);

	/*
	 * Properly accept or ignore signals the postmaster might send us
	 *
	 * Note: we ignore all termination signals, and instead exit only when all
	 * upstream processes are gone, to ensure we don't miss any dying gasps of
	 * broken backends...
	 */

	pqsignal(SIGHUP, sigHupHandler);	/* set flag to read config file */
	pqsignal(SIGINT, SIG_IGN);
	pqsignal(SIGTERM, SIG_IGN);
	pqsignal(SIGQUIT, SIG_IGN);
	pqsignal(SIGALRM, SIG_IGN);
	pqsignal(SIGPIPE, SIG_IGN);
	pqsignal(SIGUSR1, sigUsr1Handler);	/* request log rotation */
	pqsignal(SIGUSR2, SIG_IGN);

	/*
	 * Reset some signals that are accepted by postmaster but not here
	 */
	pqsignal(SIGCHLD, SIG_DFL);
	pqsignal(SIGTTIN, SIG_DFL);
	pqsignal(SIGTTOU, SIG_DFL);
	pqsignal(SIGCONT, SIG_DFL);
	pqsignal(SIGWINCH, SIG_DFL);

	PG_SETMASK(&UnBlockSig);

#ifdef WIN32
	/*
	 * Fire up separate data transfer thread.  The critical section is
	 * entered before the thread is created; the main loop below leaves it
	 * only while sleeping.
	 */
	InitializeCriticalSection(&sysloggerSection);
	EnterCriticalSection(&sysloggerSection);

	threadHandle = (HANDLE) _beginthreadex(NULL, 0, pipeThread, NULL, 0, NULL);
	if (threadHandle == 0)
		elog(FATAL, "could not create syslogger data transfer thread: %m");
#endif   /* WIN32 */

	/*
	 * Remember active logfile's name.  We recompute this from the reference
	 * time because passing down just the pg_time_t is a lot cheaper than
	 * passing a whole file path in the EXEC_BACKEND case.
	 */
	last_file_name = logfile_getname(first_syslogger_file_time, NULL);

	/* remember active logfile parameters */
	currentLogDir = pstrdup(Log_directory);
	currentLogFilename = pstrdup(Log_filename);
	currentLogRotationAge = Log_RotationAge;
	/* set next planned rotation time */
	set_next_rotation_time();

	/* main worker loop */
	for (;;)
	{
		/* Per-iteration rotation bookkeeping; reset on every pass. */
		bool		time_based_rotation = false;
		int			size_rotation_for = 0;
		long		cur_timeout;
		int			cur_flags;

#ifndef WIN32
		int			rc;
#endif

		/* Clear any already-pending wakeups */
		ResetLatch(&sysLoggerLatch);

		/*
		 * Process any requests or signals received recently.
		 */
		if (got_SIGHUP)
		{
			got_SIGHUP = false;
			ProcessConfigFile(PGC_SIGHUP);

			/*
			 * Check if the log directory or filename pattern changed in
			 * postgresql.conf. If so, force rotation to make sure we're
			 * writing the logfiles in the right place.
			 */
			if (strcmp(Log_directory, currentLogDir) != 0)
			{
				pfree(currentLogDir);
				currentLogDir = pstrdup(Log_directory);
				rotation_requested = true;

				/*
				 * Also, create new directory if not present; ignore errors
				 */
				mkdir(Log_directory, S_IRWXU);
			}
			if (strcmp(Log_filename, currentLogFilename) != 0)
			{
				pfree(currentLogFilename);
				currentLogFilename = pstrdup(Log_filename);
				rotation_requested = true;
			}

			/*
			 * If rotation time parameter changed, reset next rotation time,
			 * but don't immediately force a rotation.
			 */
			if (currentLogRotationAge != Log_RotationAge)
			{
				currentLogRotationAge = Log_RotationAge;
				set_next_rotation_time();
			}

			/*
			 * If we had a rotation-disabling failure, re-enable rotation
			 * attempts after SIGHUP, and force one immediately.
			 */
			if (rotation_disabled)
			{
				rotation_disabled = false;
				rotation_requested = true;
			}
		}

		if (Log_RotationAge > 0 && !rotation_disabled)
		{
			/* Do a logfile rotation if it's time */
			now = (pg_time_t) time(NULL);
			if (now >= next_rotation_time)
				rotation_requested = time_based_rotation = true;
		}

		/* Size checks are skipped when a rotation is already pending. */
		if (!rotation_requested && Log_RotationSize > 0 && !rotation_disabled)
		{
			/* Do a rotation if file is too big */
			if (ftell(syslogFile) >= Log_RotationSize * 1024L)
			{
				rotation_requested = true;
				size_rotation_for |= LOG_DESTINATION_STDERR;
			}
			if (csvlogFile != NULL &&
				ftell(csvlogFile) >= Log_RotationSize * 1024L)
			{
				rotation_requested = true;
				size_rotation_for |= LOG_DESTINATION_CSVLOG;
			}
		}

		if (rotation_requested)
		{
			/*
			 * Force rotation when both values are zero. It means the request
			 * was sent by pg_rotate_logfile.
			 */
			if (!time_based_rotation && size_rotation_for == 0)
				size_rotation_for = LOG_DESTINATION_STDERR | LOG_DESTINATION_CSVLOG;
			logfile_rotate(time_based_rotation, size_rotation_for);
		}

		/*
		 * Calculate time till next time-based rotation, so that we don't
		 * sleep longer than that.  We assume the value of "now" obtained
		 * above is still close enough.  Note we can't make this calculation
		 * until after calling logfile_rotate(), since it will advance
		 * next_rotation_time.
		 *
		 * Also note that we need to beware of overflow in calculation of the
		 * timeout: with large settings of Log_RotationAge, next_rotation_time
		 * could be more than INT_MAX msec in the future.  In that case we'll
		 * wait no more than INT_MAX msec, and try again.
		 */
		if (Log_RotationAge > 0 && !rotation_disabled)
		{
			pg_time_t	delay;

			delay = next_rotation_time - now;
			if (delay > 0)
			{
				/* clamp in seconds first so the msec product fits in an int */
				if (delay > INT_MAX / 1000)
					delay = INT_MAX / 1000;
				cur_timeout = delay * 1000L;	/* msec */
			}
			else
				cur_timeout = 0;
			cur_flags = WL_TIMEOUT;
		}
		else
		{
			/* No time-based rotation: WL_TIMEOUT is not requested at all. */
			cur_timeout = -1L;
			cur_flags = 0;
		}

		/*
		 * Sleep until there's something to do
		 */
#ifndef WIN32
		rc = WaitLatchOrSocket(&sysLoggerLatch,
							   WL_LATCH_SET | WL_SOCKET_READABLE | cur_flags,
							   syslogPipe[0],
							   cur_timeout);

		if (rc & WL_SOCKET_READABLE)
		{
			int			bytesRead;

			/* Append after whatever an earlier pass left in the buffer. */
			bytesRead = read(syslogPipe[0],
							 logbuffer + bytes_in_logbuffer,
							 sizeof(logbuffer) - bytes_in_logbuffer);
			if (bytesRead < 0)
			{
				/* EINTR is silently ignored; other errors are only logged. */
				if (errno != EINTR)
					ereport(LOG,
							(errcode_for_socket_access(),
							 errmsg("could not read from logger pipe: %m")));
			}
			else if (bytesRead > 0)
			{
				bytes_in_logbuffer += bytesRead;
				process_pipe_input(logbuffer, &bytes_in_logbuffer);
				/* go straight back to the top of the loop */
				continue;
			}
			else
			{
				/*
				 * Zero bytes read when select() is saying read-ready means
				 * EOF on the pipe: that is, there are no longer any processes
				 * with the pipe write end open.  Therefore, the postmaster
				 * and all backends are shut down, and we are done.
				 */
				pipe_eof_seen = true;

				/* if there's any data left then force it out now */
				flush_pipe_input(logbuffer, &bytes_in_logbuffer);
			}
		}
#else							/* WIN32 */

		/*
		 * On Windows we leave it to a separate thread to transfer data and
		 * detect pipe EOF.  The main thread just wakes up to handle SIGHUP
		 * and rotation conditions.
		 *
		 * Server code isn't generally thread-safe, so we ensure that only one
		 * of the threads is active at a time by entering the critical section
		 * whenever we're not sleeping.
		 */
		LeaveCriticalSection(&sysloggerSection);

		(void) WaitLatch(&sysLoggerLatch,
						 WL_LATCH_SET | cur_flags,
						 cur_timeout);

		EnterCriticalSection(&sysloggerSection);
#endif   /* WIN32 */

		if (pipe_eof_seen)
		{
			/*
			 * seeing this message on the real stderr is annoying - so we make
			 * it DEBUG1 to suppress in normal use.
			 */
			ereport(DEBUG1,
					(errmsg("logger shutting down")));

			/*
			 * Normal exit from the syslogger is here.  Note that we
			 * deliberately do not close syslogFile before exiting; this is to
			 * allow for the possibility of elog messages being generated
			 * inside proc_exit.  Regular exit() will take care of flushing
			 * and closing stdio channels.
			 */
			proc_exit(0);
		}
	}
}
/*
 * Read a message from the specified connection carrying pid numbers
 * of transactions interacting with pooler
 *
 * Message layout, as parsed below:
 *	 byte 0			   message code, must be 'p'
 *	 bytes 1..4		   number of pids, network byte order
 *	 bytes 5..		   that many pids, each sizeof(int) bytes, network order
 *
 * port: pooler connection to read from.
 * pids: output; on success with a non-zero count, *pids is set to a palloc'd
 *		 array holding the pids.  It is left untouched otherwise.
 *
 * Returns the number of pids received, or 0 if there were none, the peer
 * closed the connection, or the message was rejected.  Errors are reported
 * with ereport(ERROR).
 */
int
pool_recvpids(PoolPort *port, int **pids)
{
	int			r;
	uint		i;
	uint		n32;
	char		buf[SEND_PID_BUFFER_SIZE];

	/*
	 * Buffer size is upper bounded by the maximum number of connections,
	 * as in the pooler each connection has one Pooler Agent.
	 */
	r = recv(Socket(*port), buf, SEND_PID_BUFFER_SIZE, 0);
	if (r < 0)
	{
		/*
		 * Report broken connection
		 */
		ereport(ERROR,
				(errcode_for_socket_access(),
				 errmsg("could not receive data from client: %m")));
		goto failure;
	}
	else if (r == 0)
	{
		/* peer closed the connection */
		goto failure;
	}
	else if (r != SEND_PID_BUFFER_SIZE)
	{
		ereport(ERROR,
				(errcode(ERRCODE_PROTOCOL_VIOLATION),
				 errmsg("incomplete message from client")));
		goto failure;
	}

	/* Verify response */
	if (buf[0] != 'p')
	{
		ereport(ERROR,
				(errcode(ERRCODE_PROTOCOL_VIOLATION),
				 errmsg("unexpected message code")));
		goto failure;
	}

	memcpy(&n32, buf + 1, 4);
	n32 = ntohl(n32);
	if (n32 == 0)
	{
		elog(WARNING, "No transaction to abort");
		return n32;
	}

	/*
	 * The count comes off the wire, so make sure the pids it announces
	 * actually fit in the payload that follows the 5-byte header; otherwise
	 * the copy loop below would read past the end of buf.
	 */
	if (n32 > (SEND_PID_BUFFER_SIZE - 5) / sizeof(int))
	{
		ereport(ERROR,
				(errcode(ERRCODE_PROTOCOL_VIOLATION),
				 errmsg("invalid pid count in message from client")));
		goto failure;
	}

	*pids = (int *) palloc(sizeof(int) * n32);

	for (i = 0; i < n32; i++)
	{
		int			n;

		memcpy(&n, buf + 5 + i * sizeof(int), sizeof(int));

		/*
		 * Index the array that *pids points to.  "*pids[i]" would parse as
		 * "*(pids[i])" and write through unrelated memory for every i > 0.
		 */
		(*pids)[i] = ntohl(n);
	}
	return n32;

failure:
	return 0;
}
/*
 * Wait using poll(2).
 *
 * A single poll() call reports readiness for every registered event, at the
 * price of walking all of set->pollfds afterwards.
 *
 * Returns -1 if the timeout expired, 0 if poll() was interrupted by a signal
 * (or nothing reportable happened), otherwise the number of entries written
 * to occurred_events, which is at most nevents.
 */
static inline int
WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
					  WaitEvent *occurred_events, int nevents)
{
	const int	fault_bits = POLLHUP | POLLERR | POLLNVAL;
	WaitEvent  *out = occurred_events;
	int			n_reported = 0;
	int			poll_rc;
	int			idx;

	/* Sleep */
	poll_rc = poll(set->pollfds, set->nevents, (int) cur_timeout);

	if (poll_rc == 0)
	{
		/* timeout exceeded */
		return -1;
	}

	if (poll_rc < 0)
	{
		/* EINTR is okay, otherwise complain */
		if (errno == EINTR)
			return 0;

		waiting = false;
		ereport(ERROR,
				(errcode_for_socket_access(),
				 errmsg("poll() failed: %m")));
		return 0;
	}

	for (idx = 0; idx < set->nevents && n_reported < nevents; idx++)
	{
		const WaitEvent *ev = &set->events[idx];
		const struct pollfd *pfd = &set->pollfds[idx];

		/* no activity on this FD, skip */
		if (pfd->revents == 0)
			continue;

		/* Pre-fill the output slot; it only counts once "out" is advanced. */
		out->pos = ev->pos;
		out->user_data = ev->user_data;
		out->events = 0;

		if (ev->events == WL_LATCH_SET &&
			(pfd->revents & (POLLIN | fault_bits)))
		{
			/* There's data in the self-pipe, clear it. */
			drainSelfPipe();

			if (set->latch->is_set)
			{
				out->fd = PGINVALID_SOCKET;
				out->events = WL_LATCH_SET;
				out++;
				n_reported++;
			}
		}
		else if (ev->events == WL_POSTMASTER_DEATH &&
				 (pfd->revents & (POLLIN | fault_bits)))
		{
			/*
			 * POLLHUP is what we expect when the remote end is closed, but
			 * the pipe should never become readable or report errors either,
			 * so those are treated as postmaster death as well.
			 *
			 * As with select(2), the wakeup may be spurious.  A false
			 * WL_POSTMASTER_DEATH would be painful, so double-check with
			 * PostmasterIsAlive() before reporting it.
			 */
			if (!PostmasterIsAlive())
			{
				out->fd = PGINVALID_SOCKET;
				out->events = WL_POSTMASTER_DEATH;
				out++;
				n_reported++;
			}
		}
		else if (ev->events & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE))
		{
			/*
			 * NOTE(review): ">=" also lets PGINVALID_SOCKET itself through;
			 * confirm whether "!=" was intended.
			 */
			Assert(ev->fd >= PGINVALID_SOCKET);

			/* data available in socket, or EOF */
			if ((ev->events & WL_SOCKET_READABLE) &&
				(pfd->revents & (POLLIN | fault_bits)))
				out->events |= WL_SOCKET_READABLE;

			/* writeable, or EOF */
			if ((ev->events & WL_SOCKET_WRITEABLE) &&
				(pfd->revents & (POLLOUT | fault_bits)))
				out->events |= WL_SOCKET_WRITEABLE;

			if (out->events != 0)
			{
				out->fd = ev->fd;
				out++;
				n_reported++;
			}
		}
	}

	return n_reported;
}
/*
 * StreamServerPort -- open listening sockets for one host/port or Unix path.
 *
 * family:		   address family hint handed to pg_getaddrinfo_all().
 * hostName:	   host to resolve; may be NULL (only the wording of the
 *				   log message differs in that case).
 * portNumber:	   port number; formatted as the service string for
 *				   non-Unix sockets and passed to Lock_AF_UNIX() for Unix
 *				   sockets.
 * unixSocketName: passed to Lock_AF_UNIX() when family is AF_UNIX.
 * ListenSocket:   array of MaxListen slots; a slot equal to
 *				   PGINVALID_SOCKET is free and may be filled here.
 *
 * Every resolved address is tried in turn.  A failure for one address is
 * logged and that address is skipped.
 *
 * RETURNS: STATUS_OK if at least one socket was added to ListenSocket,
 * STATUS_ERROR otherwise.
 */
int
StreamServerPort(int family, char *hostName, unsigned short portNumber,
				 char *unixSocketName,
				 pgsocket ListenSocket[], int MaxListen)
{
	pgsocket	fd;
	int			err;
	int			maxconn;
	int			ret;
	char		portNumberStr[32];
	const char *familyDesc;
	char		familyDescBuf[64];
	char	   *service;
	struct addrinfo *addrs = NULL,
			   *addr;
	struct addrinfo hint;
	int			listen_index = 0;	/* search position in ListenSocket[] */
	int			added = 0;		/* number of sockets opened successfully */

	/* "one" is only referenced inside the matching #if blocks below. */
#if !defined(WIN32) || defined(IPV6_V6ONLY)
	int			one = 1;
#endif

	/* Initialize hint structure */
	MemSet(&hint, 0, sizeof(hint));
	hint.ai_family = family;
	hint.ai_flags = AI_PASSIVE;
	hint.ai_socktype = SOCK_STREAM;

#ifdef HAVE_UNIX_SOCKETS
	if (family == AF_UNIX)
	{
		/* Lock_AF_UNIX will also fill in sock_path. */
		if (Lock_AF_UNIX(portNumber, unixSocketName) != STATUS_OK)
			return STATUS_ERROR;
		service = sock_path;
	}
	else
#endif   /* HAVE_UNIX_SOCKETS */
	{
		/* For non-Unix families the "service" is the decimal port number. */
		snprintf(portNumberStr, sizeof(portNumberStr), "%d", portNumber);
		service = portNumberStr;
	}

	ret = pg_getaddrinfo_all(hostName, service, &hint, &addrs);
	if (ret || !addrs)
	{
		if (hostName)
			ereport(LOG,
					(errmsg("could not translate host name \"%s\", service \"%s\" to address: %s",
							hostName, service, gai_strerror(ret))));
		else
			ereport(LOG,
				 (errmsg("could not translate service \"%s\" to address: %s",
						 service, gai_strerror(ret))));
		if (addrs)
			pg_freeaddrinfo_all(hint.ai_family, addrs);
		return STATUS_ERROR;
	}

	/*
	 * Try each resolved address.  "continue" skips just this address;
	 * "break" stops trying further addresses but still reaches the cleanup
	 * after the loop.
	 */
	for (addr = addrs; addr; addr = addr->ai_next)
	{
		if (!IS_AF_UNIX(family) && IS_AF_UNIX(addr->ai_family))
		{
			/*
			 * Only set up a unix domain socket when they really asked for it.
			 * The service/port is different in that case.
			 */
			continue;
		}

		/*
		 * See if there is still room to add 1 more socket.  listen_index is
		 * not reset between addresses, so the scan resumes where it stopped.
		 */
		for (; listen_index < MaxListen; listen_index++)
		{
			if (ListenSocket[listen_index] == PGINVALID_SOCKET)
				break;
		}
		if (listen_index >= MaxListen)
		{
			ereport(LOG,
					(errmsg("could not bind to all requested addresses: MAXLISTEN (%d) exceeded",
							MaxListen)));
			break;
		}

		/* set up family name for possible error messages */
		switch (addr->ai_family)
		{
			case AF_INET:
				familyDesc = _("IPv4");
				break;
#ifdef HAVE_IPV6
			case AF_INET6:
				familyDesc = _("IPv6");
				break;
#endif
#ifdef HAVE_UNIX_SOCKETS
			case AF_UNIX:
				familyDesc = _("Unix");
				break;
#endif
			default:
				snprintf(familyDescBuf, sizeof(familyDescBuf),
						 _("unrecognized address family %d"),
						 addr->ai_family);
				familyDesc = familyDescBuf;
				break;
		}

		if ((fd = socket(addr->ai_family, SOCK_STREAM, 0)) < 0)
		{
			ereport(LOG,
					(errcode_for_socket_access(),
			/* translator: %s is IPv4, IPv6, or Unix */
					 errmsg("could not create %s socket: %m",
							familyDesc)));
			continue;
		}

#ifndef WIN32

		/*
		 * Without the SO_REUSEADDR flag, a new postmaster can't be started
		 * right away after a stop or crash, giving "address already in use"
		 * error on TCP ports.
		 *
		 * On win32, however, this behavior only happens if the
		 * SO_EXLUSIVEADDRUSE is set. With SO_REUSEADDR, win32 allows multiple
		 * servers to listen on the same address, resulting in unpredictable
		 * behavior. With no flags at all, win32 behaves as Unix with
		 * SO_REUSEADDR.
		 */
		if (!IS_AF_UNIX(addr->ai_family))
		{
			if ((setsockopt(fd, SOL_SOCKET, SO_REUSEADDR,
							(char *) &one, sizeof(one))) == -1)
			{
				ereport(LOG,
						(errcode_for_socket_access(),
						 errmsg("setsockopt(SO_REUSEADDR) failed: %m")));
				closesocket(fd);
				continue;
			}
		}
#endif

#ifdef IPV6_V6ONLY
		/* Set IPV6_V6ONLY on IPv6 sockets where the option is defined. */
		if (addr->ai_family == AF_INET6)
		{
			if (setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY,
						   (char *) &one, sizeof(one)) == -1)
			{
				ereport(LOG,
						(errcode_for_socket_access(),
						 errmsg("setsockopt(IPV6_V6ONLY) failed: %m")));
				closesocket(fd);
				continue;
			}
		}
#endif

		/*
		 * Note: This might fail on some OS's, like Linux older than
		 * 2.4.21-pre3, that don't have the IPV6_V6ONLY socket option, and map
		 * ipv4 addresses to ipv6.  It will show ::ffff:ipv4 for all ipv4
		 * connections.
		 */
		err = bind(fd, addr->ai_addr, addr->ai_addrlen);
		if (err < 0)
		{
			ereport(LOG,
					(errcode_for_socket_access(),
			/* translator: %s is IPv4, IPv6, or Unix */
					 errmsg("could not bind %s socket: %m",
							familyDesc),
					 (IS_AF_UNIX(addr->ai_family)) ?
				  errhint("Is another postmaster already running on port %d?"
						  " If not, remove socket file \"%s\" and retry.",
						  (int) portNumber, sock_path) :
				  errhint("Is another postmaster already running on port %d?"
						  " If not, wait a few seconds and retry.",
						  (int) portNumber)));
			closesocket(fd);
			continue;
		}

#ifdef HAVE_UNIX_SOCKETS
		if (addr->ai_family == AF_UNIX)
		{
			/* A Setup_AF_UNIX() failure ends the whole address loop. */
			if (Setup_AF_UNIX() != STATUS_OK)
			{
				closesocket(fd);
				break;
			}
		}
#endif

		/*
		 * Select appropriate accept-queue length limit.  PG_SOMAXCONN is only
		 * intended to provide a clamp on the request on platforms where an
		 * overly large request provokes a kernel error (are there any?).
		 */
		maxconn = MaxBackends * 2;
		if (maxconn > PG_SOMAXCONN)
			maxconn = PG_SOMAXCONN;

		err = listen(fd, maxconn);
		if (err < 0)
		{
			ereport(LOG,
					(errcode_for_socket_access(),
			/* translator: %s is IPv4, IPv6, or Unix */
					 errmsg("could not listen on %s socket: %m",
							familyDesc)));
			closesocket(fd);
			continue;
		}
		/* Success: publish the socket in the free slot found above. */
		ListenSocket[listen_index] = fd;
		added++;
	}

	pg_freeaddrinfo_all(hint.ai_family, addrs);

	/* Report failure only if no socket at all could be opened. */
	if (!added)
		return STATUS_ERROR;

	return STATUS_OK;
}
/*
 * open_server_SSL -- attempt to negotiate an SSL connection on port->sock.
 *
 * Returns 0 once the handshake has completed and the peer identity fields
 * (port->peer, port->peer_dn, port->peer_cn) are filled in.  Returns -1 on
 * any failure; in that case a COMMERROR message has been logged and
 * close_SSL(port) has been called.
 */
static int
open_server_SSL(Port *port)
{
	int			rc;
	int			ssl_err;

	Assert(!port->ssl);
	Assert(!port->peer);

	port->ssl = SSL_new(SSL_context);
	if (port->ssl == NULL)
	{
		ereport(COMMERROR,
				(errcode(ERRCODE_PROTOCOL_VIOLATION),
				 errmsg("could not initialize SSL connection: %s",
						SSLerrmessage())));
		close_SSL(port);
		return -1;
	}

	if (!my_SSL_set_fd(port->ssl, port->sock))
	{
		ereport(COMMERROR,
				(errcode(ERRCODE_PROTOCOL_VIOLATION),
				 errmsg("could not set SSL socket: %s",
						SSLerrmessage())));
		close_SSL(port);
		return -1;
	}

	/* Retry the handshake for as long as OpenSSL only asks for more I/O. */
	for (;;)
	{
		rc = SSL_accept(port->ssl);
		if (rc > 0)
			break;				/* handshake complete */

		ssl_err = SSL_get_error(port->ssl, rc);

		if (ssl_err == SSL_ERROR_WANT_READ || ssl_err == SSL_ERROR_WANT_WRITE)
		{
#ifdef WIN32
			pgwin32_waitforsinglesocket(SSL_get_fd(port->ssl),
										(ssl_err == SSL_ERROR_WANT_READ) ?
										FD_READ | FD_CLOSE | FD_ACCEPT
										: FD_WRITE | FD_CLOSE,
										INFINITE);
#endif
			continue;
		}

		/* Anything else is a hard failure: log it and give up. */
		switch (ssl_err)
		{
			case SSL_ERROR_SYSCALL:
				if (rc < 0)
					ereport(COMMERROR,
							(errcode_for_socket_access(),
							 errmsg("could not accept SSL connection: %m")));
				else
					ereport(COMMERROR,
							(errcode(ERRCODE_PROTOCOL_VIOLATION),
							 errmsg("could not accept SSL connection: EOF detected")));
				break;
			case SSL_ERROR_SSL:
				ereport(COMMERROR,
						(errcode(ERRCODE_PROTOCOL_VIOLATION),
						 errmsg("could not accept SSL connection: %s",
								SSLerrmessage())));
				break;
			case SSL_ERROR_ZERO_RETURN:
				ereport(COMMERROR,
						(errcode(ERRCODE_PROTOCOL_VIOLATION),
						 errmsg("could not accept SSL connection: EOF detected")));
				break;
			default:
				ereport(COMMERROR,
						(errcode(ERRCODE_PROTOCOL_VIOLATION),
						 errmsg("unrecognized SSL error code: %d",
								ssl_err)));
				break;
		}
		close_SSL(port);
		return -1;
	}

	port->count = 0;

	/* Fetch the client certificate, if the client presented one. */
	port->peer = SSL_get_peer_certificate(port->ssl);
	if (port->peer != NULL)
	{
		X509_NAME_oneline(X509_get_subject_name(port->peer),
						  port->peer_dn, sizeof(port->peer_dn));
		port->peer_dn[sizeof(port->peer_dn) - 1] = '\0';

		rc = X509_NAME_get_text_by_NID(X509_get_subject_name(port->peer),
									   NID_commonName, port->peer_cn,
									   sizeof(port->peer_cn));
		port->peer_cn[sizeof(port->peer_cn) - 1] = '\0';

		if (rc == -1)
		{
			/* Unable to get the CN, set it to blank so it can't be used */
			port->peer_cn[0] = '\0';
		}
		else if (rc != strlen(port->peer_cn))
		{
			/*
			 * A reported length that differs from strlen() means the common
			 * name contains an embedded NUL; reject it to prevent attacks
			 * like CVE-2009-4034.
			 */
			ereport(COMMERROR,
					(errcode(ERRCODE_PROTOCOL_VIOLATION),
					 errmsg("SSL certificate's common name contains embedded null")));
			close_SSL(port);
			return -1;
		}
	}
	else
	{
		strlcpy(port->peer_dn, "(anonymous)", sizeof(port->peer_dn));
		strlcpy(port->peer_cn, "(anonymous)", sizeof(port->peer_cn));
	}

	ereport(DEBUG2,
			(errmsg("SSL connection from \"%s\"",
					port->peer_cn)));

	/* set up debugging/info callback */
	SSL_CTX_set_info_callback(SSL_context, info_cb);

	return 0;
}
/* -------------------------------- * pq_getbyte_if_available - get a single byte from connection, * if available * * The received byte is stored in *c. Returns 1 if a byte was read, * 0 if no data was available, or EOF if trouble. * -------------------------------- */ int pq_getbyte_if_available(unsigned char *c) { int r; if (PqRecvPointer < PqRecvLength) { *c = PqRecvBuffer[PqRecvPointer++]; return 1; } /* Temporarily put the socket into non-blocking mode */ #ifdef WIN32 pgwin32_noblock = 1; #else if (!pg_set_noblock(MyProcPort->sock)) ereport(ERROR, (errmsg("could not set socket to non-blocking mode: %m"))); #endif MyProcPort->noblock = true; PG_TRY(); { r = secure_read(MyProcPort, c, 1); if (r < 0) { /* * Ok if no data available without blocking or interrupted (though * EINTR really shouldn't happen with a non-blocking socket). * Report other errors. */ if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR) r = 0; else { /* * Careful: an ereport() that tries to write to the client * would cause recursion to here, leading to stack overflow * and core dump! This message must go *only* to the * postmaster log. */ ereport(COMMERROR, (errcode_for_socket_access(), errmsg("could not receive data from client: %m"))); r = EOF; } } else if (r == 0) { /* EOF detected */ r = EOF; } } PG_CATCH(); { /* * The rest of the backend code assumes the socket is in blocking * mode, so treat failure as FATAL. */ #ifdef WIN32 pgwin32_noblock = 0; #else if (!pg_set_block(MyProcPort->sock)) ereport(FATAL, (errmsg("could not set socket to blocking mode: %m"))); #endif MyProcPort->noblock = false; PG_RE_THROW(); } PG_END_TRY(); #ifdef WIN32 pgwin32_noblock = 0; #else if (!pg_set_block(MyProcPort->sock)) ereport(FATAL, (errmsg("could not set socket to blocking mode: %m"))); #endif MyProcPort->noblock = false; return r; }
/*
 * Read a message from the specified connection carrying file descriptors
 *
 * Message layout, as parsed below:
 *	 byte 0			   message code, must be 'f'
 *	 bytes 1..4		   message size, network byte order, must be 8
 *	 bytes 5..8		   number of connections, network byte order
 * The descriptors themselves arrive as ancillary (control) data.
 *
 * port:  pooler connection to read from.
 * fds:	  output array with room for "count" descriptors.
 * count: number of descriptors the caller asked for.
 *
 * Returns 0 on success, with fds[0..count-1] filled in.  Returns EOF if the
 * control buffer cannot be allocated, the peer closed the connection, or the
 * pool reported that it could not acquire connections.  Receive errors and
 * protocol violations are raised with ereport(ERROR).
 *
 * ereport(ERROR) does not return to its caller, and the control buffer comes
 * from malloc(), so it is released explicitly before each such report.
 */
int
pool_recvfds(PoolPort *port, int *fds, int count)
{
	int			r;
	uint		n32;
	char		buf[SEND_MSG_BUFFER_SIZE];
	struct iovec iov[1];
	struct msghdr msg;
	struct cmsghdr *cmsg;
	int			controllen = CMSG_LEN(count * sizeof(int));
	struct cmsghdr *cmptr = malloc(CMSG_SPACE(count * sizeof(int)));

	if (cmptr == NULL)
		return EOF;

	/* Start from a fully defined msghdr, including fields not set below. */
	memset(&msg, 0, sizeof(msg));
	iov[0].iov_base = buf;
	iov[0].iov_len = SEND_MSG_BUFFER_SIZE;
	msg.msg_iov = iov;
	msg.msg_iovlen = 1;
	msg.msg_name = NULL;
	msg.msg_namelen = 0;
	msg.msg_control = (caddr_t) cmptr;
	msg.msg_controllen = controllen;

	r = recvmsg(Socket(*port), &msg, 0);
	if (r < 0)
	{
		/* free() may change errno, but %m below must describe recvmsg() */
		int			save_errno = errno;

		free(cmptr);
		errno = save_errno;

		/*
		 * Report broken connection
		 */
		ereport(ERROR,
				(errcode_for_socket_access(),
				 errmsg("could not receive data from client: %m")));
		return EOF;
	}
	else if (r == 0)
	{
		/* peer closed the connection */
		goto failure;
	}
	else if (r != SEND_MSG_BUFFER_SIZE)
	{
		free(cmptr);
		ereport(ERROR,
				(errcode(ERRCODE_PROTOCOL_VIOLATION),
				 errmsg("incomplete message from client")));
		return EOF;
	}

	/* Verify response */
	if (buf[0] != 'f')
	{
		free(cmptr);
		ereport(ERROR,
				(errcode(ERRCODE_PROTOCOL_VIOLATION),
				 errmsg("unexpected message code")));
		return EOF;
	}

	memcpy(&n32, buf + 1, 4);
	n32 = ntohl(n32);
	if (n32 != 8)
	{
		free(cmptr);
		ereport(ERROR,
				(errcode(ERRCODE_PROTOCOL_VIOLATION),
				 errmsg("invalid message size")));
		return EOF;
	}

	/*
	 * If connection count is 0 it means pool does not have connections
	 * to fulfill request. Otherwise number of returned connections
	 * should be equal to requested count. If it not the case consider this
	 * a protocol violation. (Probably connection went out of sync)
	 */
	memcpy(&n32, buf + 5, 4);
	n32 = ntohl(n32);
	if (n32 == 0)
	{
		ereport(LOG,
				(errcode(ERRCODE_INSUFFICIENT_RESOURCES),
				 errmsg("failed to acquire connections")));
		goto failure;
	}

	if (n32 != count)
	{
		free(cmptr);
		ereport(ERROR,
				(errcode(ERRCODE_PROTOCOL_VIOLATION),
				 errmsg("unexpected connection count")));
		return EOF;
	}

	/*
	 * The data part promised "count" descriptors; make sure the control
	 * message really carries them before copying.  CMSG_FIRSTHDR() returns
	 * NULL when no ancillary data arrived at all.
	 */
	cmsg = CMSG_FIRSTHDR(&msg);
	if (cmsg == NULL ||
		cmsg->cmsg_level != SOL_SOCKET ||
		cmsg->cmsg_type != SCM_RIGHTS ||
		cmsg->cmsg_len < CMSG_LEN(count * sizeof(int)))
	{
		free(cmptr);
		ereport(ERROR,
				(errcode(ERRCODE_PROTOCOL_VIOLATION),
				 errmsg("invalid or missing file descriptor control message")));
		return EOF;
	}

	memcpy(fds, CMSG_DATA(cmsg), count * sizeof(int));
	free(cmptr);
	return 0;

failure:
	free(cmptr);
	return EOF;
}