int
FileRepConnServer_ReceiveMessageType(
									 FileRepConsumerProcIndex_e *fileRepMessageType)
{

	char		messageType;

	int			status = STATUS_OK;

	messageType = pq_getbyte();


	switch (messageType)
	{

		case '1':
			*fileRepMessageType = FileRepMessageTypeXLog;
			break;

		case '2':
			*fileRepMessageType = FileRepMessageTypeAO01;
			break;

		case '3':
			*fileRepMessageType = FileRepMessageTypeWriter;
			break;

		case 'S':
			*fileRepMessageType = FileRepMessageTypeShutdown;
			break;

		case EOF:
			ereport(WARNING,
					(errcode_for_socket_access(),
					 errmsg("receive EOF on connection: %m")));
			status = STATUS_ERROR;
			break;
		case 'X':
			/* Close Message(sent by PQfinish()) */

			/*
			 * Client closed connection.  Client does not wait for response.
			 */
			ereport(WARNING,
					(errcode_for_socket_access(),
					 errmsg("receive close on connection: %m")));
			status = STATUS_ERROR;
			break;
		default:
			ereport(WARNING,
					(errcode_for_socket_access(),
					 errmsg("receive unexpected message type on connection: %m")));
			status = STATUS_ERROR;
			break;
	}

	return status;
}
int
FileRepConnServer_CreateConnection()
{
	int			status = STATUS_OK;

	port = (Port *) calloc(1, sizeof(Port));
	if (port == NULL)
	{
		ereport(ERROR,
				(errcode(ERRCODE_OUT_OF_MEMORY),
				 (errmsg("not enough memory to create connection"))));
		return status;
	}

	status = StreamConnection(listenSocket[0], port);
	if (status != STATUS_OK)
	{
		ereport(WARNING,
				(errcode_for_socket_access(),
				 errmsg("could not accept connection: %m"),
				 FileRep_errcontext()));

		if (port->sock >= 0)
		{
			StreamClose(port->sock);
		}
		ConnFree();
	}
	else
	{
		/*
		 * MPP-14225: On NIC failure, filerep receiver process's recv() system
		 * call will take hours to timeout, depending on the TCP timeout. Add
		 * SO_RCVTIMEO timeout to filerep receiver process's socket to avoid
		 * this.
		 */
		struct timeval tv;

		tv.tv_sec = file_rep_socket_timeout;
		tv.tv_usec = 0;			/* Not initializing this can cause strange
								 * errors */

		if (setsockopt(port->sock, SOL_SOCKET, SO_RCVTIMEO, (char *) &tv, sizeof(struct timeval)) == -1)
			ereport(WARNING,
					(errcode_for_socket_access(),
					 errmsg("could not set receive timeout on socket")));

		/* set TCP keep-alive parameters for FileRep connection */
		(void) pq_setkeepalivesidle(gp_filerep_tcp_keepalives_idle, port);
		(void) pq_setkeepalivesinterval(gp_filerep_tcp_keepalives_interval, port);
		(void) pq_setkeepalivescount(gp_filerep_tcp_keepalives_count, port);

		MyProcPort = port;
	}

	return status;
}
Example #3
0
/*
 * Wait until we can read WAL stream, or timeout.
 *
 * Returns true if data has become available for reading, false if timed out
 * or interrupted by signal.
 *
 * This is based on pqSocketCheck.
 */
static bool
libpq_select(int timeout_ms)
{
	int			ret;

	Assert(streamConn != NULL);
	if (PQsocket(streamConn) < 0)
		ereport(ERROR,
				(errcode_for_socket_access(),
				 errmsg("socket not open")));

	/* We use poll(2) if available, otherwise select(2) */
	{
#ifdef HAVE_POLL
		struct pollfd input_fd;

		input_fd.fd = PQsocket(streamConn);
		input_fd.events = POLLIN | POLLERR;
		input_fd.revents = 0;

		ret = poll(&input_fd, 1, timeout_ms);
#else							/* !HAVE_POLL */

		fd_set		input_mask;
		struct timeval timeout;
		struct timeval *ptr_timeout;

		FD_ZERO(&input_mask);
		FD_SET(PQsocket(streamConn), &input_mask);

		if (timeout_ms < 0)
			ptr_timeout = NULL;
		else
		{
			timeout.tv_sec = timeout_ms / 1000;
			timeout.tv_usec = (timeout_ms % 1000) * 1000;
			ptr_timeout = &timeout;
		}

		ret = select(PQsocket(streamConn) + 1, &input_mask,
					 NULL, NULL, ptr_timeout);
#endif   /* HAVE_POLL */
	}

	if (ret == 0 || (ret < 0 && errno == EINTR))
		return false;
	if (ret < 0)
		ereport(ERROR,
				(errcode_for_socket_access(),
				 errmsg("select() failed: %m")));
	return true;
}
Example #4
0
int 
FileRepConnServer_StartListener(
		char	*hostAddress,
		int		portLocal)
{

	int	status = STATUS_OK;
	int i;
	
	for (i=0; i < FILEREP_MAX_LISTEN; i++) {
		listenSocket[i] = -1;
	}
	
	/* NOTE check if family AF_UNIX has to be considered as well */
	status = StreamServerPort(
					AF_UNSPEC, 
					hostAddress,
					(unsigned short) portLocal,
					NULL,
					listenSocket,
					FILEREP_MAX_LISTEN);

	if (status != STATUS_OK) {
		ereport(WARNING, 
				(errcode_for_socket_access(),
				 errmsg("could not start listener, host:'%s' port:'%d': %m",
						hostAddress,
						portLocal),
				 errSendAlert(true),
				 FileRep_errcontext()));
	}
	
	return status;
}
Example #5
0
int
FileRepConnServer_ReceiveMessageLength(uint32 *len)

{
	int32 length;
	
	if (pq_getbytes((char*) &length, 4) == EOF) {
		ereport(WARNING,
				(errcode_for_socket_access(),
				 errmsg("receive EOF on connection: %m")));
		
		return STATUS_ERROR;
	}

	length = ntohl(length);
	
	if (length < 4) {
		ereport(WARNING,
				 (errmsg("receive unexpected message length on connection")));
		return STATUS_ERROR;
	}
	
	length -= 4;
	
	*len = length;

	return STATUS_OK;
}
Example #6
0
/*
 * set_connection_status_bad does not remove the given connection from the connection hash.
 * It simply shuts down the underlying socket. On success, it returns true.
 */
Datum
set_connection_status_bad(PG_FUNCTION_ARGS)
{
	char *nodeName = PG_GETARG_CSTRING(0);
	int32 nodePort = PG_GETARG_INT32(1);
	int socket = -1;
	int shutdownStatus = 0;
	int pqStatus PG_USED_FOR_ASSERTS_ONLY = 0;

	PGconn *connection = GetOrEstablishConnection(nodeName, nodePort);
	if (connection == NULL)
	{
		PG_RETURN_BOOL(false);
	}

	/* Prevent further reads/writes... */
	socket = PQsocket(connection);
	shutdownStatus = shutdown(socket, SHUT_RDWR);
	if (shutdownStatus != 0)
	{
		ereport(ERROR, (errcode_for_socket_access(), errmsg("shutdown failed")));
	}

	/* ... and make libpq notice by reading data. */
	pqStatus = PQconsumeInput(connection);

	Assert(pqStatus == 0); /* expect failure */

	PG_RETURN_BOOL(true);
}
Example #7
0
/* --------------------------------
 *		pq_recvbuf - load some bytes into the input buffer
 *
 *		returns 0 if OK, EOF if trouble
 * --------------------------------
 */
static int
pq_recvbuf(void)
{
    if (PqRecvPointer > 0)
    {
        if (PqRecvLength > PqRecvPointer)
        {
            /* still some unread data, left-justify it in the buffer */
            memmove(PqRecvBuffer, PqRecvBuffer + PqRecvPointer,
                    PqRecvLength - PqRecvPointer);
            PqRecvLength -= PqRecvPointer;
            PqRecvPointer = 0;
        }
        else
            PqRecvLength = PqRecvPointer = 0;
    }

    /* Ensure that we're in blocking mode */
    pq_set_nonblocking(false);

    /* Can fill buffer from PqRecvLength and upwards */
    for (;;)
    {
        int			r;

        r = secure_read(MyProcPort, PqRecvBuffer + PqRecvLength,
                        PQ_RECV_BUFFER_SIZE - PqRecvLength);

        if (r < 0)
        {
            if (errno == EINTR)
                continue;		/* Ok if interrupted */

            /*
             * Careful: an ereport() that tries to write to the client would
             * cause recursion to here, leading to stack overflow and core
             * dump!  This message must go *only* to the postmaster log.
             */
            ereport(COMMERROR,
                    (errcode_for_socket_access(),
                     errmsg("could not receive data from client: %m")));
            return EOF;
        }
        if (r == 0)
        {
            /*
             * EOF detected.  We used to write a log message here, but it's
             * better to expect the ultimate caller to do that.
             */
            return EOF;
        }
        /* r contains number of bytes read, so just incr length */
        PqRecvLength += r;
        return 0;
    }
}
int
FileRepConnClient_EstablishConnection(
									  char *hostAddress,
									  int port,
									  bool reportError)
{
	int			status = STATUS_OK;
	char		portbuf[11];
	char		timeoutbuf[11];
	const char *keys[5];
	const char *vals[5];

/*	FileRepConnClient_CloseConnection();*/

	snprintf(portbuf, sizeof(portbuf), "%d", port);
	snprintf(timeoutbuf, sizeof(timeoutbuf), "%d", gp_segment_connect_timeout);

	keys[0] = "host";
	vals[0] = hostAddress;
	keys[1] = "port";
	vals[1] = portbuf;
	keys[2] = "dbname";
	vals[2] = "postgres";
	keys[3] = "connect_timeout";
	vals[3] = timeoutbuf;
	keys[4] = NULL;
	vals[4] = NULL;

	filerep_conn = PQconnectdbParams(keys, vals, false);

	if (PQstatus(filerep_conn) != CONNECTION_OK)
	{
		if (reportError || Debug_filerep_print)
			ereport(WARNING,
					(errcode_for_socket_access(),
					 errmsg("could not establish connection with server, host:'%s' port:'%d' err:'%s' : %m",
							hostAddress,
							port,
							PQerrorMessage(filerep_conn)),
					 errSendAlert(true),
					 FileRep_errcontext()));

		status = STATUS_ERROR;

		if (filerep_conn)
		{
			PQfinish(filerep_conn);
			filerep_conn = NULL;
		}
	}

	/* NOTE Handle error message see ftsprobe.c */

	return status;
}
Example #9
0
/* --------------------------------
 * pool_recvbuf - load some bytes into the input buffer
 *
 * returns 0 if OK, EOF if trouble
 * --------------------------------
 */
static int
pool_recvbuf(PoolPort *port)
{
	if (port->RecvPointer > 0)
	{
		if (port->RecvLength > port->RecvPointer)
		{
			/* still some unread data, left-justify it in the buffer */
			memmove(port->RecvBuffer, port->RecvBuffer + port->RecvPointer,
					port->RecvLength - port->RecvPointer);
			port->RecvLength -= port->RecvPointer;
			port->RecvPointer = 0;
		}
		else
			port->RecvLength = port->RecvPointer = 0;
	}

	/* Can fill buffer from PqRecvLength and upwards */
	for (;;)
	{
		int			r;

		r = recv(Socket(*port), port->RecvBuffer + port->RecvLength,
				 POOL_BUFFER_SIZE - port->RecvLength, 0);

		if (r < 0)
		{
			if (errno == EINTR)
				continue;		/* Ok if interrupted */

			/*
			 * Report broken connection
			 */
			ereport(LOG,
					(errcode_for_socket_access(),
					 errmsg("could not receive data from client: %m")));
			return EOF;
		}
		if (r == 0)
		{
			/*
			 * EOF detected.  We used to write a log message here, but it's
			 * better to expect the ultimate caller to do that.
			 */
			return EOF;
		}
		/* r contains number of bytes read, so just incr length */
		port->RecvLength += r;
		return 0;
	}
}
Example #10
0
static int
internal_flush(void)
{
	static int	last_reported_send_errno = 0;

	char	   *bufptr = PqSendBuffer;
	char	   *bufend = PqSendBuffer + PqSendPointer;

	while (bufptr < bufend)
	{
		int			r;

		r = secure_write(MyProcPort, bufptr, bufend - bufptr);

		if (r <= 0)
		{
			if (errno == EINTR)
				continue;		/* Ok if we were interrupted */

			/*
			 * Careful: an ereport() that tries to write to the client would
			 * cause recursion to here, leading to stack overflow and core
			 * dump!  This message must go *only* to the postmaster log.
			 *
			 * If a client disconnects while we're in the midst of output, we
			 * might write quite a bit of data before we get to a safe query
			 * abort point.  So, suppress duplicate log messages.
			 */
			if (errno != last_reported_send_errno)
			{
				last_reported_send_errno = errno;
				ereport(COMMERROR,
						(errcode_for_socket_access(),
						 errmsg("could not send data to client: %m")));
			}

			/*
			 * We drop the buffered data anyway so that processing can
			 * continue, even though we'll probably quit soon.
			 */
			PqSendPointer = 0;
			return EOF;
		}

		last_reported_send_errno = 0;	/* reset after any successful send */
		bufptr += r;
	}

	PqSendPointer = 0;
	return 0;
}
Example #11
0
/*
 * Read result from specified connection.
 * Return 0 at success or EOF at error.
 */
int
pool_recvres(PoolPort *port)
{
	int			r;
	int			res = 0;
	uint		n32;
	char		buf[SEND_RES_BUFFER_SIZE];

	r = recv(Socket(*port), &buf, SEND_RES_BUFFER_SIZE, 0);
	if (r < 0)
	{
		/*
		 * Report broken connection
		 */
		ereport(ERROR,
				(errcode_for_socket_access(),
				 errmsg("could not receive data from client: %m")));
		goto failure;
	}
	else if (r == 0)
	{
		goto failure;
	}
	else if (r != SEND_RES_BUFFER_SIZE)
	{
		ereport(ERROR,
				(errcode(ERRCODE_PROTOCOL_VIOLATION),
				 errmsg("incomplete message from client")));
		goto failure;
	}

	/* Verify response */
	if (buf[0] != 's')
	{
		ereport(ERROR,
				(errcode(ERRCODE_PROTOCOL_VIOLATION),
				 errmsg("unexpected message code")));
		goto failure;
	}

	memcpy(&n32, buf + 1, 4);
	n32 = ntohl(n32);
	if (n32 != 0)
		return EOF;

	return res;

failure:
	return EOF;
}
Example #12
0
 int
 FileRepConnServer_ReceiveMessageData(
			char		*data,
			uint32		length)
 {

	 if (pq_getbytes(data, length) == EOF) 
	 {
		 ereport(WARNING,
				 (errcode_for_socket_access(),
				  errmsg("receive EOF on connection: %m")));

		 return STATUS_ERROR;
	 }

	 return STATUS_OK;
 }
Example #13
0
/* --------------------------------
 *		pq_getbyte_if_available - get a single byte from connection,
 *			if available
 *
 * The received byte is stored in *c. Returns 1 if a byte was read,
 * 0 if no data was available, or EOF if trouble.
 * --------------------------------
 */
int
pq_getbyte_if_available(unsigned char *c)
{
    int			r;

    if (PqRecvPointer < PqRecvLength)
    {
        *c = PqRecvBuffer[PqRecvPointer++];
        return 1;
    }

    /* Put the socket into non-blocking mode */
    pq_set_nonblocking(true);

    r = secure_read(MyProcPort, c, 1);
    if (r < 0)
    {
        /*
         * Ok if no data available without blocking or interrupted (though
         * EINTR really shouldn't happen with a non-blocking socket).
         * Report other errors.
         */
        if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR)
            r = 0;
        else
        {
            /*
             * Careful: an ereport() that tries to write to the client
             * would cause recursion to here, leading to stack overflow
             * and core dump!  This message must go *only* to the
             * postmaster log.
             */
            ereport(COMMERROR,
                    (errcode_for_socket_access(),
                     errmsg("could not receive data from client: %m")));
            r = EOF;
        }
    }
    else if (r == 0)
    {
        /* EOF detected */
        r = EOF;
    }

    return r;
}
Example #14
0
/*
 * action can be one of EPOLL_CTL_ADD | EPOLL_CTL_MOD | EPOLL_CTL_DEL
 */
static void
WaitEventAdjustEpoll(WaitEventSet *set, WaitEvent *event, int action)
{
	struct epoll_event epoll_ev;
	int			rc;

	/* pointer to our event, returned by epoll_wait */
	epoll_ev.data.ptr = event;
	/* always wait for errors */
	epoll_ev.events = EPOLLERR | EPOLLHUP;

	/* prepare pollfd entry once */
	if (event->events == WL_LATCH_SET)
	{
		Assert(set->latch != NULL);
		epoll_ev.events |= EPOLLIN;
	}
	else if (event->events == WL_POSTMASTER_DEATH)
	{
		epoll_ev.events |= EPOLLIN;
	}
	else
	{
		Assert(event->fd != PGINVALID_SOCKET);
		Assert(event->events & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE));

		if (event->events & WL_SOCKET_READABLE)
			epoll_ev.events |= EPOLLIN;
		if (event->events & WL_SOCKET_WRITEABLE)
			epoll_ev.events |= EPOLLOUT;
	}

	/*
	 * Even though unused, we also pass epoll_ev as the data argument if
	 * EPOLL_CTL_DEL is passed as action.  There used to be an epoll bug
	 * requiring that, and actually it makes the code simpler...
	 */
	rc = epoll_ctl(set->epoll_fd, action, event->fd, &epoll_ev);

	if (rc < 0)
		ereport(ERROR,
				(errcode_for_socket_access(),
				 errmsg("epoll_ctl() failed: %m")));
}
int
FileRepConnServer_Select(void)
{
	struct timeval timeout;
	fd_set		rfds;
	int			retval;

	timeout.tv_sec = 0;
	timeout.tv_usec = 100 * 1000L;

	FD_ZERO(&rfds);

	FD_SET(listenSocket[0], &rfds);

	retval = select(listenSocket[0] + 1, &rfds, NULL, NULL, &timeout);

	/*
	 * check and process any signals received The routine returns TRUE if the
	 * received signal requests process shutdown.
	 */

	if (retval)
	{
		if (!FD_ISSET(listenSocket[0], &rfds))
		{
			retval = -1;
		}
	}

	if (retval == -1)
	{
		ereport(WARNING,
				(errcode_for_socket_access(),
				 errmsg("receive failure on connection: %m"),
				 FileRep_errcontext()));
	}

	return retval;
}
Example #16
0
/*
 * Like WaitLatch, but with an extra socket argument for WL_SOCKET_*
 * conditions.
 *
 * When waiting on a socket, WL_SOCKET_READABLE *must* be included in
 * 'wakeEvents'; WL_SOCKET_WRITEABLE is optional.  The reason for this is
 * that EOF and error conditions are reported only via WL_SOCKET_READABLE.
 */
int
WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
				  long timeout)
{
	int			result = 0;
	int			rc;
	instr_time	start_time,
				cur_time;
	long		cur_timeout;

#ifdef HAVE_POLL
	struct pollfd pfds[3];
	int			nfds;
#else
	struct timeval tv,
			   *tvp;
	fd_set		input_mask;
	fd_set		output_mask;
	int			hifd;
#endif

	/* Ignore WL_SOCKET_* events if no valid socket is given */
	if (sock == PGINVALID_SOCKET)
		wakeEvents &= ~(WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE);

	Assert(wakeEvents != 0);	/* must have at least one wake event */
	/* Cannot specify WL_SOCKET_WRITEABLE without WL_SOCKET_READABLE */
	Assert((wakeEvents & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE)) != WL_SOCKET_WRITEABLE);

	if ((wakeEvents & WL_LATCH_SET) && latch->owner_pid != MyProcPid)
		elog(ERROR, "cannot wait on a latch owned by another process");

	/*
	 * Initialize timeout if requested.  We must record the current time so
	 * that we can determine the remaining timeout if the poll() or select()
	 * is interrupted.	(On some platforms, select() will update the contents
	 * of "tv" for us, but unfortunately we can't rely on that.)
	 */
	if (wakeEvents & WL_TIMEOUT)
	{
		INSTR_TIME_SET_CURRENT(start_time);
		Assert(timeout >= 0 && timeout <= INT_MAX);
		cur_timeout = timeout;

#ifndef HAVE_POLL
		tv.tv_sec = cur_timeout / 1000L;
		tv.tv_usec = (cur_timeout % 1000L) * 1000L;
		tvp = &tv;
#endif
	}
	else
	{
		cur_timeout = -1;

#ifndef HAVE_POLL
		tvp = NULL;
#endif
	}

	waiting = true;
	do
	{
		/*
		 * Clear the pipe, then check if the latch is set already. If someone
		 * sets the latch between this and the poll()/select() below, the
		 * setter will write a byte to the pipe (or signal us and the signal
		 * handler will do that), and the poll()/select() will return
		 * immediately.
		 *
		 * Note: we assume that the kernel calls involved in drainSelfPipe()
		 * and SetLatch() will provide adequate synchronization on machines
		 * with weak memory ordering, so that we cannot miss seeing is_set if
		 * the signal byte is already in the pipe when we drain it.
		 */
		drainSelfPipe();

		if ((wakeEvents & WL_LATCH_SET) && latch->is_set)
		{
			result |= WL_LATCH_SET;

			/*
			 * Leave loop immediately, avoid blocking again. We don't attempt
			 * to report any other events that might also be satisfied.
			 */
			break;
		}

		/* Must wait ... we use poll(2) if available, otherwise select(2) */
#ifdef HAVE_POLL
		nfds = 0;
		if (wakeEvents & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE))
		{
			/* socket, if used, is always in pfds[0] */
			pfds[0].fd = sock;
			pfds[0].events = 0;
			if (wakeEvents & WL_SOCKET_READABLE)
				pfds[0].events |= POLLIN;
			if (wakeEvents & WL_SOCKET_WRITEABLE)
				pfds[0].events |= POLLOUT;
			pfds[0].revents = 0;
			nfds++;
		}

		pfds[nfds].fd = selfpipe_readfd;
		pfds[nfds].events = POLLIN;
		pfds[nfds].revents = 0;
		nfds++;

		if (wakeEvents & WL_POSTMASTER_DEATH)
		{
			/* postmaster fd, if used, is always in pfds[nfds - 1] */
			pfds[nfds].fd = postmaster_alive_fds[POSTMASTER_FD_WATCH];
			pfds[nfds].events = POLLIN;
			pfds[nfds].revents = 0;
			nfds++;
		}

		/* Sleep */
		rc = poll(pfds, nfds, (int) cur_timeout);

		/* Check return code */
		if (rc < 0)
		{
			/* EINTR is okay, otherwise complain */
			if (errno != EINTR)
			{
				waiting = false;
				ereport(ERROR,
						(errcode_for_socket_access(),
						 errmsg("poll() failed: %m")));
			}
		}
		else if (rc == 0)
		{
			/* timeout exceeded */
			if (wakeEvents & WL_TIMEOUT)
				result |= WL_TIMEOUT;
		}
		else
		{
			/* at least one event occurred, so check revents values */
			if ((wakeEvents & WL_SOCKET_READABLE) &&
				(pfds[0].revents & (POLLIN | POLLHUP | POLLERR | POLLNVAL)))
			{
				/* data available in socket, or EOF/error condition */
				result |= WL_SOCKET_READABLE;
			}
			if ((wakeEvents & WL_SOCKET_WRITEABLE) &&
				(pfds[0].revents & POLLOUT))
			{
				result |= WL_SOCKET_WRITEABLE;
			}

			/*
			 * We expect a POLLHUP when the remote end is closed, but because
			 * we don't expect the pipe to become readable or to have any
			 * errors either, treat those cases as postmaster death, too.
			 */
			if ((wakeEvents & WL_POSTMASTER_DEATH) &&
				(pfds[nfds - 1].revents & (POLLHUP | POLLIN | POLLERR | POLLNVAL)))
			{
				/*
				 * According to the select(2) man page on Linux, select(2) may
				 * spuriously return and report a file descriptor as readable,
				 * when it's not; and presumably so can poll(2).  It's not
				 * clear that the relevant cases would ever apply to the
				 * postmaster pipe, but since the consequences of falsely
				 * returning WL_POSTMASTER_DEATH could be pretty unpleasant,
				 * we take the trouble to positively verify EOF with
				 * PostmasterIsAlive().
				 */
				if (!PostmasterIsAlive())
					result |= WL_POSTMASTER_DEATH;
			}
		}
#else							/* !HAVE_POLL */

		FD_ZERO(&input_mask);
		FD_ZERO(&output_mask);

		FD_SET(selfpipe_readfd, &input_mask);
		hifd = selfpipe_readfd;

		if (wakeEvents & WL_POSTMASTER_DEATH)
		{
			FD_SET(postmaster_alive_fds[POSTMASTER_FD_WATCH], &input_mask);
			if (postmaster_alive_fds[POSTMASTER_FD_WATCH] > hifd)
				hifd = postmaster_alive_fds[POSTMASTER_FD_WATCH];
		}

		if (wakeEvents & WL_SOCKET_READABLE)
		{
			FD_SET(sock, &input_mask);
			if (sock > hifd)
				hifd = sock;
		}

		if (wakeEvents & WL_SOCKET_WRITEABLE)
		{
			FD_SET(sock, &output_mask);
			if (sock > hifd)
				hifd = sock;
		}

		/* Sleep */
		rc = select(hifd + 1, &input_mask, &output_mask, NULL, tvp);

		/* Check return code */
		if (rc < 0)
		{
			/* EINTR is okay, otherwise complain */
			if (errno != EINTR)
			{
				waiting = false;
				ereport(ERROR,
						(errcode_for_socket_access(),
						 errmsg("select() failed: %m")));
			}
		}
		else if (rc == 0)
		{
			/* timeout exceeded */
			if (wakeEvents & WL_TIMEOUT)
				result |= WL_TIMEOUT;
		}
		else
		{
			/* at least one event occurred, so check masks */
			if ((wakeEvents & WL_SOCKET_READABLE) && FD_ISSET(sock, &input_mask))
			{
				/* data available in socket, or EOF */
				result |= WL_SOCKET_READABLE;
			}
			if ((wakeEvents & WL_SOCKET_WRITEABLE) && FD_ISSET(sock, &output_mask))
			{
				result |= WL_SOCKET_WRITEABLE;
			}
			if ((wakeEvents & WL_POSTMASTER_DEATH) &&
			FD_ISSET(postmaster_alive_fds[POSTMASTER_FD_WATCH], &input_mask))
			{
				/*
				 * According to the select(2) man page on Linux, select(2) may
				 * spuriously return and report a file descriptor as readable,
				 * when it's not; and presumably so can poll(2).  It's not
				 * clear that the relevant cases would ever apply to the
				 * postmaster pipe, but since the consequences of falsely
				 * returning WL_POSTMASTER_DEATH could be pretty unpleasant,
				 * we take the trouble to positively verify EOF with
				 * PostmasterIsAlive().
				 */
				if (!PostmasterIsAlive())
					result |= WL_POSTMASTER_DEATH;
			}
		}
#endif   /* HAVE_POLL */

		/* If we're not done, update cur_timeout for next iteration */
		if (result == 0 && cur_timeout >= 0)
		{
			INSTR_TIME_SET_CURRENT(cur_time);
			INSTR_TIME_SUBTRACT(cur_time, start_time);
			cur_timeout = timeout - (long) INSTR_TIME_GET_MILLISEC(cur_time);
			if (cur_timeout < 0)
				cur_timeout = 0;

#ifndef HAVE_POLL
			tv.tv_sec = cur_timeout / 1000L;
			tv.tv_usec = (cur_timeout % 1000L) * 1000L;
#endif
		}
	} while (result == 0);
	waiting = false;

	return result;
}
Example #17
0
/*
 *	Attempt to negotiate SSL connection.
 */
static int
open_server_SSL(Port *port)
{
	int			r;
	int			err;

	Assert(!port->ssl);
	Assert(!port->peer);

	if (!(port->ssl = SSL_new(SSL_context)))
	{
		ereport(COMMERROR,
				(errcode(ERRCODE_PROTOCOL_VIOLATION),
				 errmsg("could not initialize SSL connection: %s",
						SSLerrmessage())));
		close_SSL(port);
		return -1;
	}
	if (!my_SSL_set_fd(port->ssl, port->sock))
	{
		ereport(COMMERROR,
				(errcode(ERRCODE_PROTOCOL_VIOLATION),
				 errmsg("could not set SSL socket: %s",
						SSLerrmessage())));
		close_SSL(port);
		return -1;
	}

aloop:
	r = SSL_accept(port->ssl);
	if (r <= 0)
	{
		err = SSL_get_error(port->ssl, r);
		switch (err)
		{
			case SSL_ERROR_WANT_READ:
			case SSL_ERROR_WANT_WRITE:
#ifdef WIN32
				pgwin32_waitforsinglesocket(SSL_get_fd(port->ssl),
											(err == SSL_ERROR_WANT_READ) ?
					   FD_READ | FD_CLOSE | FD_ACCEPT : FD_WRITE | FD_CLOSE,
											INFINITE);
#endif
				goto aloop;
			case SSL_ERROR_SYSCALL:
				if (r < 0)
					ereport(COMMERROR,
							(errcode_for_socket_access(),
							 errmsg("could not accept SSL connection: %m")));
				else
					ereport(COMMERROR,
							(errcode(ERRCODE_PROTOCOL_VIOLATION),
					errmsg("could not accept SSL connection: EOF detected")));
				break;
			case SSL_ERROR_SSL:
				ereport(COMMERROR,
						(errcode(ERRCODE_PROTOCOL_VIOLATION),
						 errmsg("could not accept SSL connection: %s",
								SSLerrmessage())));
				break;
			case SSL_ERROR_ZERO_RETURN:
				ereport(COMMERROR,
						(errcode(ERRCODE_PROTOCOL_VIOLATION),
				   errmsg("could not accept SSL connection: EOF detected")));
				break;
			default:
				ereport(COMMERROR,
						(errcode(ERRCODE_PROTOCOL_VIOLATION),
						 errmsg("unrecognized SSL error code: %d",
								err)));
				break;
		}
		close_SSL(port);
		return -1;
	}

	port->count = 0;

	/* Get client certificate, if available. */
	port->peer = SSL_get_peer_certificate(port->ssl);

	/* and extract the Common Name from it. */
	port->peer_cn = NULL;
	if (port->peer != NULL)
	{
		int len;

		len = X509_NAME_get_text_by_NID(X509_get_subject_name(port->peer),
						NID_commonName, NULL, 0);

		if (len != -1)
		{
			char *peer_cn;

			peer_cn = MemoryContextAlloc(TopMemoryContext, len + 1);
			r = X509_NAME_get_text_by_NID(X509_get_subject_name(port->peer),
						      NID_commonName, peer_cn, len+1);

			peer_cn[len] = '\0';
			if (r != len)
			{
				/* shouldn't happen */
				pfree(peer_cn);
				close_SSL(port);
				return -1; 
			}

			/*
			 * Reject embedded NULLs in certificate common name to prevent
			 * attacks like CVE-2009-4034.
			 */
			if (len != strlen(peer_cn))
			{
				ereport(COMMERROR,
					(errcode(ERRCODE_PROTOCOL_VIOLATION),
					errmsg("SSL certificate's common name contains embedded null")));
				pfree(peer_cn);
				close_SSL(port);
				return -1;
			}

			port->peer_cn = peer_cn;
		}
	}

	ereport(DEBUG2,
			(errmsg("SSL connection from \"%s\"", 
				port->peer_cn ? port->peer_cn : "(anonymous)")));

	/* set up debugging/info callback */
	SSL_CTX_set_info_callback(SSL_context, info_cb);

	return 0;
}
Example #18
0
/*
 * Wait using linux's epoll_wait(2).
 *
 * This is the preferrable wait method, as several readiness notifications are
 * delivered, without having to iterate through all of set->events. The return
 * epoll_event struct contain a pointer to our events, making association
 * easy.
 */
static inline int
WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
					  WaitEvent *occurred_events, int nevents)
{
	int			returned_events = 0;
	int			rc;
	WaitEvent  *cur_event;
	struct epoll_event *cur_epoll_event;

	/* Sleep */
	rc = epoll_wait(set->epoll_fd, set->epoll_ret_events,
					nevents, cur_timeout);

	/* Check return code */
	if (rc < 0)
	{
		/* EINTR is okay, otherwise complain */
		if (errno != EINTR)
		{
			waiting = false;
			ereport(ERROR,
					(errcode_for_socket_access(),
					 errmsg("epoll_wait() failed: %m")));
		}
		return 0;
	}
	else if (rc == 0)
	{
		/* timeout exceeded */
		return -1;
	}

	/*
	 * At least one event occurred, iterate over the returned epoll events
	 * until they're either all processed, or we've returned all the events
	 * the caller desired.
	 */
	for (cur_epoll_event = set->epoll_ret_events;
		 cur_epoll_event < (set->epoll_ret_events + rc) &&
		 returned_events < nevents;
		 cur_epoll_event++)
	{
		/* epoll's data pointer is set to the associated WaitEvent */
		cur_event = (WaitEvent *) cur_epoll_event->data.ptr;

		occurred_events->pos = cur_event->pos;
		occurred_events->user_data = cur_event->user_data;
		occurred_events->events = 0;

		if (cur_event->events == WL_LATCH_SET &&
			cur_epoll_event->events & (EPOLLIN | EPOLLERR | EPOLLHUP))
		{
			/* There's data in the self-pipe, clear it. */
			drainSelfPipe();

			if (set->latch->is_set)
			{
				occurred_events->fd = PGINVALID_SOCKET;
				occurred_events->events = WL_LATCH_SET;
				occurred_events++;
				returned_events++;
			}
		}
		else if (cur_event->events == WL_POSTMASTER_DEATH &&
				 cur_epoll_event->events & (EPOLLIN | EPOLLERR | EPOLLHUP))
		{
			/*
			 * We expect an EPOLLHUP when the remote end is closed, but
			 * because we don't expect the pipe to become readable or to have
			 * any errors either, treat those cases as postmaster death, too.
			 *
			 * As explained in the WAIT_USE_SELECT implementation, select(2)
			 * may spuriously return. Be paranoid about that here too, a
			 * spurious WL_POSTMASTER_DEATH would be painful.
			 */
			if (!PostmasterIsAlive())
			{
				occurred_events->fd = PGINVALID_SOCKET;
				occurred_events->events = WL_POSTMASTER_DEATH;
				occurred_events++;
				returned_events++;
			}
		}
		else if (cur_event->events & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE))
		{
			Assert(cur_event->fd != PGINVALID_SOCKET);

			if ((cur_event->events & WL_SOCKET_READABLE) &&
				(cur_epoll_event->events & (EPOLLIN | EPOLLERR | EPOLLHUP)))
			{
				/* data available in socket, or EOF */
				occurred_events->events |= WL_SOCKET_READABLE;
			}

			if ((cur_event->events & WL_SOCKET_WRITEABLE) &&
				(cur_epoll_event->events & (EPOLLOUT | EPOLLERR | EPOLLHUP)))
			{
				/* writable, or EOF */
				occurred_events->events |= WL_SOCKET_WRITEABLE;
			}

			if (occurred_events->events != 0)
			{
				occurred_events->fd = cur_event->fd;
				occurred_events++;
				returned_events++;
			}
		}
	}

	return returned_events;
}
Example #19
0
/*
 * Wait using select(2).
 *
 * XXX: On at least older linux kernels select(), in violation of POSIX,
 * doesn't reliably return a socket as writable if closed - but we rely on
 * that. So far all the known cases of this problem are on platforms that also
 * provide a poll() implementation without that bug.  If we find one where
 * that's not the case, we'll need to add a workaround.
 */
static inline int
WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
					  WaitEvent *occurred_events, int nevents)
{
	int			returned_events = 0;
	int			rc;
	WaitEvent  *cur_event;
	fd_set		input_mask;
	fd_set		output_mask;
	int			hifd;
	struct timeval tv;
	struct timeval *tvp = NULL;

	FD_ZERO(&input_mask);
	FD_ZERO(&output_mask);

	/*
	 * Prepare input/output masks. We do so every loop iteration as there's no
	 * entirely portable way to copy fd_sets.
	 */
	for (cur_event = set->events;
		 cur_event < (set->events + set->nevents);
		 cur_event++)
	{
		if (cur_event->events == WL_LATCH_SET)
			FD_SET(cur_event->fd, &input_mask);
		else if (cur_event->events == WL_POSTMASTER_DEATH)
			FD_SET(cur_event->fd, &input_mask);
		else
		{
			Assert(cur_event->events & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE));
			if (cur_event->events == WL_SOCKET_READABLE)
				FD_SET(cur_event->fd, &input_mask);
			else if (cur_event->events == WL_SOCKET_WRITEABLE)
				FD_SET(cur_event->fd, &output_mask);
		}

		if (cur_event->fd > hifd)
			hifd = cur_event->fd;
	}

	/* Sleep */
	if (cur_timeout >= 0)
	{
		tv.tv_sec = cur_timeout / 1000L;
		tv.tv_usec = (cur_timeout % 1000L) * 1000L;
		tvp = &tv;
	}
	rc = select(hifd + 1, &input_mask, &output_mask, NULL, tvp);

	/* Check return code */
	if (rc < 0)
	{
		/* EINTR is okay, otherwise complain */
		if (errno != EINTR)
		{
			waiting = false;
			ereport(ERROR,
					(errcode_for_socket_access(),
					 errmsg("select() failed: %m")));
		}
		return 0;				/* retry */
	}
	else if (rc == 0)
	{
		/* timeout exceeded */
		return -1;
	}

	/*
	 * To associate events with select's masks, we have to check the status of
	 * the file descriptors associated with an event; by looping through all
	 * events.
	 */
	for (cur_event = set->events;
		 cur_event < (set->events + set->nevents)
		 && returned_events < nevents;
		 cur_event++)
	{
		occurred_events->pos = cur_event->pos;
		occurred_events->user_data = cur_event->user_data;
		occurred_events->events = 0;

		if (cur_event->events == WL_LATCH_SET &&
			FD_ISSET(cur_event->fd, &input_mask))
		{
			/* There's data in the self-pipe, clear it. */
			drainSelfPipe();

			if (set->latch->is_set)
			{
				occurred_events->fd = PGINVALID_SOCKET;
				occurred_events->events = WL_LATCH_SET;
				occurred_events++;
				returned_events++;
			}
		}
		else if (cur_event->events == WL_POSTMASTER_DEATH &&
				 FD_ISSET(cur_event->fd, &input_mask))
		{
			/*
			 * According to the select(2) man page on Linux, select(2) may
			 * spuriously return and report a file descriptor as readable,
			 * when it's not; and presumably so can poll(2).  It's not clear
			 * that the relevant cases would ever apply to the postmaster
			 * pipe, but since the consequences of falsely returning
			 * WL_POSTMASTER_DEATH could be pretty unpleasant, we take the
			 * trouble to positively verify EOF with PostmasterIsAlive().
			 */
			if (!PostmasterIsAlive())
			{
				occurred_events->fd = PGINVALID_SOCKET;
				occurred_events->events = WL_POSTMASTER_DEATH;
				occurred_events++;
				returned_events++;
			}
		}
		else if (cur_event->events & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE))
		{
			Assert(cur_event->fd != PGINVALID_SOCKET);

			if ((cur_event->events & WL_SOCKET_READABLE) &&
				FD_ISSET(cur_event->fd, &input_mask))
			{
				/* data available in socket, or EOF */
				occurred_events->events |= WL_SOCKET_READABLE;
			}

			if ((cur_event->events & WL_SOCKET_WRITEABLE) &&
				FD_ISSET(cur_event->fd, &output_mask))
			{
				/* socket is writeable, or EOF */
				occurred_events->events |= WL_SOCKET_WRITEABLE;
			}

			if (occurred_events->events != 0)
			{
				occurred_events->fd = cur_event->fd;
				occurred_events++;
				returned_events++;
			}
		}
	}
	return returned_events;
}
Example #20
0
/*
 * Client authentication starts here.  If there is an error, this
 * function does not return and the backend process is terminated.
 */
void
ClientAuthentication(Port *port)
{
	int			status = STATUS_ERROR;

	/*
	 * Get the authentication method to use for this frontend/database
	 * combination.  Note: a failure return indicates a problem with the
	 * hba config file, not with the request.  hba.c should have dropped
	 * an error message into the postmaster logfile if it failed.
	 */
	if (hba_getauthmethod(port) != STATUS_OK)
		ereport(FATAL,
				(errcode(ERRCODE_CONFIG_FILE_ERROR),
				 errmsg("missing or erroneous pg_hba.conf file"),
				 errhint("See server log for details.")));

	switch (port->auth_method)
	{
		case uaReject:

			/*
			 * This could have come from an explicit "reject" entry in
			 * pg_hba.conf, but more likely it means there was no matching
			 * entry.  Take pity on the poor user and issue a helpful
			 * error message.  NOTE: this is not a security breach,
			 * because all the info reported here is known at the frontend
			 * and must be assumed known to bad guys. We're merely helping
			 * out the less clueful good guys.
			 */
			{
				char		hostinfo[NI_MAXHOST];

				getnameinfo_all(&port->raddr.addr, port->raddr.salen,
								hostinfo, sizeof(hostinfo),
								NULL, 0,
								NI_NUMERICHOST);

#ifdef USE_SSL
				ereport(FATAL,
				   (errcode(ERRCODE_INVALID_AUTHORIZATION_SPECIFICATION),
					errmsg("no pg_hba.conf entry for host \"%s\", user \"%s\", database \"%s\", %s",
						   hostinfo, port->user_name, port->database_name,
				   port->ssl ? gettext("SSL on") : gettext("SSL off"))));
#else
				ereport(FATAL,
				   (errcode(ERRCODE_INVALID_AUTHORIZATION_SPECIFICATION),
					errmsg("no pg_hba.conf entry for host \"%s\", user \"%s\", database \"%s\"",
					   hostinfo, port->user_name, port->database_name)));
#endif
				break;
			}

		case uaKrb4:
			/* Kerberos 4 only seems to work with AF_INET. */
			if (port->raddr.addr.ss_family != AF_INET
				|| port->laddr.addr.ss_family != AF_INET)
				ereport(FATAL,
						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				   errmsg("Kerberos 4 only supports IPv4 connections")));
			sendAuthRequest(port, AUTH_REQ_KRB4);
			status = pg_krb4_recvauth(port);
			break;

		case uaKrb5:
			sendAuthRequest(port, AUTH_REQ_KRB5);
			status = pg_krb5_recvauth(port);
			break;

		case uaIdent:
			/*
			 * If we are doing ident on unix-domain sockets, use SCM_CREDS
			 * only if it is defined and SO_PEERCRED isn't.
			 */
#if !defined(HAVE_GETPEEREID) && !defined(SO_PEERCRED) && \
	(defined(HAVE_STRUCT_CMSGCRED) || defined(HAVE_STRUCT_FCRED) || \
	 (defined(HAVE_STRUCT_SOCKCRED) && defined(LOCAL_CREDS)))
			if (port->raddr.addr.ss_family == AF_UNIX)
			{
#if defined(HAVE_STRUCT_FCRED) || defined(HAVE_STRUCT_SOCKCRED)
				/*
				 * Receive credentials on next message receipt, BSD/OS,
				 * NetBSD. We need to set this before the client sends the
				 * next packet.
				 */
				int			on = 1;

				if (setsockopt(port->sock, 0, LOCAL_CREDS, &on, sizeof(on)) < 0)
					ereport(FATAL,
							(errcode_for_socket_access(),
					 errmsg("could not enable credential reception: %m")));
#endif

				sendAuthRequest(port, AUTH_REQ_SCM_CREDS);
			}
#endif
			status = authident(port);
			break;

		case uaMD5:
			sendAuthRequest(port, AUTH_REQ_MD5);
			status = recv_and_check_password_packet(port);
			break;

		case uaCrypt:
			sendAuthRequest(port, AUTH_REQ_CRYPT);
			status = recv_and_check_password_packet(port);
			break;

		case uaPassword:
			sendAuthRequest(port, AUTH_REQ_PASSWORD);
			status = recv_and_check_password_packet(port);
			break;

#ifdef USE_PAM
		case uaPAM:
			pam_port_cludge = port;
			status = CheckPAMAuth(port, port->user_name, "");
			break;
#endif   /* USE_PAM */

		case uaTrust:
			status = STATUS_OK;
			break;
	}

	if (status == STATUS_OK)
		sendAuthRequest(port, AUTH_REQ_OK);
	else
		auth_failed(port, status);
}
Example #21
0
/*
 * StreamConnection -- create a new connection with client using
 *		server port.  Set port->sock to the FD of the new connection.
 *
 * ASSUME: that this doesn't need to be non-blocking because
 *		the Postmaster uses select() to tell when the server master
 *		socket is ready for accept().
 *
 * RETURNS: STATUS_OK or STATUS_ERROR
 */
int
StreamConnection(pgsocket server_fd, Port *port)
{
	/* accept connection and fill in the client (remote) address */
	port->raddr.salen = sizeof(port->raddr.addr);
	if ((port->sock = accept(server_fd,
							 (struct sockaddr *) & port->raddr.addr,
							 &port->raddr.salen)) < 0)
	{
		ereport(LOG,
				(errcode_for_socket_access(),
				 errmsg("could not accept new connection: %m")));

		/*
		 * If accept() fails then postmaster.c will still see the server
		 * socket as read-ready, and will immediately try again.  To avoid
		 * uselessly sucking lots of CPU, delay a bit before trying again.
		 * (The most likely reason for failure is being out of kernel file
		 * table slots; we can do little except hope some will get freed up.)
		 */
		pg_usleep(100000L);		/* wait 0.1 sec */
		return STATUS_ERROR;
	}

#ifdef SCO_ACCEPT_BUG

	/*
	 * UnixWare 7+ and OpenServer 5.0.4 are known to have this bug, but it
	 * shouldn't hurt to catch it for all versions of those platforms.
	 */
	if (port->raddr.addr.ss_family == 0)
		port->raddr.addr.ss_family = AF_UNIX;
#endif

	/* fill in the server (local) address */
	port->laddr.salen = sizeof(port->laddr.addr);
	if (getsockname(port->sock,
					(struct sockaddr *) & port->laddr.addr,
					&port->laddr.salen) < 0)
	{
		elog(LOG, "getsockname() failed: %m");
		return STATUS_ERROR;
	}

	/* select NODELAY and KEEPALIVE options if it's a TCP connection */
	if (!IS_AF_UNIX(port->laddr.addr.ss_family))
	{
		int			on;

#ifdef	TCP_NODELAY
		on = 1;
		if (setsockopt(port->sock, IPPROTO_TCP, TCP_NODELAY,
					   (char *) &on, sizeof(on)) < 0)
		{
			elog(LOG, "setsockopt(TCP_NODELAY) failed: %m");
			return STATUS_ERROR;
		}
#endif
		on = 1;
		if (setsockopt(port->sock, SOL_SOCKET, SO_KEEPALIVE,
					   (char *) &on, sizeof(on)) < 0)
		{
			elog(LOG, "setsockopt(SO_KEEPALIVE) failed: %m");
			return STATUS_ERROR;
		}

#ifdef WIN32

		/*
		 * This is a Win32 socket optimization.  The ideal size is 32k.
		 * http://support.microsoft.com/kb/823764/EN-US/
		 */
		on = PQ_BUFFER_SIZE * 4;
		if (setsockopt(port->sock, SOL_SOCKET, SO_SNDBUF, (char *) &on,
					   sizeof(on)) < 0)
		{
			elog(LOG, "setsockopt(SO_SNDBUF) failed: %m");
			return STATUS_ERROR;
		}
#endif

		/*
		 * Also apply the current keepalive parameters.  If we fail to set a
		 * parameter, don't error out, because these aren't universally
		 * supported.  (Note: you might think we need to reset the GUC
		 * variables to 0 in such a case, but it's not necessary because the
		 * show hooks for these variables report the truth anyway.)
		 */
		(void) pq_setkeepalivesidle(tcp_keepalives_idle, port);
		(void) pq_setkeepalivesinterval(tcp_keepalives_interval, port);
		(void) pq_setkeepalivescount(tcp_keepalives_count, port);
	}

	return STATUS_OK;
}
Example #22
0
/*
 * ClientConnectionReady checks if the given connection is ready for non-blocking
 * reads or writes. This function is loosely based on pqSocketCheck() at fe-misc.c
 * and libpq_select() at libpqwalreceiver.c.
 */
static bool
ClientConnectionReady(PGconn *connection, PostgresPollingStatusType pollingStatus)
{
	bool clientConnectionReady = false;
	int pollResult = 0;

	/* we use poll(2) if available, otherwise select(2) */
#ifdef HAVE_POLL
	int fileDescriptorCount = 1;
	int immediateTimeout = 0;
	int pollEventMask = 0;
	struct pollfd pollFileDescriptor;

	if (pollingStatus == PGRES_POLLING_READING)
	{
		pollEventMask = POLLERR | POLLIN;
	}
	else if (pollingStatus == PGRES_POLLING_WRITING)
	{
		pollEventMask = POLLERR | POLLOUT;
	}

	pollFileDescriptor.fd = PQsocket(connection);
	pollFileDescriptor.events = pollEventMask;
	pollFileDescriptor.revents = 0;

	pollResult = poll(&pollFileDescriptor, fileDescriptorCount, immediateTimeout);
#else /* !HAVE_POLL */

	fd_set readFileDescriptorSet;
	fd_set writeFileDescriptorSet;
	fd_set exceptionFileDescriptorSet;
	struct timeval immediateTimeout = { 0, 0 };
	int connectionFileDescriptor = PQsocket(connection);

	FD_ZERO(&readFileDescriptorSet);
	FD_ZERO(&writeFileDescriptorSet);
	FD_ZERO(&exceptionFileDescriptorSet);

	if (pollingStatus == PGRES_POLLING_READING)
	{
		FD_SET(connectionFileDescriptor, &exceptionFileDescriptorSet);
		FD_SET(connectionFileDescriptor, &readFileDescriptorSet);
	}
	else if (pollingStatus == PGRES_POLLING_WRITING)
	{
		FD_SET(connectionFileDescriptor, &exceptionFileDescriptorSet);
		FD_SET(connectionFileDescriptor, &writeFileDescriptorSet);
	}

	pollResult = select(connectionFileDescriptor + 1, &readFileDescriptorSet,
						&writeFileDescriptorSet, &exceptionFileDescriptorSet,
						&immediateTimeout);
#endif /* HAVE_POLL */

	if (pollResult > 0)
	{
		clientConnectionReady = true;
	}
	else if (pollResult == 0)
	{
		clientConnectionReady = false;
	}
	else if (pollResult < 0)
	{
		if (errno == EINTR)
		{
			/*
			 * If a signal was caught, we return false so the caller polls the
			 * connection again.
			 */
			clientConnectionReady = false;
		}
		else
		{
			/*
			 * poll() or select() can set errno to EFAULT (when socket is not
			 * contained in the calling program's address space), EBADF (invalid
			 * file descriptor), EINVAL (invalid arguments to select or poll),
			 * and ENOMEM (no space to allocate file descriptor tables). Out of
			 * these, only ENOMEM is likely here, and it is a fatal error, so we
			 * error out.
			 */
			Assert(errno == ENOMEM);
			ereport(ERROR, (errcode_for_socket_access(),
							errmsg("select()/poll() failed: %m")));
		}
	}

	return clientConnectionReady;
}
Example #23
0
/*
 * Postmaster subroutine to start a syslogger subprocess.
 */
int
SysLogger_Start(void)
{
	pid_t		sysloggerPid;
	char	   *filename;

	if (!Logging_collector)
		return 0;

	/*
	 * If first time through, create the pipe which will receive stderr
	 * output.
	 *
	 * If the syslogger crashes and needs to be restarted, we continue to use
	 * the same pipe (indeed must do so, since extant backends will be writing
	 * into that pipe).
	 *
	 * This means the postmaster must continue to hold the read end of the
	 * pipe open, so we can pass it down to the reincarnated syslogger. This
	 * is a bit klugy but we have little choice.
	 */
#ifndef WIN32
	if (syslogPipe[0] < 0)
	{
		if (pipe(syslogPipe) < 0)
			ereport(FATAL,
					(errcode_for_socket_access(),
					 (errmsg("could not create pipe for syslog: %m"))));
	}
#else
	if (!syslogPipe[0])
	{
		SECURITY_ATTRIBUTES sa;

		memset(&sa, 0, sizeof(SECURITY_ATTRIBUTES));
		sa.nLength = sizeof(SECURITY_ATTRIBUTES);
		sa.bInheritHandle = TRUE;

		if (!CreatePipe(&syslogPipe[0], &syslogPipe[1], &sa, 32768))
			ereport(FATAL,
					(errcode_for_file_access(),
					 (errmsg("could not create pipe for syslog: %m"))));
	}
#endif

	/*
	 * Create log directory if not present; ignore errors
	 */
	mkdir(Log_directory, S_IRWXU);

	/*
	 * The initial logfile is created right in the postmaster, to verify that
	 * the Log_directory is writable.  We save the reference time so that the
	 * syslogger child process can recompute this file name.
	 *
	 * It might look a bit strange to re-do this during a syslogger restart,
	 * but we must do so since the postmaster closed syslogFile after the
	 * previous fork (and remembering that old file wouldn't be right anyway).
	 * Note we always append here, we won't overwrite any existing file.  This
	 * is consistent with the normal rules, because by definition this is not
	 * a time-based rotation.
	 */
	first_syslogger_file_time = time(NULL);
	filename = logfile_getname(first_syslogger_file_time, NULL);

	syslogFile = logfile_open(filename, "a", false);

	pfree(filename);

#ifdef EXEC_BACKEND
	switch ((sysloggerPid = syslogger_forkexec()))
#else
	switch ((sysloggerPid = fork_process()))
#endif
	{
		case -1:
			ereport(LOG,
					(errmsg("could not fork system logger: %m")));
			return 0;

#ifndef EXEC_BACKEND
		case 0:
			/* in postmaster child ... */
			/* Close the postmaster's sockets */
			ClosePostmasterPorts(true);

			/* Lose the postmaster's on-exit routines */
			on_exit_reset();

			/* Drop our connection to postmaster's shared memory, as well */
			PGSharedMemoryDetach();

			/* do the work */
			SysLoggerMain(0, NULL);
			break;
#endif

		default:
			/* success, in postmaster */

			/* now we redirect stderr, if not done already */
			if (!redirection_done)
			{
#ifndef WIN32
				fflush(stdout);
				if (dup2(syslogPipe[1], fileno(stdout)) < 0)
					ereport(FATAL,
							(errcode_for_file_access(),
							 errmsg("could not redirect stdout: %m")));
				fflush(stderr);
				if (dup2(syslogPipe[1], fileno(stderr)) < 0)
					ereport(FATAL,
							(errcode_for_file_access(),
							 errmsg("could not redirect stderr: %m")));
				/* Now we are done with the write end of the pipe. */
				close(syslogPipe[1]);
				syslogPipe[1] = -1;
#else
				int			fd;

				/*
				 * open the pipe in binary mode and make sure stderr is binary
				 * after it's been dup'ed into, to avoid disturbing the pipe
				 * chunking protocol.
				 */
				fflush(stderr);
				fd = _open_osfhandle((intptr_t) syslogPipe[1],
									 _O_APPEND | _O_BINARY);
				if (dup2(fd, _fileno(stderr)) < 0)
					ereport(FATAL,
							(errcode_for_file_access(),
							 errmsg("could not redirect stderr: %m")));
				close(fd);
				_setmode(_fileno(stderr), _O_BINARY);

				/*
				 * Now we are done with the write end of the pipe.
				 * CloseHandle() must not be called because the preceding
				 * close() closes the underlying handle.
				 */
				syslogPipe[1] = 0;
#endif
				redirection_done = true;
			}

			/* postmaster will never write the file; close it */
			fclose(syslogFile);
			syslogFile = NULL;
			return (int) sysloggerPid;
	}

	/* we should never reach here */
	return 0;
}
Example #24
0
/*
 * Main entry point for syslogger process
 * argc/argv parameters are valid only in EXEC_BACKEND case.
 */
NON_EXEC_STATIC void
SysLoggerMain(int argc, char *argv[])
{
#ifndef WIN32
	char		logbuffer[READ_BUF_SIZE];
	int			bytes_in_logbuffer = 0;
#endif
	char	   *currentLogDir;
	char	   *currentLogFilename;
	int			currentLogRotationAge;
	pg_time_t	now;

	IsUnderPostmaster = true;	/* we are a postmaster subprocess now */

	MyProcPid = getpid();		/* reset MyProcPid */

	MyStartTime = time(NULL);	/* set our start time in case we call elog */
	now = MyStartTime;

#ifdef EXEC_BACKEND
	syslogger_parseArgs(argc, argv);
#endif   /* EXEC_BACKEND */

	am_syslogger = true;

	init_ps_display("logger process", "", "", "");

	/*
	 * If we restarted, our stderr is already redirected into our own input
	 * pipe.  This is of course pretty useless, not to mention that it
	 * interferes with detecting pipe EOF.	Point stderr to /dev/null. This
	 * assumes that all interesting messages generated in the syslogger will
	 * come through elog.c and will be sent to write_syslogger_file.
	 */
	if (redirection_done)
	{
		int			fd = open(DEVNULL, O_WRONLY, 0);

		/*
		 * The closes might look redundant, but they are not: we want to be
		 * darn sure the pipe gets closed even if the open failed.	We can
		 * survive running with stderr pointing nowhere, but we can't afford
		 * to have extra pipe input descriptors hanging around.
		 */
		close(fileno(stdout));
		close(fileno(stderr));
		if (fd != -1)
		{
			dup2(fd, fileno(stdout));
			dup2(fd, fileno(stderr));
			close(fd);
		}
	}

	/*
	 * Syslogger's own stderr can't be the syslogPipe, so set it back to text
	 * mode if we didn't just close it. (It was set to binary in
	 * SubPostmasterMain).
	 */
#ifdef WIN32
	else
		_setmode(_fileno(stderr), _O_TEXT);
#endif

	/*
	 * Also close our copy of the write end of the pipe.  This is needed to
	 * ensure we can detect pipe EOF correctly.  (But note that in the restart
	 * case, the postmaster already did this.)
	 */
#ifndef WIN32
	if (syslogPipe[1] >= 0)
		close(syslogPipe[1]);
	syslogPipe[1] = -1;
#else
	if (syslogPipe[1])
		CloseHandle(syslogPipe[1]);
	syslogPipe[1] = 0;
#endif

	/*
	 * If possible, make this process a group leader, so that the postmaster
	 * can signal any child processes too.	(syslogger probably never has any
	 * child processes, but for consistency we make all postmaster child
	 * processes do this.)
	 */
#ifdef HAVE_SETSID
	if (setsid() < 0)
		elog(FATAL, "setsid() failed: %m");
#endif

	InitializeLatchSupport();	/* needed for latch waits */

	/* Initialize private latch for use by signal handlers */
	InitLatch(&sysLoggerLatch);

	/*
	 * Properly accept or ignore signals the postmaster might send us
	 *
	 * Note: we ignore all termination signals, and instead exit only when all
	 * upstream processes are gone, to ensure we don't miss any dying gasps of
	 * broken backends...
	 */

	pqsignal(SIGHUP, sigHupHandler);	/* set flag to read config file */
	pqsignal(SIGINT, SIG_IGN);
	pqsignal(SIGTERM, SIG_IGN);
	pqsignal(SIGQUIT, SIG_IGN);
	pqsignal(SIGALRM, SIG_IGN);
	pqsignal(SIGPIPE, SIG_IGN);
	pqsignal(SIGUSR1, sigUsr1Handler);	/* request log rotation */
	pqsignal(SIGUSR2, SIG_IGN);

	/*
	 * Reset some signals that are accepted by postmaster but not here
	 */
	pqsignal(SIGCHLD, SIG_DFL);
	pqsignal(SIGTTIN, SIG_DFL);
	pqsignal(SIGTTOU, SIG_DFL);
	pqsignal(SIGCONT, SIG_DFL);
	pqsignal(SIGWINCH, SIG_DFL);

	PG_SETMASK(&UnBlockSig);

#ifdef WIN32
	/* Fire up separate data transfer thread */
	InitializeCriticalSection(&sysloggerSection);
	EnterCriticalSection(&sysloggerSection);

	threadHandle = (HANDLE) _beginthreadex(NULL, 0, pipeThread, NULL, 0, NULL);
	if (threadHandle == 0)
		elog(FATAL, "could not create syslogger data transfer thread: %m");
#endif   /* WIN32 */

	/*
	 * Remember active logfile's name.  We recompute this from the reference
	 * time because passing down just the pg_time_t is a lot cheaper than
	 * passing a whole file path in the EXEC_BACKEND case.
	 */
	last_file_name = logfile_getname(first_syslogger_file_time, NULL);

	/* remember active logfile parameters */
	currentLogDir = pstrdup(Log_directory);
	currentLogFilename = pstrdup(Log_filename);
	currentLogRotationAge = Log_RotationAge;
	/* set next planned rotation time */
	set_next_rotation_time();

	/* main worker loop */
	for (;;)
	{
		bool		time_based_rotation = false;
		int			size_rotation_for = 0;
		long		cur_timeout;
		int			cur_flags;

#ifndef WIN32
		int			rc;
#endif

		/* Clear any already-pending wakeups */
		ResetLatch(&sysLoggerLatch);

		/*
		 * Process any requests or signals received recently.
		 */
		if (got_SIGHUP)
		{
			got_SIGHUP = false;
			ProcessConfigFile(PGC_SIGHUP);

			/*
			 * Check if the log directory or filename pattern changed in
			 * postgresql.conf. If so, force rotation to make sure we're
			 * writing the logfiles in the right place.
			 */
			if (strcmp(Log_directory, currentLogDir) != 0)
			{
				pfree(currentLogDir);
				currentLogDir = pstrdup(Log_directory);
				rotation_requested = true;

				/*
				 * Also, create new directory if not present; ignore errors
				 */
				mkdir(Log_directory, S_IRWXU);
			}
			if (strcmp(Log_filename, currentLogFilename) != 0)
			{
				pfree(currentLogFilename);
				currentLogFilename = pstrdup(Log_filename);
				rotation_requested = true;
			}

			/*
			 * If rotation time parameter changed, reset next rotation time,
			 * but don't immediately force a rotation.
			 */
			if (currentLogRotationAge != Log_RotationAge)
			{
				currentLogRotationAge = Log_RotationAge;
				set_next_rotation_time();
			}

			/*
			 * If we had a rotation-disabling failure, re-enable rotation
			 * attempts after SIGHUP, and force one immediately.
			 */
			if (rotation_disabled)
			{
				rotation_disabled = false;
				rotation_requested = true;
			}
		}

		if (Log_RotationAge > 0 && !rotation_disabled)
		{
			/* Do a logfile rotation if it's time */
			now = (pg_time_t) time(NULL);
			if (now >= next_rotation_time)
				rotation_requested = time_based_rotation = true;
		}

		if (!rotation_requested && Log_RotationSize > 0 && !rotation_disabled)
		{
			/* Do a rotation if file is too big */
			if (ftell(syslogFile) >= Log_RotationSize * 1024L)
			{
				rotation_requested = true;
				size_rotation_for |= LOG_DESTINATION_STDERR;
			}
			if (csvlogFile != NULL &&
				ftell(csvlogFile) >= Log_RotationSize * 1024L)
			{
				rotation_requested = true;
				size_rotation_for |= LOG_DESTINATION_CSVLOG;
			}
		}

		if (rotation_requested)
		{
			/*
			 * Force rotation when both values are zero. It means the request
			 * was sent by pg_rotate_logfile.
			 */
			if (!time_based_rotation && size_rotation_for == 0)
				size_rotation_for = LOG_DESTINATION_STDERR | LOG_DESTINATION_CSVLOG;
			logfile_rotate(time_based_rotation, size_rotation_for);
		}

		/*
		 * Calculate time till next time-based rotation, so that we don't
		 * sleep longer than that.	We assume the value of "now" obtained
		 * above is still close enough.  Note we can't make this calculation
		 * until after calling logfile_rotate(), since it will advance
		 * next_rotation_time.
		 *
		 * Also note that we need to beware of overflow in calculation of the
		 * timeout: with large settings of Log_RotationAge, next_rotation_time
		 * could be more than INT_MAX msec in the future.  In that case we'll
		 * wait no more than INT_MAX msec, and try again.
		 */
		if (Log_RotationAge > 0 && !rotation_disabled)
		{
			pg_time_t	delay;

			delay = next_rotation_time - now;
			if (delay > 0)
			{
				if (delay > INT_MAX / 1000)
					delay = INT_MAX / 1000;
				cur_timeout = delay * 1000L;	/* msec */
			}
			else
				cur_timeout = 0;
			cur_flags = WL_TIMEOUT;
		}
		else
		{
			cur_timeout = -1L;
			cur_flags = 0;
		}

		/*
		 * Sleep until there's something to do
		 */
#ifndef WIN32
		rc = WaitLatchOrSocket(&sysLoggerLatch,
							   WL_LATCH_SET | WL_SOCKET_READABLE | cur_flags,
							   syslogPipe[0],
							   cur_timeout);

		if (rc & WL_SOCKET_READABLE)
		{
			int			bytesRead;

			bytesRead = read(syslogPipe[0],
							 logbuffer + bytes_in_logbuffer,
							 sizeof(logbuffer) - bytes_in_logbuffer);
			if (bytesRead < 0)
			{
				if (errno != EINTR)
					ereport(LOG,
							(errcode_for_socket_access(),
							 errmsg("could not read from logger pipe: %m")));
			}
			else if (bytesRead > 0)
			{
				bytes_in_logbuffer += bytesRead;
				process_pipe_input(logbuffer, &bytes_in_logbuffer);
				continue;
			}
			else
			{
				/*
				 * Zero bytes read when select() is saying read-ready means
				 * EOF on the pipe: that is, there are no longer any processes
				 * with the pipe write end open.  Therefore, the postmaster
				 * and all backends are shut down, and we are done.
				 */
				pipe_eof_seen = true;

				/* if there's any data left then force it out now */
				flush_pipe_input(logbuffer, &bytes_in_logbuffer);
			}
		}
#else							/* WIN32 */

		/*
		 * On Windows we leave it to a separate thread to transfer data and
		 * detect pipe EOF.  The main thread just wakes up to handle SIGHUP
		 * and rotation conditions.
		 *
		 * Server code isn't generally thread-safe, so we ensure that only one
		 * of the threads is active at a time by entering the critical section
		 * whenever we're not sleeping.
		 */
		LeaveCriticalSection(&sysloggerSection);

		(void) WaitLatch(&sysLoggerLatch,
						 WL_LATCH_SET | cur_flags,
						 cur_timeout);

		EnterCriticalSection(&sysloggerSection);
#endif   /* WIN32 */

		if (pipe_eof_seen)
		{
			/*
			 * seeing this message on the real stderr is annoying - so we make
			 * it DEBUG1 to suppress in normal use.
			 */
			ereport(DEBUG1,
					(errmsg("logger shutting down")));

			/*
			 * Normal exit from the syslogger is here.	Note that we
			 * deliberately do not close syslogFile before exiting; this is to
			 * allow for the possibility of elog messages being generated
			 * inside proc_exit.  Regular exit() will take care of flushing
			 * and closing stdio channels.
			 */
			proc_exit(0);
		}
	}
}
Example #25
0
/*
 * Read a message from the specified connection carrying pid numbers
 * of transactions interacting with pooler
 */
int
pool_recvpids(PoolPort *port, int **pids)
{
	int			r, i;
	uint		n32;
	char		buf[SEND_PID_BUFFER_SIZE];

	/*
	 * Buffer size is upper bounded by the maximum number of connections,
	 * as in the pooler each connection has one Pooler Agent.
	 */

	r = recv(Socket(*port), &buf, SEND_PID_BUFFER_SIZE, 0);
	if (r < 0)
	{
		/*
		 * Report broken connection
		 */
		ereport(ERROR,
				(errcode_for_socket_access(),
				 errmsg("could not receive data from client: %m")));
		goto failure;
	}
	else if (r == 0)
	{
		goto failure;
	}
	else if (r != SEND_PID_BUFFER_SIZE)
	{
		ereport(ERROR,
				(errcode(ERRCODE_PROTOCOL_VIOLATION),
				 errmsg("incomplete message from client")));
		goto failure;
	}

	/* Verify response */
	if (buf[0] != 'p')
	{
		ereport(ERROR,
				(errcode(ERRCODE_PROTOCOL_VIOLATION),
				 errmsg("unexpected message code")));
		goto failure;
	}

	memcpy(&n32, buf + 1, 4);
	n32 = ntohl(n32);
	if (n32 == 0)
	{
		elog(WARNING, "No transaction to abort");
		return n32;
	}

	*pids = (int *) palloc(sizeof(int) * n32);

	for (i = 0; i < n32; i++)
	{
		int n;
		memcpy(&n, buf + 5 + i * sizeof(int), sizeof(int));
		*pids[i] = ntohl(n);
	}
	return n32;

failure:
	return 0;
}
Example #26
0
/*
 * Wait using poll(2).
 *
 * This allows to receive readiness notifications for several events at once,
 * but requires iterating through all of set->pollfds.
 */
static inline int
WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
					  WaitEvent *occurred_events, int nevents)
{
	int			returned_events = 0;
	int			rc;
	WaitEvent  *cur_event;
	struct pollfd *cur_pollfd;

	/* Sleep */
	rc = poll(set->pollfds, set->nevents, (int) cur_timeout);

	/* Check return code */
	if (rc < 0)
	{
		/* EINTR is okay, otherwise complain */
		if (errno != EINTR)
		{
			waiting = false;
			ereport(ERROR,
					(errcode_for_socket_access(),
					 errmsg("poll() failed: %m")));
		}
		return 0;
	}
	else if (rc == 0)
	{
		/* timeout exceeded */
		return -1;
	}

	for (cur_event = set->events, cur_pollfd = set->pollfds;
		 cur_event < (set->events + set->nevents) &&
		 returned_events < nevents;
		 cur_event++, cur_pollfd++)
	{
		/* no activity on this FD, skip */
		if (cur_pollfd->revents == 0)
			continue;

		occurred_events->pos = cur_event->pos;
		occurred_events->user_data = cur_event->user_data;
		occurred_events->events = 0;

		if (cur_event->events == WL_LATCH_SET &&
			(cur_pollfd->revents & (POLLIN | POLLHUP | POLLERR | POLLNVAL)))
		{
			/* There's data in the self-pipe, clear it. */
			drainSelfPipe();

			if (set->latch->is_set)
			{
				occurred_events->fd = PGINVALID_SOCKET;
				occurred_events->events = WL_LATCH_SET;
				occurred_events++;
				returned_events++;
			}
		}
		else if (cur_event->events == WL_POSTMASTER_DEATH &&
			 (cur_pollfd->revents & (POLLIN | POLLHUP | POLLERR | POLLNVAL)))
		{
			/*
			 * We expect an POLLHUP when the remote end is closed, but because
			 * we don't expect the pipe to become readable or to have any
			 * errors either, treat those cases as postmaster death, too.
			 *
			 * As explained in the WAIT_USE_SELECT implementation, select(2)
			 * may spuriously return. Be paranoid about that here too, a
			 * spurious WL_POSTMASTER_DEATH would be painful.
			 */
			if (!PostmasterIsAlive())
			{
				occurred_events->fd = PGINVALID_SOCKET;
				occurred_events->events = WL_POSTMASTER_DEATH;
				occurred_events++;
				returned_events++;
			}
		}
		else if (cur_event->events & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE))
		{
			int			errflags = POLLHUP | POLLERR | POLLNVAL;

			Assert(cur_event->fd >= PGINVALID_SOCKET);

			if ((cur_event->events & WL_SOCKET_READABLE) &&
				(cur_pollfd->revents & (POLLIN | errflags)))
			{
				/* data available in socket, or EOF */
				occurred_events->events |= WL_SOCKET_READABLE;
			}

			if ((cur_event->events & WL_SOCKET_WRITEABLE) &&
				(cur_pollfd->revents & (POLLOUT | errflags)))
			{
				/* writeable, or EOF */
				occurred_events->events |= WL_SOCKET_WRITEABLE;
			}

			if (occurred_events->events != 0)
			{
				occurred_events->fd = cur_event->fd;
				occurred_events++;
				returned_events++;
			}
		}
	}
	return returned_events;
}
Example #27
0
int
StreamServerPort(int family, char *hostName, unsigned short portNumber,
				 char *unixSocketName,
				 pgsocket ListenSocket[], int MaxListen)
{
	pgsocket	fd;
	int			err;
	int			maxconn;
	int			ret;
	char		portNumberStr[32];
	const char *familyDesc;
	char		familyDescBuf[64];
	char	   *service;
	struct addrinfo *addrs = NULL,
			   *addr;
	struct addrinfo hint;
	int			listen_index = 0;
	int			added = 0;

#if !defined(WIN32) || defined(IPV6_V6ONLY)
	int			one = 1;
#endif

	/* Initialize hint structure */
	MemSet(&hint, 0, sizeof(hint));
	hint.ai_family = family;
	hint.ai_flags = AI_PASSIVE;
	hint.ai_socktype = SOCK_STREAM;

#ifdef HAVE_UNIX_SOCKETS
	if (family == AF_UNIX)
	{
		/* Lock_AF_UNIX will also fill in sock_path. */
		if (Lock_AF_UNIX(portNumber, unixSocketName) != STATUS_OK)
			return STATUS_ERROR;
		service = sock_path;
	}
	else
#endif   /* HAVE_UNIX_SOCKETS */
	{
		snprintf(portNumberStr, sizeof(portNumberStr), "%d", portNumber);
		service = portNumberStr;
	}

	ret = pg_getaddrinfo_all(hostName, service, &hint, &addrs);
	if (ret || !addrs)
	{
		if (hostName)
			ereport(LOG,
					(errmsg("could not translate host name \"%s\", service \"%s\" to address: %s",
							hostName, service, gai_strerror(ret))));
		else
			ereport(LOG,
				 (errmsg("could not translate service \"%s\" to address: %s",
						 service, gai_strerror(ret))));
		if (addrs)
			pg_freeaddrinfo_all(hint.ai_family, addrs);
		return STATUS_ERROR;
	}

	for (addr = addrs; addr; addr = addr->ai_next)
	{
		if (!IS_AF_UNIX(family) && IS_AF_UNIX(addr->ai_family))
		{
			/*
			 * Only set up a unix domain socket when they really asked for it.
			 * The service/port is different in that case.
			 */
			continue;
		}

		/* See if there is still room to add 1 more socket. */
		for (; listen_index < MaxListen; listen_index++)
		{
			if (ListenSocket[listen_index] == PGINVALID_SOCKET)
				break;
		}
		if (listen_index >= MaxListen)
		{
			ereport(LOG,
					(errmsg("could not bind to all requested addresses: MAXLISTEN (%d) exceeded",
							MaxListen)));
			break;
		}

		/* set up family name for possible error messages */
		switch (addr->ai_family)
		{
			case AF_INET:
				familyDesc = _("IPv4");
				break;
#ifdef HAVE_IPV6
			case AF_INET6:
				familyDesc = _("IPv6");
				break;
#endif
#ifdef HAVE_UNIX_SOCKETS
			case AF_UNIX:
				familyDesc = _("Unix");
				break;
#endif
			default:
				snprintf(familyDescBuf, sizeof(familyDescBuf),
						 _("unrecognized address family %d"),
						 addr->ai_family);
				familyDesc = familyDescBuf;
				break;
		}

		if ((fd = socket(addr->ai_family, SOCK_STREAM, 0)) < 0)
		{
			ereport(LOG,
					(errcode_for_socket_access(),
			/* translator: %s is IPv4, IPv6, or Unix */
					 errmsg("could not create %s socket: %m",
							familyDesc)));
			continue;
		}

#ifndef WIN32

		/*
		 * Without the SO_REUSEADDR flag, a new postmaster can't be started
		 * right away after a stop or crash, giving "address already in use"
		 * error on TCP ports.
		 *
		 * On win32, however, this behavior only happens if the
		 * SO_EXLUSIVEADDRUSE is set. With SO_REUSEADDR, win32 allows multiple
		 * servers to listen on the same address, resulting in unpredictable
		 * behavior. With no flags at all, win32 behaves as Unix with
		 * SO_REUSEADDR.
		 */
		if (!IS_AF_UNIX(addr->ai_family))
		{
			if ((setsockopt(fd, SOL_SOCKET, SO_REUSEADDR,
							(char *) &one, sizeof(one))) == -1)
			{
				ereport(LOG,
						(errcode_for_socket_access(),
						 errmsg("setsockopt(SO_REUSEADDR) failed: %m")));
				closesocket(fd);
				continue;
			}
		}
#endif

#ifdef IPV6_V6ONLY
		if (addr->ai_family == AF_INET6)
		{
			if (setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY,
						   (char *) &one, sizeof(one)) == -1)
			{
				ereport(LOG,
						(errcode_for_socket_access(),
						 errmsg("setsockopt(IPV6_V6ONLY) failed: %m")));
				closesocket(fd);
				continue;
			}
		}
#endif

		/*
		 * Note: This might fail on some OS's, like Linux older than
		 * 2.4.21-pre3, that don't have the IPV6_V6ONLY socket option, and map
		 * ipv4 addresses to ipv6.	It will show ::ffff:ipv4 for all ipv4
		 * connections.
		 */
		err = bind(fd, addr->ai_addr, addr->ai_addrlen);
		if (err < 0)
		{
			ereport(LOG,
					(errcode_for_socket_access(),
			/* translator: %s is IPv4, IPv6, or Unix */
					 errmsg("could not bind %s socket: %m",
							familyDesc),
					 (IS_AF_UNIX(addr->ai_family)) ?
				  errhint("Is another postmaster already running on port %d?"
						  " If not, remove socket file \"%s\" and retry.",
						  (int) portNumber, sock_path) :
				  errhint("Is another postmaster already running on port %d?"
						  " If not, wait a few seconds and retry.",
						  (int) portNumber)));
			closesocket(fd);
			continue;
		}

#ifdef HAVE_UNIX_SOCKETS
		if (addr->ai_family == AF_UNIX)
		{
			if (Setup_AF_UNIX() != STATUS_OK)
			{
				closesocket(fd);
				break;
			}
		}
#endif

		/*
		 * Select appropriate accept-queue length limit.  PG_SOMAXCONN is only
		 * intended to provide a clamp on the request on platforms where an
		 * overly large request provokes a kernel error (are there any?).
		 */
		maxconn = MaxBackends * 2;
		if (maxconn > PG_SOMAXCONN)
			maxconn = PG_SOMAXCONN;

		err = listen(fd, maxconn);
		if (err < 0)
		{
			ereport(LOG,
					(errcode_for_socket_access(),
			/* translator: %s is IPv4, IPv6, or Unix */
					 errmsg("could not listen on %s socket: %m",
							familyDesc)));
			closesocket(fd);
			continue;
		}
		ListenSocket[listen_index] = fd;
		added++;
	}

	pg_freeaddrinfo_all(hint.ai_family, addrs);

	if (!added)
		return STATUS_ERROR;

	return STATUS_OK;
}
Example #28
0
/*
 *	Attempt to negotiate SSL connection.
 */
static int
open_server_SSL(Port *port)
{
	int			r;
	int			err;

	Assert(!port->ssl);
	Assert(!port->peer);

	if (!(port->ssl = SSL_new(SSL_context)))
	{
		ereport(COMMERROR,
				(errcode(ERRCODE_PROTOCOL_VIOLATION),
				 errmsg("could not initialize SSL connection: %s",
						SSLerrmessage())));
		close_SSL(port);
		return -1;
	}
	if (!my_SSL_set_fd(port->ssl, port->sock))
	{
		ereport(COMMERROR,
				(errcode(ERRCODE_PROTOCOL_VIOLATION),
				 errmsg("could not set SSL socket: %s",
						SSLerrmessage())));
		close_SSL(port);
		return -1;
	}

aloop:
	r = SSL_accept(port->ssl);
	if (r <= 0)
	{
		err = SSL_get_error(port->ssl, r);
		switch (err)
		{
			case SSL_ERROR_WANT_READ:
			case SSL_ERROR_WANT_WRITE:
#ifdef WIN32
				pgwin32_waitforsinglesocket(SSL_get_fd(port->ssl),
											(err == SSL_ERROR_WANT_READ) ?
						FD_READ | FD_CLOSE | FD_ACCEPT : FD_WRITE | FD_CLOSE,
											INFINITE);
#endif
				goto aloop;
			case SSL_ERROR_SYSCALL:
				if (r < 0)
					ereport(COMMERROR,
							(errcode_for_socket_access(),
							 errmsg("could not accept SSL connection: %m")));
				else
					ereport(COMMERROR,
							(errcode(ERRCODE_PROTOCOL_VIOLATION),
					errmsg("could not accept SSL connection: EOF detected")));
				break;
			case SSL_ERROR_SSL:
				ereport(COMMERROR,
						(errcode(ERRCODE_PROTOCOL_VIOLATION),
						 errmsg("could not accept SSL connection: %s",
								SSLerrmessage())));
				break;
			case SSL_ERROR_ZERO_RETURN:
				ereport(COMMERROR,
						(errcode(ERRCODE_PROTOCOL_VIOLATION),
				   errmsg("could not accept SSL connection: EOF detected")));
				break;
			default:
				ereport(COMMERROR,
						(errcode(ERRCODE_PROTOCOL_VIOLATION),
						 errmsg("unrecognized SSL error code: %d",
								err)));
				break;
		}
		close_SSL(port);
		return -1;
	}

	port->count = 0;

	/* get client certificate, if available. */
	port->peer = SSL_get_peer_certificate(port->ssl);
	if (port->peer == NULL)
	{
		strlcpy(port->peer_dn, "(anonymous)", sizeof(port->peer_dn));
		strlcpy(port->peer_cn, "(anonymous)", sizeof(port->peer_cn));
	}
	else
	{
		X509_NAME_oneline(X509_get_subject_name(port->peer),
						  port->peer_dn, sizeof(port->peer_dn));
		port->peer_dn[sizeof(port->peer_dn) - 1] = '\0';
		r = X509_NAME_get_text_by_NID(X509_get_subject_name(port->peer),
					   NID_commonName, port->peer_cn, sizeof(port->peer_cn));
		port->peer_cn[sizeof(port->peer_cn) - 1] = '\0';
		if (r == -1)
		{
			/* Unable to get the CN, set it to blank so it can't be used */
			port->peer_cn[0] = '\0';
		}
		else
		{
			/*
			 * Reject embedded NULLs in certificate common name to prevent
			 * attacks like CVE-2009-4034.
			 */
			if (r != strlen(port->peer_cn))
			{
				ereport(COMMERROR,
						(errcode(ERRCODE_PROTOCOL_VIOLATION),
						 errmsg("SSL certificate's common name contains embedded null")));
				close_SSL(port);
				return -1;
			}
		}
	}
	ereport(DEBUG2,
			(errmsg("SSL connection from \"%s\"", port->peer_cn)));

	/* set up debugging/info callback */
	SSL_CTX_set_info_callback(SSL_context, info_cb);

	return 0;
}
Example #29
0
/* --------------------------------
 *		pq_getbyte_if_available - get a single byte from connection,
 *			if available
 *
 * The received byte is stored in *c. Returns 1 if a byte was read,
 * 0 if no data was available, or EOF if trouble.
 * --------------------------------
 */
int
pq_getbyte_if_available(unsigned char *c)
{
	int			r;

	if (PqRecvPointer < PqRecvLength)
	{
		*c = PqRecvBuffer[PqRecvPointer++];
		return 1;
	}

	/* Temporarily put the socket into non-blocking mode */
#ifdef WIN32
	pgwin32_noblock = 1;
#else
	if (!pg_set_noblock(MyProcPort->sock))
		ereport(ERROR,
				(errmsg("could not set socket to non-blocking mode: %m")));
#endif
	MyProcPort->noblock = true;
	PG_TRY();
	{
		r = secure_read(MyProcPort, c, 1);
		if (r < 0)
		{
			/*
			 * Ok if no data available without blocking or interrupted (though
			 * EINTR really shouldn't happen with a non-blocking socket).
			 * Report other errors.
			 */
			if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR)
				r = 0;
			else
			{
				/*
				 * Careful: an ereport() that tries to write to the client
				 * would cause recursion to here, leading to stack overflow
				 * and core dump!  This message must go *only* to the
				 * postmaster log.
				 */
				ereport(COMMERROR,
						(errcode_for_socket_access(),
						 errmsg("could not receive data from client: %m")));
				r = EOF;
			}
		}
		else if (r == 0)
		{
			/* EOF detected */
			r = EOF;
		}
	}
	PG_CATCH();
	{
		/*
		 * The rest of the backend code assumes the socket is in blocking
		 * mode, so treat failure as FATAL.
		 */
#ifdef WIN32
		pgwin32_noblock = 0;
#else
		if (!pg_set_block(MyProcPort->sock))
			ereport(FATAL,
					(errmsg("could not set socket to blocking mode: %m")));
#endif
		MyProcPort->noblock = false;
		PG_RE_THROW();
	}
	PG_END_TRY();
#ifdef WIN32
	pgwin32_noblock = 0;
#else
	if (!pg_set_block(MyProcPort->sock))
		ereport(FATAL,
				(errmsg("could not set socket to blocking mode: %m")));
#endif
	MyProcPort->noblock = false;

	return r;
}
Example #30
0
/*
 * Read a message from the specified connection carrying file descriptors
 */
int
pool_recvfds(PoolPort *port, int *fds, int count)
{
	int			r;
	uint		n32;
	char		buf[SEND_MSG_BUFFER_SIZE];
	struct iovec iov[1];
	struct msghdr msg;
	int                     controllen = CMSG_LEN(count * sizeof(int));
	struct cmsghdr *cmptr = malloc(CMSG_SPACE(count * sizeof(int)));

	if (cmptr == NULL)
		return EOF;

	iov[0].iov_base = buf;
	iov[0].iov_len = SEND_MSG_BUFFER_SIZE;
	msg.msg_iov = iov;
	msg.msg_iovlen = 1;
	msg.msg_name = NULL;
	msg.msg_namelen = 0;
	msg.msg_control = (caddr_t) cmptr;
	msg.msg_controllen = controllen;

	r = recvmsg(Socket(*port), &msg, 0);
	if (r < 0)
	{
		/*
		 * Report broken connection
		 */
		ereport(ERROR,
				(errcode_for_socket_access(),
				 errmsg("could not receive data from client: %m")));
		goto failure;
	}
	else if (r == 0)
	{
		goto failure;
	}
	else if (r != SEND_MSG_BUFFER_SIZE)
	{
		ereport(ERROR,
				(errcode(ERRCODE_PROTOCOL_VIOLATION),
				 errmsg("incomplete message from client")));
		goto failure;
	}

	/* Verify response */
	if (buf[0] != 'f')
	{
		ereport(ERROR,
				(errcode(ERRCODE_PROTOCOL_VIOLATION),
				 errmsg("unexpected message code")));
		goto failure;
	}

	memcpy(&n32, buf + 1, 4);
	n32 = ntohl(n32);
	if (n32 != 8)
	{
		ereport(ERROR,
				(errcode(ERRCODE_PROTOCOL_VIOLATION),
				 errmsg("invalid message size")));
		goto failure;
	}

	/*
	 * If connection count is 0 it means pool does not have connections
	 * to  fulfill request. Otherwise number of returned connections
	 * should be equal to requested count. If it not the case consider this
	 * a protocol violation. (Probably connection went out of sync)
	 */
	memcpy(&n32, buf + 5, 4);
	n32 = ntohl(n32);
	if (n32 == 0)
	{
		ereport(LOG,
				(errcode(ERRCODE_INSUFFICIENT_RESOURCES),
				 errmsg("failed to acquire connections")));
		goto failure;
	}

	if (n32 != count)
	{
		ereport(ERROR,
				(errcode(ERRCODE_PROTOCOL_VIOLATION),
				 errmsg("unexpected connection count")));
		goto failure;
	}

	memcpy(fds, CMSG_DATA(CMSG_FIRSTHDR(&msg)), count * sizeof(int));
	free(cmptr);
	return 0;
failure:
	free(cmptr);
	return EOF;
}